From 061e1d4310cd0ff3e1efacd5a6898d6dccf11a3f Mon Sep 17 00:00:00 2001
From: Maxim Koltsov <kolmax94@gmail.com>
Date: Tue, 4 Oct 2022 10:54:34 +0300
Subject: [PATCH] version 0.1.5.0 rewrite FASTA parser to Megaparsec (#67)

Co-authored-by: Grigoriy <mikheevg@Grigoriys-MacBook-Pro.local>
Co-authored-by: Grigoriy Mikheev <grigoriymihtt@gmail.com>
Co-authored-by: Grigoriy Mikheev <mikheevg.@biocad.ru>
---
 ChangeLog.md            |   4 +
 default.nix             |  12 ++
 package.yaml            |   3 +-
 shell.nix               |   1 +
 src/Bio/FASTA.hs        |   7 +-
 src/Bio/FASTA/Parser.hs | 259 +++++++++++++++++++++-------------------
 src/Bio/FASTA/Type.hs   |  12 +-
 test/FASTA/order1.fasta |  14 +++
 test/FASTA/order2.fasta |   5 +
 test/FASTA/order3.fasta |   2 +
 test/FASTA/order4.fasta |  27 +++++
 test/FASTA/order5.fasta |   9 ++
 test/FASTA/order6.fasta |  26 ++++
 test/FASTA/order7.fasta |  11 ++
 test/FASTA/order8.fasta |  22 ++++
 test/FASTASpec.hs       |  86 +++++++++----
 test/FastaParserSpec.hs |  70 +++++++----
 17 files changed, 394 insertions(+), 176 deletions(-)
 create mode 100644 default.nix
 create mode 100644 shell.nix
 create mode 100644 test/FASTA/order1.fasta
 create mode 100644 test/FASTA/order2.fasta
 create mode 100644 test/FASTA/order3.fasta
 create mode 100644 test/FASTA/order4.fasta
 create mode 100644 test/FASTA/order5.fasta
 create mode 100644 test/FASTA/order6.fasta
 create mode 100644 test/FASTA/order7.fasta
 create mode 100644 test/FASTA/order8.fasta

diff --git a/ChangeLog.md b/ChangeLog.md
index 841c1a7..3cec239 100644
--- a/ChangeLog.md
+++ b/ChangeLog.md
@@ -2,6 +2,10 @@
 
 ## [Unreleased]
 
+## [0.1.5.0] - 2022-09-30
+### Changed
+- Rewrite FASTA parser with Megaparsec; malformed input now yields a parse error instead of a truncated result.
+
 ## [0.1.4.4] - 2022-06-02
 ### Changed
 - Update more dependencies;
diff --git a/default.nix b/default.nix
new file mode 100644
index 0000000..314ae3b
--- /dev/null
+++ b/default.nix
@@ -0,0 +1,12 @@
+let
+  bcd-lts = import (builtins.fetchGit {
+    url = "git@github.com:biocad/nix-lts.git";
+    ref = "master";
+  });
+in
+bcd-lts.mkBiocadProject {
+  src = bcd-lts.pkgs.haskell-nix.haskellLib.cleanGit { name = "cobot-io"; src = ./.; };
+  shellArgs = {
+    buildInputs = [ bcd-lts.pkgs.RNA ];
+  };
+}
diff --git a/package.yaml b/package.yaml
index 0d4cde8..21dd9d0 100644
--- a/package.yaml
+++ b/package.yaml
@@ -1,5 +1,5 @@
 name:                cobot-io
-version:             0.1.4.4
+version:             0.1.5.0
 github:              "biocad/cobot-io"
 license:             BSD3
 category:            Bio
@@ -41,6 +41,7 @@ dependencies:
 - containers >= 0.5.7.1 && < 0.7
 - data-msgpack >= 0.0.9 && < 0.1
 - deepseq >= 1.4 && < 1.5
+- filepath
 - http-conduit >= 2.3 && < 2.4
 - hyraxAbif >= 0.2.3.27 && < 0.2.4.0
 - lens >= 4.16 && < 5.2
diff --git a/shell.nix b/shell.nix
new file mode 100644
index 0000000..76918f6
--- /dev/null
+++ b/shell.nix
@@ -0,0 +1 @@
+(import ./default.nix).shellFor
diff --git a/src/Bio/FASTA.hs b/src/Bio/FASTA.hs
index d9dfbba..0c2ff4b 100644
--- a/src/Bio/FASTA.hs
+++ b/src/Bio/FASTA.hs
@@ -6,14 +6,15 @@ module Bio.FASTA
   , fromFile
   , toFile
   , fastaP
-  , fastaPGeneric
   , fastaLine
   , modificationP
+  , Parser
   ) where
 
 import Control.Monad.IO.Class (MonadIO, liftIO)
-import Data.Attoparsec.Text   (parseOnly)
 import Data.Text.IO           (readFile, writeFile)
+import System.FilePath        (takeBaseName)
+import Text.Megaparsec        (errorBundlePretty, parse)
 #if !MIN_VERSION_base(4,13,0)
 import Control.Monad.Fail (MonadFail (..))
 import Prelude            hiding (fail, readFile, writeFile)
@@ -28,7 +29,7 @@ import Bio.FASTA.Writer (WritableFastaToken (..), fastaToText)
 -- | Reads 'FastaSequence' from given file.
 --
 fromFile :: (MonadFail m, MonadIO m) => FilePath -> m (Fasta Char)
-fromFile f = liftIO (readFile f) >>= either fail pure . parseOnly fastaP
+fromFile f = liftIO (readFile f) >>= either (fail . errorBundlePretty) pure . parse fastaP (takeBaseName f)
 
 -- | Writes 'FastaSequence' to file.
 --
diff --git a/src/Bio/FASTA/Parser.hs b/src/Bio/FASTA/Parser.hs
index a5aa3a1..4eb7cc0 100644
--- a/src/Bio/FASTA/Parser.hs
+++ b/src/Bio/FASTA/Parser.hs
@@ -2,151 +2,168 @@
 
 module Bio.FASTA.Parser
   ( fastaP
-  , fastaPGeneric
-  , fastaLine 
+  , fastaLine
+  , parseOnly
   , modificationP
+  , fastaPGeneric
+  , Parser
   ) where
 
-import Bio.FASTA.Type       (Fasta, FastaItem (..), ModItem (..), Modification (..),
-                             ParsableFastaToken (..))
-import Bio.Sequence         (BareSequence, bareSequence)
-import Control.Applicative  ((<|>))
-import Data.Attoparsec.Text (Parser, char, choice, endOfInput, endOfLine, many', many1', satisfy,
-                             skipWhile, space, string, takeWhile, try)
-import Data.Char            (isAlphaNum, isLetter, isSpace)
-import Data.Text            (Text, strip)
-import Prelude              hiding (takeWhile)
+import           Bio.FASTA.Type             (Fasta, FastaItem (..),
+                                             ModItem (..), Modification (..),
+                                             ParsableFastaToken (..))
+import           Bio.Sequence               (BareSequence, bareSequence)
+import           Data.Bifunctor             (first)
+import           Data.Char                  (isLetter)
+import           Data.Functor               (void, ($>))
+import           Data.Text                  (Text, pack, strip)
+import           Data.Void                  (Void)
+import           Text.Megaparsec
+import           Text.Megaparsec.Char
+import qualified Text.Megaparsec.Char.Lexer as L
 
 instance ParsableFastaToken Char where
-    parseToken = satisfy
+  parseToken p = satisfy p <?> "letter"
 
 instance ParsableFastaToken ModItem where
-    parseToken predicate = (Mod <$> modificationP) <|> (Letter <$> satisfy predicate)
+  parseToken p = (Mod <$> modificationP <?> "fasta item modification") <|> (Letter <$> satisfy p <?> "letter")
 
+type Parser = Parsec Void Text
+
+-- | Run a parser on the given text; failures are rendered with 'errorBundlePretty' under the source name @input.fasta@.
+parseOnly :: Parsec Void Text a -> Text -> Either String a
+parseOnly p s = first errorBundlePretty $ parse p "input.fasta" s
+
+sc :: Parser () -- space consumer shared by 'lexeme' and 'symbol'
+sc = L.space hspace1 empty empty -- horizontal whitespace only: line breaks delimit names and sequence lines ('myEnd')
+
+lexeme :: Parser a -> Parser a
+lexeme = L.lexeme sc
+
+symbol :: Text -> Parser Text
+symbol = L.symbol sc
 
 -- | Parser of .fasta file.
 --
 fastaP :: ParsableFastaToken a => Parser (Fasta a)
-fastaP = many' space *> fastaPGeneric isLetter
+fastaP = hidden space *> fastaPGeneric isLetter -- skip leading whitespace, as the Attoparsec-based parser did
 
 fastaPGeneric :: ParsableFastaToken a => (Char -> Bool) -> Parser (Fasta a)
-fastaPGeneric = many' . item
+fastaPGeneric p  = many (item p) <* hidden space <* eof
 
 item :: ParsableFastaToken a => (Char -> Bool) -> Parser (FastaItem a)
-item predicate = (FastaItem <$> seqName <*> fastaSeq predicate) <* skipWhile isSpace
+item p =
+  FastaItem
+    <$> seqName
+    <*> (fastaSeq p <?> "sequence")
 
 seqName :: Parser Text
-seqName = strip <$> (char '>' *> tabs *> takeWhile (`notElem` ['\n', '\r']) <* tabs <* eol)
+seqName = strip . pack <$> (symbol ">" *> (manyTill anySingle myEnd <?> "sequence name"))
 
 fastaSeq :: ParsableFastaToken a => (Char -> Bool) -> Parser (BareSequence a)
-fastaSeq predicate = bareSequence . mconcat <$> many' (fastaLine predicate)
+fastaSeq p = bareSequence . concat <$> many (fastaLine p) <* hidden space
 
 fastaLine :: ParsableFastaToken a => (Char -> Bool) -> Parser [a]
-fastaLine predicate = concat <$> many1' (many1' (parseToken predicate) <* many' (char ' ')) <* eol
-
-eol :: Parser ()
-eol = tabs *> choice [slashN, endOfInput]
-
-slashN :: Parser ()
-slashN = () <$ many1' endOfLine
+fastaLine p = concat <$> some (some (parseToken p) <* hidden hspace) <* myEnd
 
-tabs :: Parser ()
-tabs = () <$ many' (char '\t')
+myEnd :: Parser () -- line terminator: one or more end-of-line sequences, or end of input
+myEnd = void (some eol) <|> eof
 
 modificationP :: Parser Modification
-modificationP 
-  =   string "[A*]" *> pure Mod_A_Star
-  <|> string "[C*]" *> pure Mod_C_Star
-  <|> string "[G*]" *> pure Mod_G_Star
-  <|> string "[T*]" *> pure Mod_T_Star
-  <|> string "[rA]" *> pure Mod_rA
-  <|> string "[rC]" *> pure Mod_rC
-  <|> string "[rG]" *> pure Mod_rG
-  <|> string "[rU]" *> pure Mod_rU
-  <|> string "[+A]" *> pure Mod_Plus_A
-  <|> string "[+C]" *> pure Mod_Plus_C
-  <|> string "[+G]" *> pure Mod_Plus_G
-  <|> string "[+T]" *> pure Mod_Plus_T
-  <|> string "[rAf]" *> pure Mod_rAf
-  <|> string "[rCf]" *> pure Mod_rCf
-  <|> string "[rGf]" *> pure Mod_rGf
-  <|> string "[rUf]" *> pure Mod_rUf
-  <|> string "[mA]" *> pure Mod_mA
-  <|> string "[mC]" *> pure Mod_mC
-  <|> string "[mG]" *> pure Mod_mG
-  <|> string "[mU]" *> pure Mod_mU
-  <|> string "[mA*]" *> pure Mod_mA_Star
-  <|> string "[mC*]" *> pure Mod_mC_Star
-  <|> string "[mG*]" *> pure Mod_mG_Star
-  <|> string "[mU*]" *> pure Mod_mU_Star
-  <|> string "[dU]" *> pure Mod_dU
-  <|> string "[5Bio]" *> pure Mod_5Bio
-  <|> string "[iBio]" *> pure Mod_iBio
-  <|> string "[56FAM]" *> pure Mod_56FAM
-  <|> string "[36FAM]" *> pure Mod_36FAM
-  <|> string "[5HEX]" *> pure Mod_5HEX
-  <|> string "[5TMR]" *> pure Mod_5TMR
-  <|> string "[3BHQ1]" *> pure Mod_3BHQ1
-  <|> string "[3BHQ2]" *> pure Mod_3BHQ2
-  <|> string "[5NH2]" *> pure Mod_5NH2
-  <|> string "[3NH2]" *> pure Mod_3NH2
-  <|> string "[5PO4]" *> pure Mod_5PO4
-  <|> string "[3PO4]" *> pure Mod_3PO4
-  <|> string "[3BioTEG]" *> pure Mod_3BioTEG
-  <|> string "[C12]" *> pure Mod_C12
-  <|> string "[NHSdT]" *> pure Mod_NHSdT
-  <|> string "[5Mal]" *> pure Mod_5Mal
-  <|> string "[5thio]" *> pure Mod_5thio
-  <|> string "[3thio]" *> pure Mod_3thio
-  <|> string "[3azide]" *> pure Mod_3azide
-  <|> string "[3alkine]" *> pure Mod_3alkine
-  <|> string "[5CholTEG]" *> pure Mod_5CholTEG
-  <|> string "[3CholTEG]" *> pure Mod_3CholTEG
-  <|> string "[5C10]" *> pure Mod_5C10
-  <|> string "[5Alk]" *> pure Mod_5Alk
-  <|> string "[GC]" *> pure Mod_GC
-  <|> string "[GT]" *> pure Mod_GT
-  <|> string "[AT]" *> pure Mod_AT
-  <|> string "[TG]" *> pure Mod_TG
-  <|> string "[AC]" *> pure Mod_AC
-  <|> string "[CC]" *> pure Mod_CC
-  <|> string "[AA]" *> pure Mod_AA
-  <|> string "[TC]" *> pure Mod_TC
-  <|> string "[TT]" *> pure Mod_TT
-  <|> string "[CG]" *> pure Mod_CG
-  <|> string "[GG]" *> pure Mod_GG
-  <|> string "[AG]" *> pure Mod_AG
-  <|> string "[GA]" *> pure Mod_GA
-  <|> string "[CA]" *> pure Mod_CA
-  <|> string "[CT]" *> pure Mod_CT
-  <|> string "[TA]" *> pure Mod_TA
-  <|> string "[AAA]" *> pure Mod_AAA
-  <|> string "[AAC]" *> pure Mod_AAC
-  <|> string "[ACT]" *> pure Mod_ACT
-  <|> string "[ATC]" *> pure Mod_ATC
-  <|> string "[ATG]" *> pure Mod_ATG
-  <|> string "[CAG]" *> pure Mod_CAG
-  <|> string "[AGA]" *> pure Mod_AGA
-  <|> string "[CAT]" *> pure Mod_CAT
-  <|> string "[CCG]" *> pure Mod_CCG
-  <|> string "[CGT]" *> pure Mod_CGT
-  <|> string "[CTG]" *> pure Mod_CTG
-  <|> string "[GAA]" *> pure Mod_GAA
-  <|> string "[GAC]" *> pure Mod_GAC
-  <|> string "[GCT]" *> pure Mod_GCT
-  <|> string "[GGT]" *> pure Mod_GGT
-  <|> string "[GTT]" *> pure Mod_GTT
-  <|> string "[TAC]" *> pure Mod_TAC
-  <|> string "[TCT]" *> pure Mod_TCT
-  <|> string "[TGC]" *> pure Mod_TGC
-  <|> string "[TGG]" *> pure Mod_TGG
-  <|> string "[TTC]" *> pure Mod_TTC
-  <|> string "[TTT]" *> pure Mod_TTT
-  <|> unknownP
+modificationP
+  = choice
+  [ string "[A*]" $> Mod_A_Star
+  , string "[C*]" $> Mod_C_Star
+  , string "[G*]" $> Mod_G_Star
+  , string "[T*]" $> Mod_T_Star
+  , string "[rA]" $> Mod_rA
+  , string "[rC]" $> Mod_rC
+  , string "[rG]" $> Mod_rG
+  , string "[rU]" $> Mod_rU
+  , string "[+A]" $> Mod_Plus_A
+  , string "[+C]" $> Mod_Plus_C
+  , string "[+G]" $> Mod_Plus_G
+  , string "[+T]" $> Mod_Plus_T
+  , string "[rAf]" $> Mod_rAf
+  , string "[rCf]" $> Mod_rCf
+  , string "[rGf]" $> Mod_rGf
+  , string "[rUf]" $> Mod_rUf
+  , string "[mA]" $> Mod_mA
+  , string "[mC]" $> Mod_mC
+  , string "[mG]" $> Mod_mG
+  , string "[mU]" $> Mod_mU
+  , string "[mA*]" $> Mod_mA_Star
+  , string "[mC*]" $> Mod_mC_Star
+  , string "[mG*]" $> Mod_mG_Star
+  , string "[mU*]" $> Mod_mU_Star
+  , string "[dU]" $> Mod_dU
+  , string "[5Bio]" $> Mod_5Bio
+  , string "[iBio]" $> Mod_iBio
+  , string "[56FAM]" $> Mod_56FAM
+  , string "[36FAM]" $> Mod_36FAM
+  , string "[5HEX]" $> Mod_5HEX
+  , string "[5TMR]" $> Mod_5TMR
+  , string "[3BHQ1]" $> Mod_3BHQ1
+  , string "[3BHQ2]" $> Mod_3BHQ2
+  , string "[5NH2]" $> Mod_5NH2
+  , string "[3NH2]" $> Mod_3NH2
+  , string "[5PO4]" $> Mod_5PO4
+  , string "[3PO4]" $> Mod_3PO4
+  , string "[3BioTEG]" $> Mod_3BioTEG
+  , string "[C12]" $> Mod_C12
+  , string "[NHSdT]" $> Mod_NHSdT
+  , string "[5Mal]" $> Mod_5Mal
+  , string "[5thio]" $> Mod_5thio
+  , string "[3thio]" $> Mod_3thio
+  , string "[3azide]" $> Mod_3azide
+  , string "[3alkine]" $> Mod_3alkine
+  , string "[5CholTEG]" $> Mod_5CholTEG
+  , string "[3CholTEG]" $> Mod_3CholTEG
+  , string "[5C10]" $> Mod_5C10
+  , string "[5Alk]" $> Mod_5Alk
+  , string "[GC]" $> Mod_GC
+  , string "[GT]" $> Mod_GT
+  , string "[AT]" $> Mod_AT
+  , string "[TG]" $> Mod_TG
+  , string "[AC]" $> Mod_AC
+  , string "[CC]" $> Mod_CC
+  , string "[AA]" $> Mod_AA
+  , string "[TC]" $> Mod_TC
+  , string "[TT]" $> Mod_TT
+  , string "[CG]" $> Mod_CG
+  , string "[GG]" $> Mod_GG
+  , string "[AG]" $> Mod_AG
+  , string "[GA]" $> Mod_GA
+  , string "[CA]" $> Mod_CA
+  , string "[CT]" $> Mod_CT
+  , string "[TA]" $> Mod_TA
+  , string "[AAA]" $> Mod_AAA
+  , string "[AAC]" $> Mod_AAC
+  , string "[ACT]" $> Mod_ACT
+  , string "[ATC]" $> Mod_ATC
+  , string "[ATG]" $> Mod_ATG
+  , string "[CAG]" $> Mod_CAG
+  , string "[AGA]" $> Mod_AGA
+  , string "[CAT]" $> Mod_CAT
+  , string "[CCG]" $> Mod_CCG
+  , string "[CGT]" $> Mod_CGT
+  , string "[CTG]" $> Mod_CTG
+  , string "[GAA]" $> Mod_GAA
+  , string "[GAC]" $> Mod_GAC
+  , string "[GCT]" $> Mod_GCT
+  , string "[GGT]" $> Mod_GGT
+  , string "[GTT]" $> Mod_GTT
+  , string "[TAC]" $> Mod_TAC
+  , string "[TCT]" $> Mod_TCT
+  , string "[TGC]" $> Mod_TGC
+  , string "[TGG]" $> Mod_TGG
+  , string "[TTC]" $> Mod_TTC
+  , string "[TTT]" $> Mod_TTT
+  , unknownP
+  ]
 
 unknownP :: Parser Modification
-unknownP = try $ do
-    _ <- char '[' 
-    m <- many1' $ satisfy (\c -> isAlphaNum c || c `elem` ['+', '-', '*', '_'])
-    _ <- char ']'
-    pure $ Unknown ("[" <> m <> "]")
+unknownP = do
+  res <- between (symbol "[") (symbol "]")
+    (lexeme (some (alphaNumChar <|> choice (char <$> ['+', '-', '*', '_'])) <?> "modification name"))
+  pure $ Unknown ("[" <> res <> "]")
diff --git a/src/Bio/FASTA/Type.hs b/src/Bio/FASTA/Type.hs
index bb5a86f..f599e5c 100644
--- a/src/Bio/FASTA/Type.hs
+++ b/src/Bio/FASTA/Type.hs
@@ -7,10 +7,11 @@ module Bio.FASTA.Type
   , modificationToString
   ) where
 
-import Bio.Sequence         (BareSequence)
-import Data.Attoparsec.Text (Parser)
-import Data.Text            (Text)
-import GHC.Generics         (Generic)
+import Bio.Sequence    (BareSequence)
+import Data.Text       (Text)
+import Data.Void
+import GHC.Generics    (Generic)
+import Text.Megaparsec
 
 -- | Type alias for FASTA file.
 --  satisfies the following format : >(\s|\t)*[^\n\r]+(\s|\t)*(\n|\r)*((\w|\s)(\n|\r)*)*
@@ -29,7 +30,7 @@ data FastaItem a
   deriving (Eq, Show, Functor)
 
 class ParsableFastaToken a where
-    parseToken :: (Char -> Bool) -> Parser a
+    parseToken :: (Char -> Bool) -> Parsec Void Text a
 
 data ModItem
   = Mod Modification
@@ -216,4 +217,3 @@ modificationToString Mod_TGG      = "[TGG]"
 modificationToString Mod_TTC      = "[TTC]"
 modificationToString Mod_TTT      = "[TTT]"
 modificationToString (Unknown s)  = s
-
diff --git a/test/FASTA/order1.fasta b/test/FASTA/order1.fasta
new file mode 100644
index 0000000..927b811
--- /dev/null
+++ b/test/FASTA/order1.fasta
@@ -0,0 +1,14 @@
+>3HMX:A|PDBID|CHAIN|SEQUENCE
+IWELKKDVYVVELDWYPDAPGEMVVLTCDTPEEDGITWTLDQSSE
+VLGSGKTLTIQVKEFGDAGQYTCHKGGEVLSHSLL
+
+>7HMX:A|PDBID|CHAIN|SEQUENCE
+EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE
+VLGSGKTLTIQVKEFGDAGQYTCHKGGEVLSHSLL
+
+> With_spaces 
+MDFFDLDIEI KQERLPAECS LNSPLNYSLS AQLTDRMTPR TENVRRQRER
+MDFFDLDIEI KQERLPAECS LNSPLNYSLS AQLTDRMTPR TENVRRQRER
+MDFFDLDIEI KQERLPAECS LNSPLNYSLS AQLTDRMTPR TENVRRQRER
+
+> Empty_ha_ha_ha 
diff --git a/test/FASTA/order2.fasta b/test/FASTA/order2.fasta
new file mode 100644
index 0000000..3f70512
--- /dev/null
+++ b/test/FASTA/order2.fasta
@@ -0,0 +1,5 @@
+>Sample_name1
+ACGT....TCG
+
+>Sample_name2
+GTCA....TGC
\ No newline at end of file
diff --git a/test/FASTA/order3.fasta b/test/FASTA/order3.fasta
new file mode 100644
index 0000000..e690170
--- /dev/null
+++ b/test/FASTA/order3.fasta
@@ -0,0 +1,2 @@
+>N-His-E4Orf6-7-R2(115)
+TGATGGTGATGGTGATGcatGTGGTAAACTCGACTTTCACTTTTCTCTATCACTGATAGGGAGTGGTAAACTCGACTTTCACTTTTCTCTATCACTGATAGGGAaacagtcagcc		     
\ No newline at end of file
diff --git a/test/FASTA/order4.fasta b/test/FASTA/order4.fasta
new file mode 100644
index 0000000..38de7ad
--- /dev/null
+++ b/test/FASTA/order4.fasta
@@ -0,0 +1,27 @@
+>CMV-Lox2272-HindIII_R	
+aatcatAAGCTTataacttcgtataaagtatcctatacgaagttatagctctgcttatatagacctcccacc 	
+
+
+HindIII-BFP_F        	
+aagttatAAGCTTatgagcgagctgattaaggagaacatgc                                	
+                                   	
+>sPA-lox2272_R        	
+taatTGGCTAGCATataacttcgtataaagtatcctatacgaagttatgctgcatcacacaaaaaaccaacac	
+
+>NheI-GFP_F           	
+ttatATGCTAGCCAatggtgagcaagggcgagg                                        	
+
+>NotI-pMP_F           	
+tataatGCGGCCGCAGGTGGCac                        
+
+>CMV-LoxP-HindIII_R	
+attaatAAGCTTataacttcgtataatgtatgctatacgaagttatagctctgcttatatagacctcccacc 
+	                          	
+>sPA-NotI_R        	
+aattaaTGCGGCCGCgctgcatcacacaaaaaaccaacacac 
+                              	
+sPA-LoxP-NheI_R   	
+taatTGGCTAGCATataacttcgtataatgtatgctatacgaagttatgctgcatcacacaaaaaaccaacac
+	
+
+                                                 	                         	
\ No newline at end of file
diff --git a/test/FASTA/order5.fasta b/test/FASTA/order5.fasta
new file mode 100644
index 0000000..5979123
--- /dev/null
+++ b/test/FASTA/order5.fasta
@@ -0,0 +1,9 @@
+>qCHO49 F 
+TGGAGAGATGGCTCGAGGTT
+
+
+
+
+
+qCHO R 
+TGGTTGCTGGGAATTGAACTC 
diff --git a/test/FASTA/order6.fasta b/test/FASTA/order6.fasta
new file mode 100644
index 0000000..156488b
--- /dev/null
+++ b/test/FASTA/order6.fasta
@@ -0,0 +1,26 @@
+>CMV-Lox2272-HindIII_R	
+aatcatAAGCTTataacttcgtataaagtatcctatacgaagttatagctctgcttatatagacctcccacc 	
+
+>HindIII-BFP_F        	
+aagttatAAGCTTatgagcgagctgattaaggagaacatgc                                	
+                                   	
+>sPA-lox2272_R        	
+taatTGGCTAGCATataacttcgtataaagtatcctatacgaagttatgctgcatcacacaaaaaaccaacac	
+
+>NheI-GFP_F           	
+ttatATGCTAGCCAatggtgagcaagggcgagg                                        	
+
+>NotI-pMP_F           	
+tataatGCGGCCGCAGGTGGCac                        
+
+>CMV-LoxP-HindIII_R	
+attaatAAGCTTataacttcgtataatgtatgctatacgaagttatagctctgcttatatagacctcccacc 
+	                          	
+>sPA-NotI_R        	
+aattaaTGCGGCCGCgctgcatcacacaaaaaaccaacacac 
+               	
+sPA-LoxP-NheI_R   	
+taatTGGCTAGCATataacttcgtataatgtatgctatacgaagttatgctgcatcacacaaaaaaccaacac
+	
+
+                                                 	                         	
\ No newline at end of file
diff --git a/test/FASTA/order7.fasta b/test/FASTA/order7.fasta
new file mode 100644
index 0000000..0a097f5
--- /dev/null
+++ b/test/FASTA/order7.fasta
@@ -0,0 +1,11 @@
+>GB_F 
+5’-CTTCAAGAGAGAGACCTGCGT-3’
+
+>GB_R
+5’-GATGTTGTTGGCCACCTCG-3’
+
+>F8_GB20_F
+GCTACACCTTCAAGCACA
+
+>F8_GB20_R
+GGGTTCTCCATGCTCA
diff --git a/test/FASTA/order8.fasta b/test/FASTA/order8.fasta
new file mode 100644
index 0000000..f0fe91c
--- /dev/null
+++ b/test/FASTA/order8.fasta
@@ -0,0 +1,22 @@
+>Ampl_prcTnT_del
+tttttACGCGTtaatagtaatcaattacggggtcattagttcatagcccatatatggagttccggctgccttatcagcgtctcgggcactcacgtatctccgtccgacgggtttaaaatagcaaaactctgagcgctgctgccaaaatagcagctcacaagtgttgcattcctctctgggcgccgggcacattcctgctggctctgcccgccccccatatatggagttccgcgttacataacttacggtaaatgg
+>Ampl_MHCK7-1
+tttttACGCGTtaatagtaatcaattacggggtcattagttcatagcccatatatggagttccgccttcagattaaaaataactgaggtaagggcctgggtaggggaggtggtgtgagacgctcctgtctctcctctatctgcccatcggccctttggggaggaggaatgtgcccaaggactaaaaaaaggccatggagccagaggggcgagggcaacagacctttcatgggcaaaccttggggccctgctgtctagcatgcccc
+>Ampl_MHCK7-2
+accttggggccctgctgtctagcatgccccactacgggtctaggctgcccatgtaaggaggcaaggcctggggacacccgagatgcctggttataattaacccagacatgtggctgcccccccccccccaacacctgctgcctctaaaaataaccctgtccctggtggatcccctgcatgcgaagatcttcgaaccatatatggagttccgcgttacataacttacggtaaatgg
+>Amplicon1_MH
+tttttACGCGTtaatagtaatcaattacggggtcattagttcatagcccatatatggagttccgGTGCTGTCAGCCTTCCTTGACACCTCTGTCTCCTCAGGTGCCTGGCTCCCAGTCCCCAGAACGCCTCTCCTGTACCTTGCTTCCTAGCTGGGCCTTTCCTTCTCCTCTATAAATACCAGCTCTGGTATTTCGCCTTGGCAGCTGTagcagccactacgggtctaggctgcccatgtaaggaggcaaggcctgggga
+>Amplicon2_MH
+gctgcccatgtaaggaggcaaggcctggggacacccgagatgcctggttataattaacccagacatgtggctgcccccccccccccaacacctgctgcctgagcctcacccccaccccggtgcctgggtcttaggctctgtacaccatggaggagaagctcgctctaaaaataaccctgtcccccatatatggagttccgcgttacataacttacggtaaatgg
+>Ampl_MHCK7-1
+tttttACGCGTtaatagtaatcaattacggggtcattagttcatagcccatatatggagttccgccttcagattaaaaataactgaggtaagggcctgggtaggggaggtggtgtgagacgctcctgtctctcctctatctgcccatcggccctttggggaggaggaatgtgcccaaggactaaaaaaaggccatggagccagaggggcgagggcaacagacctttcatgggcaaaccttggggccctgctgtctagcatgcccc
+>Ampl_MHCK7-2
+accttggggccctgctgtctagcatgccccactacgggtctaggctgcccatgtaaggaggcaaggcctggggacacccgagatgcctggttataattaacccagacatgtggctgcccccccccccccaacacctgctgcctctaaaaataaccctgtccctggtggatcccctgcatgcgaagatcttcgaaccatatatggagttccgcgttacataacttacggtaaatgg
+>CMV + enhMH-1
+GTGCTGTCAGCCTTCCTTGACACCTCTGTCTCCTCAGGTGCCTGGCTCCCAGTCCCCAGAACGCCTCTCCTGTACCTTGCTTCCTAGCTGGGCCTTTCCTTCTCCTCTATAAATACCAGCTCTGGTATTTCGCCTTGGCAGCTGTagcagccactacgggtctaggctgcccatgtaaggaggcaaggcctggggacacccgagatgcctggttataattaacccagacatgtggctgcccccccccccccaacacctgctgcctgagcctcacccccaccccggtgcctgggtcttaggctctgtacac
+>CMV + enhMH-2
+cccggtgcctgggtcttaggctctgtacaccatggaggagaagctcgctctaaaaataaccctgtcccgtgatgcggttttggcagtacatcaatgggcgtggatagcggtttgactcacggggatttccaagtctccaccccattgacgtcaatgggagtttgttttggcaccaaaatcaacgggactttccaaaatgtcgtaacaactccgccccattgacgcaaatgggcggtaggcgtgtacggtgggaggtctatataagcagagct
+>CMV + enhMCK + prcTnT-1
+ccactacgggtctaggctgcccatgtaaggaggcaaggcctggggacacccgagatgcctggttataattaaccccaacacctgctgcccccccccccccaacacctgctgcctgagcctgagcggttaccccaccccggtgcctgggtcttaggctctgtacaccatggaggagaagctcgctctaaaaataaccctgtccctggtgggtgccttatcagcgtccccagccctgggaggtgacagctggctggcttgtgtcagcccctcgggcactcacgtatctccgt
+CMV + enhMCK + prcTnT-2
+tcagcccctcgggcactcacgtatctccgtccgacgggtttaaaatagcaaaactgtgatgcggttttggcagtacatcaatgggcgtggatagcggtttgactcacggggatttccaagtctccaccccattgacgtcaatgggagtttgttttggcaccaaaatcaacgggactttccaaaatgtcgtaacaactccgccccattgacgcaaatgggcggtaggcgtgtacggtgggaggtctatataagcagagct
\ No newline at end of file
diff --git a/test/FASTASpec.hs b/test/FASTASpec.hs
index c4f5b0a..01199ff 100644
--- a/test/FASTASpec.hs
+++ b/test/FASTASpec.hs
@@ -2,37 +2,79 @@
 
 module FASTASpec where
 
-import Bio.FASTA        (fromFile, toFile)
-import Bio.FASTA.Type   (Fasta, FastaItem (..))
-import Bio.Sequence     (bareSequence)
-import Prelude          hiding (readFile, writeFile)
-import System.Directory (removeFile)
+import Bio.FASTA              (fastaP, fromFile, toFile)
+import Bio.FASTA.Parser       (parseOnly)
+import Bio.FASTA.Type         (Fasta, FastaItem (..))
+import Bio.Sequence           (bareSequence)
+import Control.Monad.IO.Class (liftIO)
+import Data.Text.IO           (readFile)
+import Prelude                hiding (readFile, writeFile)
+import System.Directory       (removeFile)
 import Test.Hspec
 
-correctFasta :: Fasta Char
-correctFasta = [ FastaItem "3HMX:A|PDBID|CHAIN|SEQUENCE" (bareSequence "IWELKKDVYVVELDWYPDAPGEMVVLTCDTPEEDGITWTLDQSSEVLGSGKTLTIQVKEFGDAGQYTCHKGGEVLSHSLL")
-               , FastaItem "7HMX:A|PDBID|CHAIN|SEQUENCE" (bareSequence "EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEVLGSGKTLTIQVKEFGDAGQYTCHKGGEVLSHSLL")
-               , FastaItem "With_spaces" (bareSequence "MDFFDLDIEIKQERLPAECSLNSPLNYSLSAQLTDRMTPRTENVRRQRERMDFFDLDIEIKQERLPAECSLNSPLNYSLSAQLTDRMTPRTENVRRQRERMDFFDLDIEIKQERLPAECSLNSPLNYSLSAQLTDRMTPRTENVRRQRER")
-               , FastaItem "Empty_ha_ha_ha" (bareSequence "")
-               ]
+correctFasta1 :: Fasta Char
+correctFasta1 = [ FastaItem "3HMX:A|PDBID|CHAIN|SEQUENCE" (bareSequence "IWELKKDVYVVELDWYPDAPGEMVVLTCDTPEEDGITWTLDQSSEVLGSGKTLTIQVKEFGDAGQYTCHKGGEVLSHSLL")
+                , FastaItem "7HMX:A|PDBID|CHAIN|SEQUENCE" (bareSequence "EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEVLGSGKTLTIQVKEFGDAGQYTCHKGGEVLSHSLL")
+                , FastaItem "With_spaces" (bareSequence "MDFFDLDIEIKQERLPAECSLNSPLNYSLSAQLTDRMTPRTENVRRQRERMDFFDLDIEIKQERLPAECSLNSPLNYSLSAQLTDRMTPRTENVRRQRERMDFFDLDIEIKQERLPAECSLNSPLNYSLSAQLTDRMTPRTENVRRQRER")
+                , FastaItem "Empty_ha_ha_ha" (bareSequence "")
+                ]
+
+badFasta2 :: Either String (Fasta Char)
+badFasta2 = Left "input.fasta:2:5:\n  |\n2 | ACGT....TCG\r\n  |     ^^\nunexpected \"..\"\nexpecting end of input, end of line, or letter\n"
+
+
+correctFasta3 :: Fasta Char
+correctFasta3 = [ FastaItem "N-His-E4Orf6-7-R2(115)" (bareSequence "TGATGGTGATGGTGATGcatGTGGTAAACTCGACTTTCACTTTTCTCTATCACTGATAGGGAGTGGTAAACTCGACTTTCACTTTTCTCTATCACTGATAGGGAaacagtcagcc")
+                ]
+
+badFasta4 :: Either String (Fasta Char)
+badFasta4 = Left "input.fasta:5:8:\n  |\n5 | HindIII-BFP_F                \r\n  |        ^^\nunexpected \"-B\"\nexpecting end of input, end of line, or letter\n"
+
+correctFasta5 :: Fasta Char
+correctFasta5 = [FastaItem "qCHO49 F" (bareSequence "TGGAGAGATGGCTCGAGGTTqCHORTGGTTGCTGGGAATTGAACTC")]
+
+badFasta6 :: Either String (Fasta Char)
+badFasta6 = Left "input.fasta:22:1:\n   |\n22 | sPA-LoxP-NheI_R           \r\n   | ^\nunexpected 's'\nexpecting '>' or end of input\n"
+
+badFasta7 :: Either String (Fasta Char)
+badFasta7 = Left "input.fasta:2:1:\n  |\n2 | 5\8217-CTTCAAGAGAGAGACCTGCGT-3\8217\r\n  | ^\nunexpected '5'\nexpecting '>', end of input, end of line, or sequence\n"
+
+badFasta8 :: Either String (Fasta Char)
+badFasta8 = Left "input.fasta:21:5:\n   |\n21 | CMV + enhMCK + prcTnT-2\r\n   |     ^^\nunexpected \"+ \"\nexpecting end of input, end of line, or letter\n"
 
 fastaSpec :: Spec
-fastaSpec = describe "Fasta file parser." $ do
-    parseFile "test/FASTA/correct.fasta"
-    writeFile "test/FASTA/test.fasta"
+fastaSpec = describe "Fasta files parser." $ do
+    parseFile "test/FASTA/order1.fasta" correctFasta1
+    writeFile "test/FASTA/input.fasta" correctFasta1
+    parseBadFile "test/FASTA/order2.fasta" badFasta2
+    parseFile "test/FASTA/order3.fasta" correctFasta3
+    writeFile "test/FASTA/input.fasta" correctFasta3
+    parseBadFile "test/FASTA/order4.fasta" badFasta4
+    parseFile  "test/FASTA/order5.fasta" correctFasta5
+    writeFile "test/FASTA/input.fasta" correctFasta5
+    parseBadFile "test/FASTA/order6.fasta" badFasta6
+    parseBadFile "test/FASTA/order7.fasta" badFasta7
+    parseBadFile "test/FASTA/order8.fasta" badFasta8
 
-parseFile :: FilePath -> Spec
-parseFile path = do
+parseFile :: FilePath -> Fasta Char -> Spec
+parseFile path cf = do
     describe "fromFile" $ do
         it "correctly parses fasta from file" $ do
             fasta <- fromFile path
-            fasta `shouldBe` correctFasta
+            fasta `shouldBe` cf
 
-writeFile :: FilePath -> Spec
-writeFile path = describe "writeFile" $ do
+parseBadFile :: FilePath -> Either String (Fasta Char) -> Spec
+parseBadFile path cf = do
+    describe "parseOnly fastaP" $ do
+        it "reports the expected parse error for a malformed fasta file" $ do
+            res <- liftIO (readFile path) -- parsed via parseOnly (not fromFile) so errors carry the fixed name "input.fasta"
+            let badRes = parseOnly fastaP res
+            badRes `shouldBe` cf
+
+writeFile :: FilePath -> Fasta Char -> Spec
+writeFile path cf = describe "writeFile" $ do
     it "correctly write fasta into file" $ do
-        toFile correctFasta path
+        toFile cf path
         fasta <- fromFile path
         removeFile path
-        fasta `shouldBe` correctFasta
-
+        fasta `shouldBe` cf
diff --git a/test/FastaParserSpec.hs b/test/FastaParserSpec.hs
index 25cda17..5378595 100644
--- a/test/FastaParserSpec.hs
+++ b/test/FastaParserSpec.hs
@@ -1,18 +1,19 @@
 {-# LANGUAGE OverloadedStrings #-}
-{-# LANGUAGE TypeApplications #-}
+{-# LANGUAGE TypeApplications  #-}
 
 module FastaParserSpec where
 
-import           Bio.FASTA.Parser     (fastaP)
-import           Bio.FASTA.Type       (Fasta, FastaItem (..), ModItem (..), Modification (..))
-import           Bio.Sequence         (bareSequence)
-import           Data.Attoparsec.Text (endOfInput, parseOnly)
-import           Data.Text            (Text)
-import qualified Data.Text            as T
+import           Bio.FASTA.Parser (fastaP, parseOnly)
+import           Bio.FASTA.Type   (Fasta, FastaItem (..), ModItem (..), Modification (..))
+import           Bio.Sequence     (bareSequence)
+import           Data.Bifunctor
+import           Data.Text        (Text)
+import qualified Data.Text        as T
 import           Test.Hspec
+import           Text.Megaparsec  (eof, errorBundlePretty, parse)
 
 fastaParserSpec :: Spec
-fastaParserSpec = describe "Fasta format parser." $ do
+fastaParserSpec = describe "Fasta format parser" $ do
     emptyFasta
     onlyName
     oneSequence
@@ -25,6 +26,7 @@ fastaParserSpec = describe "Fasta format parser." $ do
     sequenceWithTabsInName
     sequenceWithTabsInSequence
     sequenceWithModifications
+    sequenceWithSpaces
     toughParserTests
 
 emptyFasta :: Spec
@@ -49,19 +51,22 @@ twoSequences :: Spec
 twoSequences = describe "twoSequences" $ do
     it "correctly parses two correct sequences" $ do
         let res = parseOnly fastaP ">3HMX:A|PDBID|CHAIN|SEQUENCE\nIWELKKDVYVVELDWYPDAPGEMVVLTCDTPEEDGITWTLDQSSE\nVLGSGKTLTIQVKEFGDAGQYTCHKGGEVLSHSLL\n>7HMX:A|PDBID|CHAIN|SEQUENCE\nEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE\nVLGSGKTLTIQVKEFGDAGQYTCHKGGEVLSHSLL"
-        res `shouldBe` Right [FastaItem @Char "3HMX:A|PDBID|CHAIN|SEQUENCE" (bareSequence "IWELKKDVYVVELDWYPDAPGEMVVLTCDTPEEDGITWTLDQSSEVLGSGKTLTIQVKEFGDAGQYTCHKGGEVLSHSLL"), FastaItem @Char "7HMX:A|PDBID|CHAIN|SEQUENCE" (bareSequence "EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEVLGSGKTLTIQVKEFGDAGQYTCHKGGEVLSHSLL")]
+        res `shouldBe` Right
+          [ FastaItem @Char "3HMX:A|PDBID|CHAIN|SEQUENCE" (bareSequence "IWELKKDVYVVELDWYPDAPGEMVVLTCDTPEEDGITWTLDQSSEVLGSGKTLTIQVKEFGDAGQYTCHKGGEVLSHSLL")
+          , FastaItem @Char "7HMX:A|PDBID|CHAIN|SEQUENCE" (bareSequence "EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEVLGSGKTLTIQVKEFGDAGQYTCHKGGEVLSHSLL")
+          ]
 
 sequenceWithDigit :: Spec
 sequenceWithDigit = describe "sequenceWithDigit" $ do
     it "correctly parses incorrect sequence with digit" $ do
-        let res = parseOnly fastaP ">123\nIWELKKDVYVVELDWYPDAPGEMVVLTCDTPEE4GITWTLDQSSE"
-        res `shouldBe` Right [FastaItem @Char "123" (bareSequence "")]
+        let res = parseOnly (fastaP @Char) ">123\nIWELKKDVYVVELDWYPDAPGEMVVLTCDTPEE4GITWTLDQSSE"
+        res `shouldBe` Left "input.fasta:2:34:\n  |\n2 | IWELKKDVYVVELDWYPDAPGEMVVLTCDTPEE4GITWTLDQSSE\n  |                                  ^^\nunexpected \"4G\"\nexpecting end of input, end of line, or letter\n"
 
 sequenceWithWrongName :: Spec
 sequenceWithWrongName = describe "sequenceWithWrongName" $ do
     it "correctly parses incorrect sequence with wrong name" $ do
-        let res = parseOnly fastaP "123\nIWELKKDVYVVELDWYPDAPGEMVVLTCDTPEEGITWTLDQSSE"
-        res `shouldBe` Right ([] :: Fasta Char)
+        let res = parseOnly (fastaP @Char) "123\nIWELKKDVYVVELDWYPDAPGEMVVLTCDTPEEGITWTLDQSSE"
+        res `shouldBe` Left "input.fasta:1:1:\n  |\n1 | 123\n  | ^\nunexpected '1'\nexpecting '>' or end of input\n"
 
 sequenceWithSpacesInName :: Spec
 sequenceWithSpacesInName = describe "sequenceWithSpacesInName" $ do
@@ -71,15 +76,15 @@ sequenceWithSpacesInName = describe "sequenceWithSpacesInName" $ do
 
 sequenceWithSeveralEndOfLine :: Spec
 sequenceWithSeveralEndOfLine = describe "sequenceWithSeveralEndOfLine" $ do
-    it "correctly parses sequence with several \n after name" $ do
-        let res = parseOnly fastaP ">this is my sequence\n\n\nIWELKKDVYVVELDWYPDAPGEMVVLTCDTPEEGITWTLDQSSE"
-        res `shouldBe` Right [FastaItem @Char "this is my sequence" (bareSequence "IWELKKDVYVVELDWYPDAPGEMVVLTCDTPEEGITWTLDQSSE")]
+    it "correctly parses sequence with several \\n after name" $  -- blank lines between the header and the sequence are expected to be skipped
+        parseOnly (fastaP @Char) ">this is my sequence\n\n\nIWELKKDVYVVELDWYPDAPGEMVVLTCDTPEEGITWTLDQSSE"
+            `shouldBe` Right [FastaItem "this is my sequence" (bareSequence "IWELKKDVYVVELDWYPDAPGEMVVLTCDTPEEGITWTLDQSSE")]
 
 sequenceWithSeveralEndOfLineInSequence :: Spec
 sequenceWithSeveralEndOfLineInSequence = describe "sequenceWithSeveralEndOfLineInSequence" $ do
-    it "correctly parses sequence with several \n between sequence parts" $ do
-        let res = parseOnly fastaP ">this is my sequence\nIWELKKDVYVVELDWYPDAPGEMVVLTCDTPEEGITWTLDQSSE\n\n\nYYYYYYYYYYYYYYYYYYYYYYYY"
-        res `shouldBe` Right [FastaItem @Char "this is my sequence" (bareSequence "IWELKKDVYVVELDWYPDAPGEMVVLTCDTPEEGITWTLDQSSEYYYYYYYYYYYYYYYYYYYYYYYY")]
+    it "correctly parses sequence with several \\n between sequence parts" $  -- both parts are expected to be joined into one sequence
+        parseOnly (fastaP @Char) ">this is my sequence\nIWELKKDVYVVELDWYPDAPGEMVVLTCDTPEEGITWTLDQSSE\n\n\nYYYYYYYYYYYYYYYYYYYYYYYY"
+            `shouldBe` Right [FastaItem "this is my sequence" (bareSequence "IWELKKDVYVVELDWYPDAPGEMVVLTCDTPEEGITWTLDQSSEYYYYYYYYYYYYYYYYYYYYYYYY")]
 
 sequenceWithTabsInName :: Spec
 sequenceWithTabsInName = describe "sequenceWithTabsInName" $ do
@@ -99,13 +104,22 @@ sequenceWithModifications = describe "sequenceWithModifications" $ do
         let res = parseOnly fastaP ">this is my sequence\nIWEL[mU*]KKDVYV\t\t\nYY[56FAM]YY[Trololo]YY\t\n"
         res `shouldBe` Right [FastaItem "this is my sequence" (bareSequence [Letter 'I', Letter 'W', Letter 'E', Letter 'L', Mod Mod_mU_Star, Letter 'K', Letter 'K', Letter 'D', Letter 'V', Letter 'Y', Letter 'V', Letter 'Y', Letter 'Y', Mod Mod_56FAM, Letter 'Y', Letter 'Y', Mod (Unknown "[Trololo]"), Letter 'Y', Letter 'Y'])]
 
+-- | Spaces inside a sequence line are expected to be dropped from the parsed sequence.
+sequenceWithSpaces :: Spec
+sequenceWithSpaces = describe "sequenceWithSpaces" $
+    it "correctly parses sequence with spaces" $
+        parseOnly fastaP ">test1\nAAAA TTTT GGGG ccA\n" `shouldBe` Right [FastaItem @Char "test1" (bareSequence "AAAATTTTGGGGccA")]
+
 toughParserTests :: Spec
 toughParserTests = describe "various parser tests" $ do
     it "correctly parses empty lines" $ checkParser correctTest1 (Right correctAnswer)
     it "correctly parses empty lines with spaces" $ checkParser correctTest2 (Right correctAnswer)
     it "correctly parses empty lines with tabs" $ checkParser correctTest3 (Right correctAnswer)
-    it "correctly fails to parse a name without >" $ checkParser incorrectTest1 (Left "endOfInput")
-    it "correctly fails to parse a new sequence at the same line" $ checkParser incorrectTest2 (Left "endOfInput")
+    it "correctly parses a sequence line with trailing tabs and spaces" $ checkParser correctTest4 (Right correctAnswer4)
+    it "correctly fails to parse a name without >" $ checkParser incorrectTest1
+      (Left "input.fasta:1:1:\n  |\n1 | test1\n  | ^\nunexpected 't'\nexpecting '>' or end of input\n")
+    it "correctly fails to parse a new sequence at the same line" $ checkParser incorrectTest2
+      (Left "input.fasta:3:8:\n  |\n3 | GHIJKL >test2\n  |        ^^\nunexpected \">t\"\nexpecting end of input, end of line, or letter\n")
 
 correctTest1 :: Text
 correctTest1 = T.unlines
@@ -137,6 +151,14 @@ correctTest3 = T.unlines
   , "ABCDEF"
   ]
 
+correctTest4 :: Text
+correctTest4 = "> test4\nTTTAGGTactTGT\t\t                                                                             \t\n"
+
+-- | Expected parse of 'correctTest4': the name without its leading space, the sequence without trailing whitespace.
+correctAnswer4 :: Fasta Char
+correctAnswer4 =
+  [FastaItem "test4" (bareSequence "TTTAGGTactTGT")]
+
 incorrectTest1 :: Text
 incorrectTest1 = T.unlines
   [ "test1"
@@ -157,5 +179,7 @@ incorrectTest2 = T.unlines
 correctAnswer :: Fasta Char
 correctAnswer = [FastaItem "test1" (bareSequence "ABCDEFGHIJKL"), FastaItem "test2" (bareSequence "ABCDEF")]
 
-checkParser :: Text -> Either String (Fasta Char) -> Expectation
-checkParser source expectation = parseOnly (fastaP <* endOfInput) source `shouldBe` expectation
+-- | Parse @source@ up to end of input (as @input.fasta@) and compare with @expectation@; errors are rendered by 'errorBundlePretty'.
+checkParser :: HasCallStack => Text -> Either String (Fasta Char) -> Expectation
+checkParser source expectation = rendered `shouldBe` expectation
+  where rendered = first errorBundlePretty $ parse (fastaP <* eof) "input.fasta" source