Skip to content

Commit 6728d39

Browse files
authored
version 0.1.5.4 Fix Fasta parser with modification (#74)
1 parent 6717ad6 commit 6728d39

File tree

7 files changed

+38
-7
lines changed

7 files changed

+38
-7
lines changed

ChangeLog.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22

33
## [Unreleased]
44

5+
## [0.1.5.4] - 2024-05-16
6+
- Fix Fasta parser for unknown modifications at the end of a line.
7+
58
## [0.1.5.3] - 2023-12-08
69
- Update tests and dependencies.
710

package.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
name: cobot-io
2-
version: 0.1.5.3
2+
version: 0.1.5.4
33
github: "biocad/cobot-io"
44
license: BSD3
55
category: Bio

src/Bio/FASTA.hs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ import Bio.FASTA.Writer (WritableFastaToken (..), fastaToText)
2828

2929
-- | Reads a 'Fasta' from the given file.
3030
--
31-
fromFile :: (MonadFail m, MonadIO m) => FilePath -> m (Fasta Char)
31+
fromFile :: (MonadFail m, MonadIO m, ParsableFastaToken a) => FilePath -> m (Fasta a)
3232
fromFile f = liftIO (readFile f) >>= either (fail . errorBundlePretty) pure . parse fastaP (takeBaseName f)
3333

3434
-- | Writes 'FastaSequence' to file.

src/Bio/FASTA/Parser.hs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,14 @@ type Parser = Parsec Void Text
3636
parseOnly :: Parsec Void Text a -> Text -> Either String a
3737
parseOnly p s = first errorBundlePretty $ parse p "input.fasta" s
3838

39+
-- Use 'hspace1' rather than 'space1' here: the 'fastaLine' parser expects
40+
-- each line to end with a line ending or end of file. If 'sc' consumed the
41+
-- end of line, 'lexeme' in 'unknownP' would consume it too, so 'fastaLine'
42+
-- would not see that the line has ended and would expect more symbols.
43+
--
44+
-- 'hspace1' consumes only "horizontal" space, leaving the line ending for 'fastaLine'.
3945
sc :: Parser ()
40-
sc = L.space space1 empty empty
46+
sc = L.space hspace1 empty empty
4147

4248
lexeme :: Parser a -> Parser a
4349
lexeme = L.lexeme sc

test/FASTA/order10.fasta

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
>mol1
2+
[FAM]ACGT[UNK][

test/FASTA/order9.fasta

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
>mol1
2+
[FAM]ACGT[UNK]
3+
4+
>mol2
5+
[HEX]ACCGT
6+
7+
>mol3
8+
[HEX]ACGTCA[UNK]

test/FASTASpec.hs

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,9 @@ import Prelude hiding (readFile, writeFile)
1010
import System.Directory (removeFile)
1111
import Test.Hspec
1212

13-
import Bio.FASTA (fastaP, fromFile, toFile)
13+
import Bio.FASTA (ParsableFastaToken, fastaP, fromFile, toFile)
1414
import Bio.FASTA.Parser (parseOnly)
15-
import Bio.FASTA.Type (Fasta, FastaItem (..))
15+
import Bio.FASTA.Type (Fasta, FastaItem (..), ModItem (..), Modification (..))
1616
import Bio.Sequence (bareSequence)
1717

1818
correctFasta1 :: Fasta Char
@@ -45,6 +45,16 @@ badFasta7 = Left "input.fasta:2:1:\n |\n2 | 5\8217-CTTCAAGAGAGAGACCTGCGT-3\8217
4545
badFasta8 :: Either String (Fasta Char)
4646
badFasta8 = Left "input.fasta:21:5:\n |\n21 | CMV + enhMCK + prcTnT-2\r\n | ^^\nunexpected \"+ \"\nexpecting end of input, end of line, or letter\n"
4747

48+
correctFasta9 :: Fasta ModItem
49+
correctFasta9 =
50+
[ FastaItem "mol1" $ bareSequence [Mod (Unknown "[FAM]"),Letter 'A',Letter 'C',Letter 'G',Letter 'T',Mod (Unknown "[UNK]")]
51+
, FastaItem "mol2" $ bareSequence [Mod (Unknown "[HEX]"),Letter 'A',Letter 'C',Letter 'C',Letter 'G',Letter 'T']
52+
, FastaItem "mol3" $ bareSequence [Mod (Unknown "[HEX]"),Letter 'A',Letter 'C',Letter 'G',Letter 'T',Letter 'C',Letter 'A',Mod (Unknown "[UNK]")]
53+
]
54+
55+
badFasta10 :: Either String (Fasta ModItem)
56+
badFasta10 = Left "input.fasta:2:16:\n|\n2|[FAM]ACGT[UNK][\n|^\nunexpectednewline\nexpectingmodificationname\n"
57+
4858
fastaSpec :: Spec
4959
fastaSpec = describe "Fasta files parser" $ do
5060
describe "fromFile" $ do
@@ -56,19 +66,21 @@ fastaSpec = describe "Fasta files parser" $ do
5666
parseBadFile "test/FASTA/order6.fasta" badFasta6
5767
parseBadFile "test/FASTA/order7.fasta" badFasta7
5868
parseBadFile "test/FASTA/order8.fasta" badFasta8
69+
parseFile "test/FASTA/order9.fasta" correctFasta9
70+
parseBadFile "test/FASTA/order10.fasta" badFasta10
5971

6072
describe "toFile" $ do
6173
writeFile "test/FASTA/input.fasta" correctFasta5
6274
writeFile "test/FASTA/input.fasta" correctFasta1
6375
writeFile "test/FASTA/input.fasta" correctFasta3
6476

65-
parseFile :: FilePath -> Fasta Char -> Spec
77+
parseFile :: (Show a, Eq a, ParsableFastaToken a) => FilePath -> Fasta a -> Spec
6678
parseFile path cf =
6779
it ("correctly parses good fasta from file " <> path) $ do
6880
fasta <- fromFile path
6981
fasta `shouldBe` cf
7082

71-
parseBadFile :: FilePath -> Either String (Fasta Char) -> Spec
83+
parseBadFile :: (Show a, Eq a, ParsableFastaToken a) => FilePath -> Either String (Fasta a) -> Spec
7284
parseBadFile path cf =
7385
it ("correctly parses bad fasta from file " <> path) $ do
7486
res <- liftIO (readFile path)

0 commit comments

Comments
 (0)