Skip to content

Commit

Permalink
version 0.1.2.5: fix FASTA-parser for the spaces (#24)
Browse files Browse the repository at this point in the history
* version 0.1.2.5: fix FASTA-parser for the spaces

* added empty test
  • Loading branch information
ozzzzz authored and zmactep committed Dec 25, 2019
1 parent 3c5e1c4 commit 4af28a8
Show file tree
Hide file tree
Showing 6 changed files with 32 additions and 12 deletions.
4 changes: 4 additions & 0 deletions ChangeLog.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

## [Unreleased]

## [0.1.2.5] - 2019-12-24
### Fixed
- FASTA parser now accepts spaces inside sequences.

## [0.1.2.4] - 2019-12-23
### Added
- Preprocessing for pdb-files.
Expand Down
2 changes: 1 addition & 1 deletion package.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name: cobot-io
version: 0.1.2.4
version: 0.1.2.5
github: "less-wrong/cobot-io"
license: BSD3
category: Bio
Expand Down
17 changes: 9 additions & 8 deletions src/Bio/FASTA/Parser.hs
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@ module Bio.FASTA.Parser
( fastaP
) where

import Bio.FASTA.Type (Fasta, FastaItem(..))
import Bio.Sequence (BareSequence, bareSequence)
import Data.Attoparsec.Text (Parser, many', many1', char, endOfLine, letter,
takeWhile, choice, endOfInput)
import Data.Text (Text, strip)
import Prelude hiding (takeWhile)
import Bio.FASTA.Type (Fasta, FastaItem (..))
import Bio.Sequence (BareSequence, bareSequence)
import Data.Attoparsec.Text (Parser, char, choice, endOfInput,
endOfLine, letter, many', many1',
takeWhile)
import Data.Text (Text, strip)
import Prelude hiding (takeWhile)

-- | Parser of .fasta file.
--
Expand All @@ -23,8 +24,8 @@ seqName = strip <$> (char '>' *> tabs *> takeWhile (`notElem` ['\n', '\r']) <* t
-- | Parses the sequence body of a single record: zero or more 'line's,
-- whose contents are joined and wrapped with 'bareSequence'. When no
-- sequence line is present the result is built from the empty string.
fastaSeq :: Parser (BareSequence Char)
fastaSeq = fmap (bareSequence . mconcat) (many' line)

-- | Parses one line of sequence data: one or more letters terminated by 'eol'.
line :: Parser (String)
line = many1' letter <* eol
-- | Parses one line of sequence data: one or more groups of letters, each
-- optionally followed by spaces, terminated by 'eol'. The spaces are dropped
-- and the letter groups are concatenated into a single string.
--
-- Note that the line has to begin with a letter; leading spaces are not
-- accepted by this parser.
line :: Parser String
line = concat <$> many1' residueGroup <* eol
  where
    -- A run of letters together with any spaces trailing it; only the
    -- letters are kept.
    residueGroup = many1' letter <* many' (char ' ')

-- | End of a line: whatever 'tabs' consumes, followed by either 'slashN'
-- or the end of the input.
--
-- NOTE(review): 'tabs' and 'slashN' are defined outside this excerpt;
-- presumably they match horizontal whitespace and a line break — confirm.
eol :: Parser ()
eol = do
  _ <- tabs
  choice [slashN, endOfInput]
Expand Down
3 changes: 2 additions & 1 deletion src/Bio/FASTA/Type.hs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ import Bio.Sequence (BareSequence)
import Data.Text (Text)

-- | Type alias for FASTA file.
-- satisfies the following format : >(\s|\t)*[^\n\r]+(\s|\t)*(\n|\r)*(\w(\n|\r)*)*
-- Satisfies the following format: >(\s|\t)*[^\n\r]+(\s|\t)*(\n|\r)*((\w|\s)(\n|\r)*)*
--
type Fasta a = [FastaItem a]

-- | One record in FASTA file.
Expand Down
12 changes: 11 additions & 1 deletion test/FASTA/correct.fasta
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,14 @@ VLGSGKTLTIQVKEFGDAGQYTCHKGGEVLSHSLL

>7HMX:A|PDBID|CHAIN|SEQUENCE
EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE
VLGSGKTLTIQVKEFGDAGQYTCHKGGEVLSHSLL
VLGSGKTLTIQVKEFGDAGQYTCHKGGEVLSHSLL

> With_spaces
MDFFDLDIEI KQERLPAECS LNSPLNYSLS AQLTDRMTPR TENVRRQRER
MDFFDLDIEI KQERLPAECS LNSPLNYSLS AQLTDRMTPR TENVRRQRER

MDFFDLDIEI KQERLPAECS LNSPLNYSLS AQLTDRMTPR TENVRRQRER

> Empty_ha_ha_ha


6 changes: 5 additions & 1 deletion test/FASTASpec.hs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@ import System.Directory (removeFile)
import Test.Hspec

-- | Expected records for the FASTA fixture: two plain multi-line records,
-- one record whose sequence lines contain spaces, and one record with an
-- empty sequence.
-- NOTE(review): presumably compared against the parse of
-- test/FASTA/correct.fasta — confirm in 'fastaSpec'.
correctFasta :: Fasta Char
correctFasta = [FastaItem "3HMX:A|PDBID|CHAIN|SEQUENCE" (bareSequence "IWELKKDVYVVELDWYPDAPGEMVVLTCDTPEEDGITWTLDQSSEVLGSGKTLTIQVKEFGDAGQYTCHKGGEVLSHSLL"), FastaItem "7HMX:A|PDBID|CHAIN|SEQUENCE" (bareSequence "EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEVLGSGKTLTIQVKEFGDAGQYTCHKGGEVLSHSLL")]
correctFasta = [ FastaItem "3HMX:A|PDBID|CHAIN|SEQUENCE" (bareSequence "IWELKKDVYVVELDWYPDAPGEMVVLTCDTPEEDGITWTLDQSSEVLGSGKTLTIQVKEFGDAGQYTCHKGGEVLSHSLL")
, FastaItem "7HMX:A|PDBID|CHAIN|SEQUENCE" (bareSequence "EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEVLGSGKTLTIQVKEFGDAGQYTCHKGGEVLSHSLL")
, FastaItem "With_spaces" (bareSequence "MDFFDLDIEIKQERLPAECSLNSPLNYSLSAQLTDRMTPRTENVRRQRERMDFFDLDIEIKQERLPAECSLNSPLNYSLSAQLTDRMTPRTENVRRQRERMDFFDLDIEIKQERLPAECSLNSPLNYSLSAQLTDRMTPRTENVRRQRER")
, FastaItem "Empty_ha_ha_ha" (bareSequence "")
]

fastaSpec :: Spec
fastaSpec = describe "Fasta file parser." $ do
Expand Down

0 comments on commit 4af28a8

Please sign in to comment.