Skip to content

Commit

Permalink
Add support for backup cluster file
Browse files: browse the repository at this point in the history
  • Loading branch information
yuanx749 committed Jun 26, 2024
1 parent 0f939af commit b5fa943
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 2 deletions.
2 changes: 1 addition & 1 deletion pycdhit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from ._commands import * # noqa: F403
from ._io import * # noqa: F403

-VERSION = "0.13.0"
+VERSION = "1.0.0"

__all__ = [ # noqa: F405
"CommandBase",
Expand Down
6 changes: 5 additions & 1 deletion pycdhit/_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,13 +70,17 @@ def read_clstr(file: FilePath) -> pd.DataFrame:
     # refer to PrintInfo
     identifier, cluster, size, is_representative, identity = [], [], [], [], []
     coverage, strand = [], [] # distance is not used
+    with open(file) as f:
+        bak = f.read(1) != ">"
     with open(file) as f:
         for line in f:
             if line[0] == ">":
                 idx = int(re.search(r">Cluster (\d+)", line).group(1))
                 continue
-            cluster.append(idx)
             line = line.split()
+            if bak:
+                idx = int(line[0])
+            cluster.append(idx)
             size.append(int(re.search(r"(\d+)(aa|nt),", line[1]).group(1)))
             identifier.append(re.search(r">(.+)\.{3}", line[2]).group(1))
             if line[3] == "*":
Expand Down

0 comments on commit b5fa943

Please sign in to comment.