Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Devel #57

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# test
*/test_*
data/LorenaP/*
data/test_*/report/*
data/test_*/res/*
data/res/*
Expand Down
4 changes: 4 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
- 1.2.10

* Fix bug when counting the expression of sequences in the stats file.

- 1.2.9

* Fix bug where UMI is mistakenly detected in read names containing "ILLUMINA"
Expand Down
4 changes: 2 additions & 2 deletions seqcluster/detect/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def detect_clusters(c, current_seq, MIN_SEQ, non_un_gl=False):
logger.info("%s Clusters read" % eindex)
# merge cluster with shared sequences
metacluster_obj, cluster_id = _find_metaclusters(current_clus, sequence2clusters, current_seq, MIN_SEQ)

# import pdb; pdb.set_trace()
return cluster_info_obj(current_clus, metacluster_obj, current_loci, current_seq)

def _common(items, seen):
Expand Down Expand Up @@ -158,7 +158,7 @@ def _find_metaclusters(clus_obj, sequence2clusters, current_seq, min_seqs):

for itern, name in enumerate(sequence2clusters):
clusters = sequence2clusters[name]
if len(clusters) == 0:
if len(clusters) == 0: # when sequence doesn't belong to a cluster
c_index -= 1
continue
current_seq[name].align = 1
Expand Down
1 change: 1 addition & 0 deletions seqcluster/detect/metacluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ def reduceloci(clus_obj, path):
logger.info("Clusters too long to be analyzed: %s" % large)
logger.info("Number of clusters removed because low number of reads: %s" % REMOVED)
logger.info("Number of clusters with conflicts: %s" % CONFLICT)
#import pdb;pdb.set_trace()
return clus_obj


Expand Down
15 changes: 11 additions & 4 deletions seqcluster/make_clusters.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,10 @@ def cluster(args):
dt = pd.DataFrame({'sample': y.keys(), 'counts': y.values()})
dt['step'] = 'cleaned'
dt.to_csv(read_stats_file, sep="\t", index=False, header=False, mode='a')

clusL = _create_clusters(seqL, bam_file, args)

y, l = _total_counts(list(clusL.seq.keys()), clusL.seq, aligned=True)
# y, l = _total_counts(list(clusL.seq.keys()), clusL.seq, aligned=True)
y, l = _total_counts(clusL.clus, seqL)
# import pdb;pdb.set_trace()
logger.info("counts after: %s" % sum(y.values()))
logger.info("# sequences after: %s" % l)
dt = pd.DataFrame({'sample': y.keys(), 'counts': y.values()})
Expand All @@ -75,6 +75,8 @@ def cluster(args):
logger.info("Solving multi-mapping events in the network of clusters")
clusLred = _cleaning(clusL, args.dir_out)
y, l = _total_counts(clusLred.clus, seqL)
# import pdb;pdb.set_trace()
# y, l = _total_counts(list(clusLred.seq.keys()), clusLred.seq, aligned=True)
logger.info("counts after: %s" % sum(y.values()))
logger.info("# sequences after: %s" % l)
dt = pd.DataFrame({'sample': y.keys(), 'counts': y.values()})
Expand Down Expand Up @@ -157,7 +159,12 @@ def _total_counts(seqs, seqL, aligned=False):
else:
nseqs = len([total.update(seqL[s].freq) for s in seqs if seqL[s].align > 0])
elif isinstance(seqs, dict):
[total.update(seqs[s].get_freq(seqL)) for s in seqs]
#[total.update(seqs[s].get_freq(seqL)) for s in seqs]
# import pdb;pdb.set_trace()
# !import code; code.interact(local=vars())
seqs_in=[]
void=[seqs_in.extend(list(seqs[s].idmembers.keys())) for s in seqs]
len([total.update(seqL[s].freq) for s in set(seqs_in)])
nseqs = sum(len(seqs[s].idmembers) for s in seqs)
return total, nseqs

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def readme():


setup(name='seqcluster',
version='1.2.9',
version='1.2.10',
description='Small RNA-seq pipeline',
long_description=readme(),
long_description_content_type="text/markdown",
Expand Down