-
Notifications
You must be signed in to change notification settings - Fork 1
/
simreads-para.py
123 lines (68 loc) · 1.9 KB
/
simreads-para.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/usr/bin/env python
#written for 082
#use with simabund.py to make the abundance files
import sys
import os
import subprocess
import re
import glob
from random import randrange
import multiprocessing
import concurrent.futures
digits = re.compile(r'(\d+)')
def tokenize(filename):
return tuple(int(token) if match else token
for token, match in
((fragment, digits.search(fragment))
for fragment in digits.split(filename)))
def getreads(abund_file):
f=open(abund_file,'r')
name1=abund_file.split("/")[-1].split(".")[0]
g=open(outfolder+"/"+name1+".fasta",'w')
for x in f:
seqfile=x.split("\t")[0]
abund=float(x.split("\t")[1].rstrip("\n"))
numreads=abund*totalreads
if numreads<1:
print seqfile,0,"less than one read, none written"
break
#get genome(s)
print "getting reads from", seqfile
try:
h=open(seqfile,'r')
except:
print "cannot open", seqfile
break
seq=""
for i in h:
if i[0]<>">":
seq=seq+i.rstrip("\n")
#get reads
n=0
for j in range(1,int(numreads+1)):
rstart=randrange(0,len(seq)-readlen)
read1 = seq[rstart:rstart+readlen]
#mutate
g.write(">"+seqfile.split("/")[-1].split(".")[0]+"_"+str(j)+"\n")
c=-1
for x in read1:
c=c+1
r1=randrange(int(1/mutrate))
if r1==1:
g.write(bases[randrange(4)])
else:
g.write(x)
g.write("\n")
folder=sys.argv[1]
filelist=glob.glob(folder)
filelist.sort(key=tokenize)
outfolder=sys.argv[2]
readlen=int(sys.argv[3])
totalreads=float(sys.argv[4])
mutrate=float(sys.argv[5])
threads=int(sys.argv[6])
bases=["A","T","C","G","N"]
print filelist
executor = concurrent.futures.ProcessPoolExecutor(threads)
futures = [executor.submit(getreads, abund_file) for abund_file in filelist]
concurrent.futures.wait(futures)