-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbatch-cutadapt.py
32 lines (26 loc) · 1.02 KB
/
batch-cutadapt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
from glob import glob
import re
import subprocess
import sys
# Directory holding the raw SRR*.fastq files; trimmed output is written
# next to the inputs as *.trim.fastq.
DATA_DIR = '/data/covid'

# Adapter sequence handed to cutadapt (-a for read 1, -A for read 2).
ADAPTER = 'CTGTCTCTTATACACATCT'

# Options shared by the single-end and paired-end invocations.
CUTADAPT_BASE = ['cutadapt', '--cores', '6', '-q', '20,20', '-a', ADAPTER]

# Run accession embedded in each FASTQ file name.
SAMPLE_PATTERN = re.compile(r'SRR[0-9]+')


def fastq_path(sample, mate='', trimmed=False):
    """Return the FASTQ path for *sample* inside DATA_DIR.

    *mate* is '' for single-end data or '_1' / '_2' for paired-end reads.
    With *trimmed* set, the path of the cutadapt output ('.trim.fastq')
    is returned instead of the raw input.
    """
    return '{}/{}{}{}.fastq'.format(
        DATA_DIR, sample, mate, '.trim' if trimmed else '')


def select_inputs(paths):
    """Return *paths* without FASTQ output from previous cutadapt runs."""
    return [path for path in paths if '.trim.' not in path]


def find_samples(paths):
    """Return the sorted, de-duplicated sample names found in *paths*.

    Paths that contain no 'SRR<digits>' accession are ignored; the glob
    'SRR*.fastq' is looser than the accession pattern, so such files can
    show up.  Sorting makes the processing order reproducible across runs.
    """
    found = set()
    for path in paths:
        match = SAMPLE_PATTERN.search(path)
        if match is not None:
            found.add(match.group())
    return sorted(found)


def build_cutadapt_command(sample, available):
    """Return the cutadapt argument list for *sample*.

    *available* is the collection of raw input paths that exist (a set
    gives constant-time lookups).  A sample with a '_1' file is treated as
    paired-end and also needs its '_2' mate; any other sample is treated
    as single-end and needs '<sample>.fastq'.

    Raises FileNotFoundError when an expected input is missing, so the
    problem is reported before cutadapt is launched on a nonexistent file.
    """
    read1 = fastq_path(sample, '_1')
    if read1 in available:
        read2 = fastq_path(sample, '_2')
        if read2 not in available:
            raise FileNotFoundError(
                'paired-end sample {}: missing mate file {}'.format(
                    sample, read2))
        return CUTADAPT_BASE + [
            '-A', ADAPTER,
            '-o', fastq_path(sample, '_1', trimmed=True),
            '-p', fastq_path(sample, '_2', trimmed=True),
            read1, read2,
        ]

    single = fastq_path(sample)
    if single not in available:
        raise FileNotFoundError(
            'sample {}: found neither {} nor {}'.format(
                sample, read1, single))
    return CUTADAPT_BASE + [
        '-o', fastq_path(sample, trimmed=True),
        single,
    ]


def main():
    """Run cutadapt on every sample found in DATA_DIR.

    Stops at the first failure: subprocess.check_call raises
    CalledProcessError when cutadapt exits with a non-zero status.
    """
    files = select_inputs(glob(DATA_DIR + '/SRR*.fastq'))
    available = frozenset(files)
    for sample in find_samples(files):
        subprocess.check_call(build_cutadapt_command(sample, available))


if __name__ == '__main__':
    main()