@@ -69,23 +69,23 @@ def getargs():
69
69
iterM .add_argument ('-f' , '--fastqDir' , help = 'Name of the folder containing sequencing reads.' )
70
70
iterM .add_argument ('-F' , '--Format' , default = 'SRA' , choices = ['SRA' , 'FASTQ' ],
71
71
help = 'Format of the sequencing reads.' )
72
- iterM .add_argument ('-A' , '--aligner' , default = 'chromap' , choices = ['bwa-mem' , 'chromap' , 'minimap2' ],
72
+ iterM .add_argument ('-A' , '--aligner' , default = 'chromap' , choices = ['bwa-mem' , 'bwa-mem2' , 'chromap' ],
73
73
help = '''Name of the sequence alignment software to invoke.''' )
74
74
iterM .add_argument ('-i' , '--Index' ,
75
- help = '''Path to the bwa/chromap/minimap2 genome index. For example, if your reference genome
76
- is hg38.fa, and it is located within ~/data/hg38, then you need to specify --Index as "~/data/hg38/hg38.fa",
77
- "~/data/hg38/hg38.chromap-runhic.mmi", and "~/data/hg38/hg38.minimap2.mmi" for bwa-mem, chromap, and
78
- minimap2, respectively. When not specified, the index will be built automatically according to
79
- your aligner choice.''' )
75
+ help = '''Path to the bwa-mem/bwa-mem2/chromap index. For example, if the reference genome
76
+ is hg38.fa, and it is located within the folder ~/data/hg38, then you need to specify
77
+ this parameter as "~/data/hg38/hg38.fa", "~/data/hg38/hg38.fa", and "~/data/hg38/hg38.chromap-runhic.mmi"
78
+ for bwa-mem, bwa-mem2, and chromap, respectively. If not specified, the index will be built
79
+ automatically according to the aligner choice.''' )
80
80
iterM .add_argument ('-t' , '--threads' , type = int , default = 8 , help = 'Number of threads.' )
81
81
iterM .add_argument ('--min-mapq' , type = int , default = 1 ,
82
82
help = '''The minimal MAPQ score to consider a read as uniquely mapped.''' )
83
- iterM .add_argument ('--max-molecule-size' , type = int , default = 2000 ,
83
+ iterM .add_argument ('--max-molecule-size' , type = int , default = 750 ,
84
84
help = '''The maximal size of a Hi-C molecule, used to rescue single ligations from
85
85
molecules with three alignments.''' )
86
86
iterM .add_argument ('--max-inter-align-gap' , type = int , default = 20 ,
87
87
help = '''A key parameter used by pairtools to rescue single ligations from walks.''' )
88
- iterM .add_argument ('--walks-policy' , default = 'all' , choices = ['mask' , '5any' , '5unique' , '3any' , '3unique' , 'all' ],
88
+ iterM .add_argument ('--walks-policy' , default = '5unique' , choices = ['mask' , '5any' , '5unique' , '3any' , '3unique' , 'all' ],
89
89
help = '''The policy used by pairtools to report unrescuable walks.''' )
90
90
iterM .add_argument ('--include-readid' , action = 'store_true' ,
91
91
help = '''If specified, add read IDs to the outputed .pairsam files.''' )
@@ -151,9 +151,8 @@ def getargs():
151
151
binReads .add_argument ('--mad-max' , type = int , default = 5 ,
152
152
help = '''Before ICE, drop bins whose log marginal sum is less than ``mad_max``
153
153
median absolute deviations below the median log marginal sum.''' )
154
- binReads .add_argument ('--high-res' , action = 'store_true' , help = '''If specified, bin pairs at 11 base-pair-delimited resolutions:
155
- 2500000,1000000,500000,250000,100000,50000,25000,10000,5000,2000,1000. The default setting is binning pairs at
156
- 9 resolutions: 2500000,1000000,500000,250000,100000,50000,25000,10000,5000.''' )
154
+ binReads .add_argument ('--resolutions' , default = '5000,10000,25000,50000,100000,250000,500000,1000000,2500000' ,
155
+ help = '''List of resolutions at which the contact matrices will be generated.''' )
157
156
binReads .add_argument ('--nproc' , type = int , default = 8 , help = '''Number of allocated proccesses.''' )
158
157
binReads .add_argument ('--max-split' , type = int , default = 2 , help = '''Divide the pairs from each chromosome
159
158
into at most this many chunks.''' )
@@ -186,9 +185,8 @@ def getargs():
186
185
add_help = False )
187
186
streamline .add_argument ('--max-split' , type = int , default = 2 , help = '''Divide the pairs from each chromosome
188
187
into at most this many chunks.''' )
189
- streamline .add_argument ('--high-res' , action = 'store_true' , help = '''If specified, bin pairs at 11 base-pair-delimited resolutions:
190
- 2500000,1000000,500000,250000,100000,50000,25000,10000,5000,2000,1000. The default setting is binning pairs at
191
- 9 resolutions: 2500000,1000000,500000,250000,100000,50000,25000,10000,5000.''' )
188
+ streamline .add_argument ('--resolutions' , default = '5000,10000,25000,50000,100000,250000,500000,1000000,2500000' ,
189
+ help = '''List of resolutions at which the contact matrices will be generated.''' )
192
190
streamline .set_defaults (func = pileup )
193
191
194
192
## Parse the command-line arguments
@@ -255,7 +253,7 @@ def run(args, commands):
255
253
'# Temporary Dir = {0}' .format (args .tmpdir )
256
254
])
257
255
if (commands [0 ] == 'pileup' ):
258
- arglist .extend (['# Generate contact maps at 11 resolutions = {0}' .format (args .high_res )])
256
+ arglist .extend (['# Resolutions = {0}' .format (args .resolutions )])
259
257
260
258
if commands [0 ] == 'filtering' :
261
259
arglist .extend (['# Original Pairs = {0}' .format (args .pairFolder ),
@@ -270,7 +268,7 @@ def run(args, commands):
270
268
'# Minimum Marginal Nonzeros = {0}' .format (args .min_nnz ),
271
269
'# Minimum Marginal Counts = {0}' .format (args .min_count ),
272
270
'# MAD Max = {0}' .format (args .mad_max ),
273
- '# Generate contact maps at 11 resolutions = {0}' .format (args .high_res ),
271
+ '# Resolutions = {0}' .format (args .resolutions ),
274
272
'# Number of processes = {0}' .format (args .nproc )])
275
273
276
274
if commands [0 ] == 'quality' :
@@ -303,9 +301,7 @@ def mapping(args, commands):
303
301
if not args .Index is None :
304
302
indexpath = os .path .abspath (os .path .expanduser (args .Index ))
305
303
else :
306
- if aligner == 'minimap2' :
307
- indexpath = os .path .join (genomeFolder , '.' .join ([genomeName , 'minimap2' , 'mmi' ]))
308
- elif aligner == 'chromap' :
304
+ if aligner == 'chromap' :
309
305
indexpath = os .path .join (genomeFolder , '.' .join ([genomeName , 'chromap-runhic' , 'mmi' ]))
310
306
else :
311
307
indexpath = os .path .join (genomeFolder , '.' .join ([genomeName , 'fa' ]))
@@ -325,10 +321,14 @@ def mapping(args, commands):
325
321
indexlock = os .path .join (genomeFolder , '.' .join ([genomeName , aligner , 'lock' ]))
326
322
if os .path .exists (indexlock ):
327
323
raise Exception ('''Another index building process is on. Leaving''' )
328
- if aligner in ['minimap2' , 'chromap' ]:
324
+
325
+ if aligner in ['chromap' ]:
329
326
icheck = glob .glob (indexpath )
327
+ elif aligner in ['bwa-mem2' ]:
328
+ icheck = glob .glob (indexpath + '.bwt.2bit.64' )
330
329
else :
331
- icheck = glob .glob (indexpath + '.sa' )
330
+ icheck = glob .glob (indexpath + '.bwt' )
331
+
332
332
if len (icheck ):
333
333
logging .log (21 , 'Set --Index to {0}' .format (indexpath ))
334
334
else :
@@ -737,14 +737,12 @@ def binning(args, commands):
737
737
738
738
logging .log (21 , 'Contact Matrices will be saved in .mcool format under {0}' .format (hFolder ))
739
739
740
- if args .high_res :
741
- intermediate = os .path .join (hFolder , os .path .basename (f ).replace ('.pairs.gz' , '.1kb.cool' ))
742
- else :
743
- intermediate = os .path .join (hFolder , os .path .basename (f ).replace ('.pairs.gz' , '.5kb.cool' ))
740
+ resolutions = sorted ([int (r ) for r in args .resolutions .split (',' )])
741
+ intermediate = os .path .join (hFolder , os .path .basename (f ).replace ('.pairs.gz' , '.{0}.cool' .format (resolutions [0 ])))
744
742
hFile = os .path .join (hFolder , os .path .basename (f ).replace ('.pairs.gz' , '.mcool' ))
745
- mcool_from_pairs (f , intermediate , hFile , ignore_diags = args . ignore_diags , nproc = args .nproc ,
746
- mad_max = args .mad_max , min_count = args .min_count , min_nnz = args .min_nnz ,
747
- max_split = args .max_split , high_res = args .high_res )
743
+ mcool_from_pairs (f , intermediate , hFile , resolutions , ignore_diags = args .ignore_diags ,
744
+ nproc = args .nproc , mad_max = args .mad_max , min_count = args .min_count ,
745
+ min_nnz = args .min_nnz , max_split = args .max_split )
748
746
749
747
completed = open (Indicator , 'wb' )
750
748
completed .close ()
@@ -818,7 +816,7 @@ def pileup(args, commands):
818
816
"""
819
817
mapping (args , commands )
820
818
args .stats_cache = 'allinone.cache'
821
- args .nproc = args . threads
819
+ args .nproc = 8
822
820
filtering (args , commands )
823
821
args .ignore_diags = 2
824
822
args .mad_max = 5
0 commit comments