@@ -45,7 +45,6 @@ class ReadType(enum.Enum):
45
45
46
46
CODING_REGION = "cDNA" # TODO: Make this take in / read in a model so that we don't have to specify this.
47
47
READ_QUALITY_TAG = "rq"
48
- BACK_ALIGNMENT_SCORE_TAG = "JB"
49
48
50
49
51
50
class ReadSnapshot :
@@ -197,6 +196,13 @@ def __hash__(self) -> int:
197
196
show_default = True ,
198
197
help = "Tag into which to put whether a given UMI was actually corrected."
199
198
)
199
+ @click .option (
200
+ "--back-alignment-score-tag" ,
201
+ type = str ,
202
+ default = "JB" ,
203
+ show_default = True ,
204
+ help = "Tag containing the back (trailing adapter) alignment score." ,
205
+ )
200
206
@click .option (
201
207
"-l" ,
202
208
"--umi-length" ,
@@ -235,7 +241,8 @@ def __hash__(self) -> int:
235
241
def main (umi_length , max_ccs_edit_dist , max_clr_edit_dist , max_ccs_length_diff , max_clr_length_diff , max_ccs_gc_diff ,
236
242
max_clr_gc_diff , max_ccs_umi_length_delta , max_clr_umi_length_delta , max_final_ccs_umi_length_delta ,
237
243
max_final_clr_umi_length_delta , min_back_seg_score , umi_tag , gene_tag , eq_class_tag , final_umi_tag ,
238
- umi_corrected_tag , output_bam , reject_bam , force , pre_extracted , cache_read_loci , input_bam ):
244
+ umi_corrected_tag , back_alignment_score_tag , output_bam , reject_bam , force , pre_extracted , cache_read_loci ,
245
+ input_bam ):
239
246
"""Correct UMIs with Set Cover algorithm."""
240
247
# This algorithm was originally developed by Victoria Popic and imported into Longbow by Jonn Smith.
241
248
@@ -320,7 +327,7 @@ def main(umi_length, max_ccs_edit_dist, max_clr_edit_dist, max_ccs_length_diff,
320
327
read .set_tag (umi_corrected_tag , 0 )
321
328
322
329
if read_passes_filters (read , umi_length , min_back_seg_score , max_final_ccs_umi_length_delta ,
323
- max_final_clr_umi_length_delta , final_umi_tag ):
330
+ max_final_clr_umi_length_delta , final_umi_tag , back_alignment_score_tag ):
324
331
correct_umi_bam .write (read )
325
332
num_corrected += 1
326
333
else :
@@ -360,8 +367,8 @@ def get_read_seq(read, pre_extracted):
360
367
return read .query_sequence .upper ()[seg .start :seg .end + 1 ]
361
368
362
369
363
def get_back_aln_score(read, back_alignment_score_tag="JB"):
    """Return the back alignment score stored on `read` as an integer.

    The tag value is expected to be a string whose first "/"-separated field
    is the score; everything after the first "/" is ignored.

    Args:
        read: Object exposing `get_tag(name)`.
            NOTE(review): presumably a pysam aligned segment -- confirm
            against the caller.
        back_alignment_score_tag: Name of the tag that holds the score.
            Defaults to "JB", the same default as the
            `--back-alignment-score-tag` command-line option, so call sites
            that pass only `read` keep working.

    Returns:
        The leading field of the tag value converted with `int`.

    Raises:
        ValueError: If the leading field is not a valid integer literal.
        Any exception raised by `read.get_tag` is propagated unchanged.
    """
    raw_score = read.get_tag(back_alignment_score_tag)
    # Only the text before the first "/" carries the score itself.
    score_field, _, _ = raw_score.partition("/")
    return int(score_field)
365
372
366
373
367
374
def valid_umi (read , umi_length , ccs_max_umi_len_delta , clr_max_umi_len_delta , umi_tag ):
@@ -384,12 +391,12 @@ def valid_tags(read, umi_tag, eq_class_tag):
384
391
385
392
386
393
def read_passes_filters(read, umi_length, min_back_seg_score, max_final_ccs_umi_length_delta,
                        max_final_clr_umi_length_delta, final_umi_tag, back_alignment_score_tag):
    """Decide whether a read survives the final UMI-length and back-alignment-score filters.

    A read passes when both of the following hold:
      * its back alignment score, read from `back_alignment_score_tag`, is at
        least `min_back_seg_score`;
      * the length of the value stored in `final_umi_tag` differs from
        `umi_length` by no more than the tolerance for the read's type
        (`max_final_ccs_umi_length_delta` for CCS reads,
        `max_final_clr_umi_length_delta` otherwise).

    Returns:
        True if the read passes both filters, False otherwise.
    """
    # Select the UMI length tolerance that applies to this read's type.
    if ReadType(get_read_type(read)) == ReadType.CCS:
        allowed_delta = max_final_ccs_umi_length_delta
    else:
        allowed_delta = max_final_clr_umi_length_delta

    # Score filter first: the final UMI tag is only read if the score passes.
    back_score = get_back_aln_score(read, back_alignment_score_tag)
    if not back_score >= min_back_seg_score:
        return False

    final_umi = read.get_tag(final_umi_tag)
    return abs(len(final_umi) - umi_length) <= allowed_delta
393
400
394
401
395
402
def extract_read_groups (input_bam_fname , umi_length , pre_extracted , ccs_max_umi_len_delta , clr_max_umi_len_delta , umi_tag ,
0 commit comments