Skip to content

Commit afe6f81

Browse files
authored
Exposed the back alignment score parameter. (#188)
1 parent 0f50694 commit afe6f81

File tree

2 files changed

+17
-7
lines changed

2 files changed

+17
-7
lines changed

docs/commands/correct_umi.md

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -97,6 +97,9 @@ Options:
9797
values. [default: BX]
9898
--umi-corrected-tag TEXT Tag into which to put whether a given UMI
9999
was actually corrected. [default: UX]
100+
--back-alignment-score-tag TEXT
101+
Tag containing the back (trailing adapter)
102+
alignment score. [default: JB]
100103
-l, --umi-length INTEGER Length of the UMI for this sample.
101104
[default: 10]
102105
-o, --output-bam PATH Corrected UMI bam output [default: stdout].

src/longbow/commands/correct_umi.py

Lines changed: 14 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,6 @@ class ReadType(enum.Enum):
4545

4646
CODING_REGION = "cDNA" # TODO: Make this take in / read in a model so that we don't have to specify this.
4747
READ_QUALITY_TAG = "rq"
48-
BACK_ALIGNMENT_SCORE_TAG = "JB"
4948

5049

5150
class ReadSnapshot:
@@ -197,6 +196,13 @@ def __hash__(self) -> int:
197196
show_default=True,
198197
help="Tag into which to put whether a given UMI was actually corrected."
199198
)
199+
@click.option(
200+
"--back-alignment-score-tag",
201+
type=str,
202+
default="JB",
203+
show_default=True,
204+
help="Tag containing the back (trailing adapter) alignment score.",
205+
)
200206
@click.option(
201207
"-l",
202208
"--umi-length",
@@ -235,7 +241,8 @@ def __hash__(self) -> int:
235241
def main(umi_length, max_ccs_edit_dist, max_clr_edit_dist, max_ccs_length_diff, max_clr_length_diff, max_ccs_gc_diff,
236242
max_clr_gc_diff, max_ccs_umi_length_delta, max_clr_umi_length_delta, max_final_ccs_umi_length_delta,
237243
max_final_clr_umi_length_delta, min_back_seg_score, umi_tag, gene_tag, eq_class_tag, final_umi_tag,
238-
umi_corrected_tag, output_bam, reject_bam, force, pre_extracted, cache_read_loci, input_bam):
244+
umi_corrected_tag, back_alignment_score_tag, output_bam, reject_bam, force, pre_extracted, cache_read_loci,
245+
input_bam):
239246
"""Correct UMIs with Set Cover algorithm."""
240247
# This algorithm was originally developed by Victoria Popic and imported into Longbow by Jonn Smith.
241248

@@ -320,7 +327,7 @@ def main(umi_length, max_ccs_edit_dist, max_clr_edit_dist, max_ccs_length_diff,
320327
read.set_tag(umi_corrected_tag, 0)
321328

322329
if read_passes_filters(read, umi_length, min_back_seg_score, max_final_ccs_umi_length_delta,
323-
max_final_clr_umi_length_delta, final_umi_tag):
330+
max_final_clr_umi_length_delta, final_umi_tag, back_alignment_score_tag):
324331
correct_umi_bam.write(read)
325332
num_corrected += 1
326333
else:
@@ -360,8 +367,8 @@ def get_read_seq(read, pre_extracted):
360367
return read.query_sequence.upper()[seg.start:seg.end + 1]
361368

362369

363-
def get_back_aln_score(read):
364-
return int(read.get_tag(BACK_ALIGNMENT_SCORE_TAG).split("/")[0])
370+
def get_back_aln_score(read, back_alignment_score_tag):
371+
return int(read.get_tag(back_alignment_score_tag).split("/")[0])
365372

366373

367374
def valid_umi(read, umi_length, ccs_max_umi_len_delta, clr_max_umi_len_delta, umi_tag):
@@ -384,12 +391,12 @@ def valid_tags(read, umi_tag, eq_class_tag):
384391

385392

386393
def read_passes_filters(read, umi_length, min_back_seg_score, max_final_ccs_umi_length_delta,
387-
max_final_clr_umi_length_delta, final_umi_tag):
394+
max_final_clr_umi_length_delta, final_umi_tag, back_alignment_score_tag):
388395
# filters the read based on the final UMI length and back alignment score
389396

390397
max_umi_delta_filter = max_final_ccs_umi_length_delta \
391398
if ReadType(get_read_type(read)) == ReadType.CCS else max_final_clr_umi_length_delta
392-
return get_back_aln_score(read) >= min_back_seg_score and abs(len(read.get_tag(final_umi_tag)) - umi_length) <= max_umi_delta_filter
399+
return get_back_aln_score(read, back_alignment_score_tag) >= min_back_seg_score and abs(len(read.get_tag(final_umi_tag)) - umi_length) <= max_umi_delta_filter
393400

394401

395402
def extract_read_groups(input_bam_fname, umi_length, pre_extracted, ccs_max_umi_len_delta, clr_max_umi_len_delta, umi_tag,

0 commit comments

Comments
 (0)