Skip to content

Commit ac42e67

Browse files
committed
Pass a monotonic score into find_overlap_cutoffs
1 parent 3f07f7f commit ac42e67

File tree

2 files changed

+30
-29
lines changed

2 files changed

+30
-29
lines changed

micall/tests/data/referenceless_stitcher_logs/test_full_pipeline_tiny_values[876].txt

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ BEGIN 13
184184
to
185185
be
186186
(501,
187-
26).
187+
13).
188188
END 13
189189
BEGIN 14
190190
Overlap
@@ -196,14 +196,14 @@ BEGIN 14
196196
has
197197
aligned
198198
size
199-
29,
200-
19
199+
16,
200+
10
201201
matches,
202202
and
203203
the
204204
score
205205
of
206-
17440.79013769597.
206+
13031.98390114446.
207207
END 14
208208
BEGIN 15
209209
Found
@@ -212,7 +212,7 @@ BEGIN 15
212212
overlap
213213
of
214214
size
215-
24
215+
11
216216
between
217217
contigs
218218
(2)

micall/utils/referenceless_contig_stitcher.py

Lines changed: 25 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -337,14 +337,14 @@ def compute_alignment_and_score(
337337

338338
# Cutoff computation helpers
339339
def cutoffs_left_covered(
340-
minimum_score: Score,
340+
minimum_acceptable: Score,
341341
left: ContigWithAligner,
342342
right: ContigWithAligner,
343343
shift: int,
344344
left_initial_overlap: str,
345345
) -> Tuple[int, int]:
346346
overlap_alignments = tuple(
347-
map_overlap(right, minimum_score, "cover", left_initial_overlap)
347+
map_overlap(right, minimum_acceptable, "cover", left_initial_overlap)
348348
)
349349
right_cutoff = max((end for start, end in overlap_alignments), default=-1)
350350
if right_cutoff < 0:
@@ -357,14 +357,14 @@ def cutoffs_left_covered(
357357

358358

359359
def cutoffs_right_covered(
360-
minimum_score: Score,
360+
minimum_acceptable: Score,
361361
left: ContigWithAligner,
362362
right: ContigWithAligner,
363363
shift: int,
364364
right_initial_overlap: str,
365365
) -> Tuple[int, int]:
366366
overlap_alignments = tuple(
367-
map_overlap(left, minimum_score, "cover", right_initial_overlap)
367+
map_overlap(left, minimum_acceptable, "cover", right_initial_overlap)
368368
)
369369
left_cutoff = min((start for start, end in overlap_alignments), default=-1)
370370
if left_cutoff < 0:
@@ -377,20 +377,20 @@ def cutoffs_right_covered(
377377

378378

379379
def cutoffs_left_shorter(
380-
minimum_score: Score,
380+
minimum_acceptable: Score,
381381
left: ContigWithAligner,
382382
right: ContigWithAligner,
383383
left_initial_overlap: str,
384384
right_initial_overlap: str,
385385
) -> Optional[Tuple[int, int]]:
386386
left_overlap_alignments = map_overlap(
387-
left, minimum_score, "left", right_initial_overlap
387+
left, minimum_acceptable, "left", right_initial_overlap
388388
)
389389
left_cutoff = min((start for start, end in left_overlap_alignments), default=-1)
390390
if left_cutoff < 0:
391391
return None
392392
right_overlap_alignments = map_overlap(
393-
right, minimum_score, "right", left_initial_overlap
393+
right, minimum_acceptable, "right", left_initial_overlap
394394
)
395395
right_cutoff = max((end for start, end in right_overlap_alignments), default=-1)
396396
if right_cutoff < 0:
@@ -399,20 +399,20 @@ def cutoffs_left_shorter(
399399

400400

401401
def cutoffs_right_shorter_or_equal(
402-
minimum_score: Score,
402+
minimum_acceptable: Score,
403403
left: ContigWithAligner,
404404
right: ContigWithAligner,
405405
left_initial_overlap: str,
406406
right_initial_overlap: str,
407407
) -> Optional[Tuple[int, int]]:
408408
right_overlap_alignments = map_overlap(
409-
right, minimum_score, "right", left_initial_overlap
409+
right, minimum_acceptable, "right", left_initial_overlap
410410
)
411411
right_cutoff = max((end for start, end in right_overlap_alignments), default=-1)
412412
if right_cutoff < 0:
413413
return None
414414
left_overlap_alignments = map_overlap(
415-
left, minimum_score, "left", right_initial_overlap
415+
left, minimum_acceptable, "left", right_initial_overlap
416416
)
417417
left_cutoff = min((start for start, end in left_overlap_alignments), default=-1)
418418
if left_cutoff < 0:
@@ -421,7 +421,7 @@ def cutoffs_right_shorter_or_equal(
421421

422422

423423
def compute_overlap_cutoffs(
424-
minimum_score: Score,
424+
minimum_acceptable: Score,
425425
left: ContigWithAligner,
426426
right: ContigWithAligner,
427427
shift: int,
@@ -430,23 +430,23 @@ def compute_overlap_cutoffs(
430430
) -> Optional[Tuple[int, int]]:
431431
if len(left.seq) == len(left_initial_overlap):
432432
return cutoffs_left_covered(
433-
minimum_score, left, right, shift, left_initial_overlap
433+
minimum_acceptable, left, right, shift, left_initial_overlap
434434
)
435435
if len(right.seq) == len(right_initial_overlap):
436436
return cutoffs_right_covered(
437-
minimum_score, left, right, shift, right_initial_overlap
437+
minimum_acceptable, left, right, shift, right_initial_overlap
438438
)
439439
if len(left.seq) < len(right.seq):
440440
return cutoffs_left_shorter(
441-
minimum_score, left, right, left_initial_overlap, right_initial_overlap
441+
minimum_acceptable, left, right, left_initial_overlap, right_initial_overlap
442442
)
443443
return cutoffs_right_shorter_or_equal(
444-
minimum_score, left, right, left_initial_overlap, right_initial_overlap
444+
minimum_acceptable, left, right, left_initial_overlap, right_initial_overlap
445445
)
446446

447447

448448
def find_overlap_cutoffs(
449-
minimum_score: Score,
449+
minimum_acceptable: Score,
450450
left: ContigWithAligner,
451451
right: ContigWithAligner,
452452
shift: int,
@@ -462,28 +462,29 @@ def find_overlap_cutoffs(
462462
463463
Caching:
464464
Results are cached per (left.id, right.id). When no valid overlap region
465-
is possible for the given pair (under `minimum_score`), a None entry is
465+
is possible for the given pair, a None entry is
466466
recorded and None is returned on subsequent calls.
467467
468468
Returns:
469469
Tuple (left_cutoff, right_cutoff) on success, or None if no valid
470-
overlap region satisfies the minimum score.
470+
overlap region satisfies the minimum acceptable score (or the lower threshold that was in effect when the result was cached).
471471
"""
472472

473473
# Note:
474-
# It is fine to omit `minimum_score` from the cache key because
475-
# the cutoffs are monotonic with respect to `minimum_score`.
476-
# Increasing `minimum_score` can only reduce the valid overlap region,
474+
# It is fine to omit `minimum_acceptable` from the cache key because
475+
# the cutoffs are monotonic with respect to `minimum_acceptable`.
476+
# Increasing `minimum_acceptable` can only reduce the valid overlap region,
477477
# never expand it. Therefore, the cutoffs computed for a lower
478-
# `minimum_score` are always valid for a higher one.
478+
# `minimum_acceptable` are always valid for a higher one.
479+
# `minimum_acceptable` itself is assumed to change monotonically (never decreasing) between calls, so cached entries stay valid.
479480
key = (left.id, right.id)
480481
existing = cutoffs_cache.get(key, -1)
481482
if existing != -1:
482483
ret: CutoffsCacheResult = existing # type: ignore[assignment]
483484
return ret
484485

485486
value = compute_overlap_cutoffs(
486-
minimum_score, left, right, shift, left_initial_overlap, right_initial_overlap
487+
minimum_acceptable, left, right, shift, left_initial_overlap, right_initial_overlap
487488
)
488489
cutoffs_cache[key] = value
489490
return value
@@ -622,7 +623,7 @@ def try_combine_contigs(
622623
left, right, shift, left_initial_overlap, right_initial_overlap, overlap = prepared
623624

624625
cutoffs = find_overlap_cutoffs(
625-
minimum_base_score,
626+
pool.min_acceptable_score,
626627
left,
627628
right,
628629
shift,

0 commit comments

Comments
 (0)