@@ -464,170 +464,3 @@ void mm_set_mapq(void *km, int n_regs, mm_reg1_t *regs, int min_chain_sc, int ma
464
464
}
465
465
mm_set_inv_mapq (km , n_regs , regs );
466
466
}
467
-
468
- /******************************
469
- * Instructed split alignment *
470
- ******************************/
471
-
472
- #define MM_MIN_EXON_LEN 20
473
-
474
- static int32_t mm_jump_check (void * km , const mm_idx_t * mi , int32_t qlen , const uint8_t * qseq0 , const mm_reg1_t * r , int32_t ext , int32_t is_left ) // TODO: check close N
475
- {
476
- int32_t clip , clen , e = !r -> rev ^ !is_left ; // 0 for left of the alignment; 1 for right
477
- uint32_t cigar ;
478
- if (!r -> p || r -> p -> n_cigar <= 0 ) return -1 ; // only working with CIGAR
479
- clip = e == 0 ? r -> qs : qlen - r -> qe ;
480
- cigar = r -> p -> cigar [e == 0 ? 0 : r -> p -> n_cigar - 1 ];
481
- clen = (cigar & 0xf ) == MM_CIGAR_MATCH ? cigar >>4 : 0 ;
482
- if (clen <= ext ) return -1 ;
483
- if (is_left ) {
484
- if (clip >= r -> rs ) return -1 ; // no space to jump
485
- } else {
486
- if (clip >= mi -> seq [r -> rid ].len - r -> re ) return -1 ; // no space to jump
487
- }
488
- return 0 ;
489
- }
490
-
491
- static uint8_t * mm_jump_get_qseq_seq (void * km , int32_t qlen , const uint8_t * qseq0 , const mm_reg1_t * r , int32_t is_left , int32_t ql0 , uint8_t * qseq )
492
- {
493
- int32_t i , k ;
494
- if (!r -> rev ) {
495
- if (is_left ) memcpy (qseq , qseq0 , ql0 );
496
- else memcpy (qseq , & qseq0 [qlen - ql0 ], ql0 );
497
- } else {
498
- if (is_left )
499
- for (i = qlen - 1 , k = 0 ; i >= qlen - ql0 ; -- i )
500
- qseq [k ++ ] = qseq0 [i ] >= 4 ? qseq0 [i ] : 3 - qseq0 [i ];
501
- else
502
- for (i = ql0 - 1 , k = 0 ; i >= 0 ; -- i )
503
- qseq [k ++ ] = qseq0 [i ] >= 4 ? qseq0 [i ] : 3 - qseq0 [i ];
504
- }
505
- return qseq ;
506
- }
507
-
508
- static void mm_jump_split_left (void * km , const mm_idx_t * mi , const mm_mapopt_t * opt , int32_t qlen , const uint8_t * qseq0 , mm_reg1_t * r , int32_t ts_strand )
509
- {
510
- uint8_t * tseq = 0 , * qseq = 0 ;
511
- int32_t i , n , i0 = -1 , m = 0 , l ;
512
- int32_t ext = 1 + (opt -> b + opt -> a - 1 ) / opt -> a + 1 ;
513
- int32_t clip = !r -> rev ? r -> qs : qlen - r -> qe ;
514
- int32_t extt = clip < ext ? clip : ext ;
515
- const mm_idx_jjump1_t * a ;
516
-
517
- if (mm_jump_check (km , mi , qlen , qseq0 , r , ext + MM_MIN_EXON_LEN , 1 ) < 0 ) return ;
518
- a = mm_idx_jump_get (mi , r -> rid , r -> rs - extt , r -> rs + ext , & n );
519
- if (n == 0 ) return ;
520
-
521
- for (i = 0 ; i < n ; ++ i ) { // traverse possible jumps
522
- const mm_idx_jjump1_t * ai = & a [i ];
523
- int32_t tlen , tl1 , j , mm1 , mm2 ;
524
- assert (ai -> off >= r -> rs - extt && ai -> off < r -> rs + ext );
525
- if (ts_strand * ai -> strand < 0 ) continue ; // wrong strand
526
- if (ai -> off2 >= ai -> off ) continue ; // wrong direction
527
- if (ai -> off2 < clip + ext ) continue ; // not long enough
528
- if (tseq == 0 ) {
529
- tseq = Kcalloc (km , uint8_t , (clip + ext ) * 2 ); // tseq and qseq are allocated together
530
- qseq = tseq + clip + ext ;
531
- mm_jump_get_qseq_seq (km , qlen , qseq0 , r , 1 , clip + ext , qseq );
532
- }
533
- tl1 = clip + (ai -> off - r -> rs );
534
- tlen = mm_idx_getseq2 (mi , 0 , r -> rid , ai -> off , r -> rs + ext , & tseq [tl1 ]);
535
- assert (tlen == r -> rs + ext - ai -> off );
536
- tlen = mm_idx_getseq2 (mi , 0 , r -> rid , ai -> off2 - tl1 , ai -> off2 , tseq );
537
- assert (tlen == tl1 );
538
- for (j = 0 , mm1 = 0 ; j < tl1 ; ++ j )
539
- if (qseq [j ] != tseq [j ] || qseq [j ] > 3 || tseq [j ] > 3 )
540
- ++ mm1 ;
541
- for (mm2 = 0 ; j < clip + ext ; ++ j )
542
- if (qseq [j ] != tseq [j ] || qseq [j ] > 3 || tseq [j ] > 3 )
543
- ++ mm2 ;
544
- if (mm1 == 0 && mm2 == 1 )
545
- i0 = i , ++ m ; // i0 points to the rightmost i
546
- }
547
- kfree (km , tseq );
548
-
549
- l = m > 0 ? a [i0 ].off - r -> rs : 0 ; // may be negative
550
- if (m == 1 && clip + l >= opt -> jump_min_alen ) { // add one more exon
551
- mm_enlarge_cigar (r , 2 );
552
- memmove (r -> p -> cigar + 2 , r -> p -> cigar , r -> p -> n_cigar * 4 );
553
- r -> p -> cigar [0 ] = (clip + l ) << 4 | MM_CIGAR_MATCH ;
554
- r -> p -> cigar [1 ] = (a [i0 ].off - a [i0 ].off2 ) << 4 | MM_CIGAR_N_SKIP ;
555
- r -> p -> cigar [2 ] = ((r -> p -> cigar [2 ]>>4 ) - l ) << 4 | MM_CIGAR_MATCH ;
556
- r -> p -> n_cigar += 2 ;
557
- r -> rs = a [i0 ].off2 - (clip + l );
558
- if (!r -> rev ) r -> qs = 0 ;
559
- else r -> qe = qlen ;
560
- } else if (m > 0 && a [i0 ].off > r -> rs ) { // trim by l; l is always positive
561
- r -> p -> cigar [0 ] -= l << 4 | MM_CIGAR_MATCH ;
562
- r -> rs += l ;
563
- if (!r -> rev ) r -> qs += l ;
564
- else r -> qe -= l ;
565
- }
566
- }
567
-
568
- static void mm_jump_split_right (void * km , const mm_idx_t * mi , const mm_mapopt_t * opt , int32_t qlen , const uint8_t * qseq0 , mm_reg1_t * r , int32_t ts_strand )
569
- {
570
- uint8_t * tseq = 0 , * qseq = 0 ;
571
- int32_t i , n , i0 = -1 , m = 0 , l ;
572
- int32_t ext = 1 + (opt -> b + opt -> a - 1 ) / opt -> a + 1 ;
573
- int32_t clip = !r -> rev ? qlen - r -> qe : r -> qs ;
574
- int32_t extt = clip < ext ? clip : ext ;
575
- const mm_idx_jjump1_t * a ;
576
-
577
- if (mm_jump_check (km , mi , qlen , qseq0 , r , ext + MM_MIN_EXON_LEN , 1 ) < 0 ) return ;
578
- a = mm_idx_jump_get (mi , r -> rid , r -> re - ext , r -> re + extt , & n );
579
- if (n == 0 ) return ;
580
-
581
- for (i = 0 ; i < n ; ++ i ) { // traverse possible jumps
582
- const mm_idx_jjump1_t * ai = & a [i ];
583
- int32_t tlen , tl1 , j , mm1 , mm2 ;
584
- assert (ai -> off >= r -> rs - extt && ai -> off < r -> rs + ext );
585
- if (ts_strand * ai -> strand < 0 ) continue ; // wrong strand
586
- if (ai -> off2 <= ai -> off ) continue ; // wrong direction
587
- if (ai -> off2 + clip + ext > mi -> seq [r -> rid ].len ) continue ; // not long enough
588
- if (tseq == 0 ) {
589
- tseq = Kcalloc (km , uint8_t , (clip + ext ) * 2 ); // tseq and qseq are allocated together
590
- qseq = tseq + clip + ext ;
591
- mm_jump_get_qseq_seq (km , qlen , qseq0 , r , 0 , clip + ext , qseq );
592
- }
593
- tl1 = clip + (r -> re - ai -> off );
594
- tlen = mm_idx_getseq2 (mi , 0 , r -> rid , r -> re - ext , ai -> off , tseq );
595
- assert (tlen == ai -> off - (r -> re - ext ));
596
- tlen = mm_idx_getseq2 (mi , 0 , r -> rid , ai -> off2 , ai -> off2 + tl1 , & tseq [clip + ext - tl1 ]);
597
- assert (tlen == tl1 );
598
- for (j = 0 , mm2 = 0 ; j < clip + ext - tl1 ; ++ j )
599
- if (qseq [j ] != tseq [j ] || qseq [j ] > 3 || tseq [j ] > 3 )
600
- ++ mm2 ;
601
- for (mm1 = 0 ; j < clip + ext ; ++ j )
602
- if (qseq [j ] != tseq [j ] || qseq [j ] > 3 || tseq [j ] > 3 )
603
- ++ mm1 ;
604
- if (mm1 == 0 && mm2 == 1 )
605
- i0 = i0 >= 0 ? i0 : i , ++ m ; // i0 points to the leftmost i
606
- }
607
- kfree (km , tseq );
608
-
609
- l = m > 0 ? r -> re - a [i0 ].off : 0 ; // may be negative
610
- if (m == 1 && clip + l >= opt -> jump_min_alen ) { // add one more exon
611
- mm_enlarge_cigar (r , 2 );
612
- memmove (r -> p -> cigar + 2 , r -> p -> cigar , r -> p -> n_cigar * 4 );
613
- r -> p -> cigar [r -> p -> n_cigar - 1 ] = ((r -> p -> cigar [r -> p -> n_cigar - 1 ]>>4 ) - l ) << 4 | MM_CIGAR_MATCH ;
614
- r -> p -> cigar [r -> p -> n_cigar ] = (a [i0 ].off2 - a [i0 ].off ) << 4 | MM_CIGAR_N_SKIP ;
615
- r -> p -> cigar [r -> p -> n_cigar + 1 ] = (clip + l ) << 4 | MM_CIGAR_MATCH ;
616
- r -> p -> n_cigar += 2 ;
617
- r -> re = a [i0 ].off2 + (clip + l );
618
- if (!r -> rev ) r -> qe = qlen ;
619
- else r -> qs = 0 ;
620
- } else if (m > 0 && r -> re > a [i0 ].off ) { // trim by l; l is always positive
621
- r -> p -> cigar [r -> p -> n_cigar - 1 ] -= l << 4 | MM_CIGAR_MATCH ;
622
- r -> re -= l ;
623
- if (!r -> rev ) r -> qe -= l ;
624
- else r -> qs += l ;
625
- }
626
- }
627
-
628
- void mm_jump_split (void * km , const mm_idx_t * mi , const mm_mapopt_t * opt , int32_t qlen , const uint8_t * qseq , mm_reg1_t * r , int32_t ts_strand )
629
- {
630
- assert ((opt -> flag & MM_F_EQX ) == 0 );
631
- mm_jump_split_left (km , mi , opt , qlen , qseq , r , ts_strand );
632
- mm_jump_split_right (km , mi , opt , qlen , qseq , r , ts_strand );
633
- }
0 commit comments