Skip to content

Commit 6ad5a3c

Browse files
committed
removed color-space support
which has been broken since 0.6.x
1 parent e5ab59d commit 6ad5a3c

File tree

11 files changed: 17 additions and 458 deletions.

Makefile

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,7 @@ DFLAGS= -DHAVE_PTHREAD #-D_NO_SSE2 #-D_FILE_OFFSET_BITS=64
66
LOBJS= bamlite.o utils.o bwt.o bwtio.o bwtaln.o bwtgap.o bntseq.o bwamem.o bwamem_pair.o stdaln.o \
77
bseq.o bwaseqio.o bwase.o kstring.o
88
AOBJS= QSufSort.o bwt_gen.o \
9-
is.o bwtmisc.o bwtindex.o ksw.o simple_dp.o \
10-
bwape.o cs2nt.o \
9+
is.o bwtmisc.o bwtindex.o ksw.o bwape.o \
1110
bwtsw2_core.o bwtsw2_main.o bwtsw2_aux.o bwt_lite.o \
1211
bwtsw2_chain.o fastmap.o bwtsw2_pair.o
1312
PROG= bwa

bwape.c

Lines changed: 5 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -212,19 +212,6 @@ static int pairing(bwa_seq_t *p[2], pe_data_t *d, const pe_opt_t *opt, int s_mm,
212212
last_pos[x.y&1][1] = x;
213213
}
214214
}
215-
} else if (opt->type == BWA_PET_SOLID) {
216-
for (i = 0; i < d->arr.n; ++i) {
217-
pair64_t x = d->arr.a[i];
218-
int strand = x.y>>1&1;
219-
if ((strand^x.y)&1) { // push
220-
int y = 1 - (x.y&1);
221-
__pairing_aux(last_pos[y][1], x);
222-
__pairing_aux(last_pos[y][0], x);
223-
} else { // check
224-
last_pos[x.y&1][0] = last_pos[x.y&1][1];
225-
last_pos[x.y&1][1] = x;
226-
}
227-
}
228215
} else {
229216
fprintf(stderr, "[paring] not implemented yet!\n");
230217
exit(1);
@@ -567,11 +554,11 @@ ubyte_t *bwa_paired_sw(const bntseq_t *bns, const ubyte_t *_pacseq, int n_seqs,
567554
++n_tot[is_singleton];
568555
cigar[0] = cigar[1] = 0;
569556
n_cigar[0] = n_cigar[1] = 0;
570-
if (popt->type != BWA_PET_STD && popt->type != BWA_PET_SOLID) continue; // other types of pairing is not considered
557+
if (popt->type != BWA_PET_STD) continue; // other types of pairing is not considered
571558
for (k = 0; k < 2; ++k) { // p[1-k] is the reference read and p[k] is the read considered to be modified
572559
ubyte_t *seq;
573560
if (p[1-k]->type == BWA_TYPE_NO_MATCH) continue; // if p[1-k] is unmapped, skip
574-
if (popt->type == BWA_PET_STD) {
561+
{ // note that popt->type == BWA_PET_STD always true; in older versions, there was a branch for color-space FF/RR reads
575562
if (p[1-k]->strand == 0) { // then the mate is on the reverse strand and has larger coordinate
576563
__set_rght_coor(beg[k], end[k], p[1-k], p[k]);
577564
seq = p[k]->rseq;
@@ -580,17 +567,6 @@ ubyte_t *bwa_paired_sw(const bntseq_t *bns, const ubyte_t *_pacseq, int n_seqs,
580567
seq = p[k]->seq;
581568
seq_reverse(p[k]->len, seq, 0); // because ->seq is reversed; this will reversed back shortly
582569
}
583-
} else { // BWA_PET_SOLID
584-
if (p[1-k]->strand == 0) { // R3-F3 pairing
585-
if (k == 0) __set_left_coor(beg[k], end[k], p[1-k], p[k]); // p[k] is R3
586-
else __set_rght_coor(beg[k], end[k], p[1-k], p[k]); // p[k] is F3
587-
seq = p[k]->rseq;
588-
seq_reverse(p[k]->len, seq, 0); // because ->seq is reversed
589-
} else { // F3-R3 pairing
590-
if (k == 0) __set_rght_coor(beg[k], end[k], p[1-k], p[k]); // p[k] is R3
591-
else __set_left_coor(beg[k], end[k], p[1-k], p[k]); // p[k] is F3
592-
seq = p[k]->seq;
593-
}
594570
}
595571
// perform SW alignment
596572
cigar[k] = bwa_sw_core(bns->l_pac, pacseq, p[k]->len, seq, &beg[k], end[k] - beg[k], &n_cigar[k], &cnt[k]);
@@ -654,7 +630,7 @@ void bwa_sai2sam_pe_core(const char *prefix, char *const fn_sa[2], char *const f
654630
bwa_seq_t *seqs[2];
655631
bwa_seqio_t *ks[2];
656632
clock_t t;
657-
bntseq_t *bns, *ntbns = 0;
633+
bntseq_t *bns;
658634
FILE *fp_sa[2];
659635
gap_opt_t opt, opt0;
660636
khint_t iter;
@@ -679,10 +655,7 @@ void bwa_sai2sam_pe_core(const char *prefix, char *const fn_sa[2], char *const f
679655
opt0 = opt;
680656
fread(&opt, sizeof(gap_opt_t), 1, fp_sa[1]); // overwritten!
681657
ks[1] = bwa_open_reads(opt.mode, fn_fa[1]);
682-
if (!(opt.mode & BWA_MODE_COMPREAD)) {
683-
popt->type = BWA_PET_SOLID;
684-
ntbns = bwa_open_nt(prefix);
685-
} else { // for Illumina alignment only
658+
{ // for Illumina alignment only
686659
if (popt->is_preload) {
687660
strcpy(str, prefix); strcat(str, ".bwt"); bwt = bwt_restore_bwt(str);
688661
strcpy(str, prefix); strcat(str, ".sa"); bwt_restore_sa(str, bwt);
@@ -715,7 +688,7 @@ void bwa_sai2sam_pe_core(const char *prefix, char *const fn_sa[2], char *const f
715688

716689
fprintf(stderr, "[bwa_sai2sam_pe_core] refine gapped alignments... ");
717690
for (j = 0; j < 2; ++j)
718-
bwa_refine_gapped(bns, n_seqs, seqs[j], pacseq, ntbns);
691+
bwa_refine_gapped(bns, n_seqs, seqs[j], pacseq);
719692
fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock();
720693
if (pac == 0) free(pacseq);
721694

@@ -740,7 +713,6 @@ void bwa_sai2sam_pe_core(const char *prefix, char *const fn_sa[2], char *const f
740713

741714
// destroy
742715
bns_destroy(bns);
743-
if (ntbns) bns_destroy(ntbns);
744716
for (i = 0; i < 2; ++i) {
745717
bwa_seq_close(ks[i]);
746718
fclose(fp_sa[i]);

bwase.c

Lines changed: 6 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -296,18 +296,12 @@ void bwa_correct_trimmed(bwa_seq_t *s)
296296
s->len = s->full_len;
297297
}
298298

299-
void bwa_refine_gapped(const bntseq_t *bns, int n_seqs, bwa_seq_t *seqs, ubyte_t *_pacseq, bntseq_t *ntbns)
299+
void bwa_refine_gapped(const bntseq_t *bns, int n_seqs, bwa_seq_t *seqs, ubyte_t *_pacseq)
300300
{
301-
ubyte_t *pacseq, *ntpac = 0;
301+
ubyte_t *pacseq;
302302
int i, j;
303303
kstring_t *str;
304304

305-
if (ntbns) { // in color space
306-
ntpac = (ubyte_t*)calloc(ntbns->l_pac/4+1, 1);
307-
rewind(ntbns->fp_pac);
308-
fread(ntpac, 1, ntbns->l_pac/4 + 1, ntbns->fp_pac);
309-
}
310-
311305
if (!_pacseq) {
312306
pacseq = (ubyte_t*)calloc(bns->l_pac/4+1, 1);
313307
rewind(bns->fp_pac);
@@ -328,47 +322,23 @@ void bwa_refine_gapped(const bntseq_t *bns, int n_seqs, bwa_seq_t *seqs, ubyte_t
328322
s->cigar = bwa_refine_gapped_core(bns->l_pac, pacseq, s->len, s->strand? s->rseq : s->seq, &s->pos,
329323
(s->strand? 1 : -1) * (s->n_gapo + s->n_gape), &s->n_cigar, 1);
330324
}
331-
#if 0
332-
if (ntbns) { // in color space
333-
for (i = 0; i < n_seqs; ++i) {
334-
bwa_seq_t *s = seqs + i;
335-
bwa_cs2nt_core(s, bns->l_pac, ntpac);
336-
for (j = 0; j < s->n_multi; ++j) {
337-
bwt_multi1_t *q = s->multi + j;
338-
int n_cigar;
339-
if (q->gap == 0) continue;
340-
free(q->cigar);
341-
q->cigar = bwa_refine_gapped_core(bns->l_pac, ntpac, s->len, q->strand? s->rseq : s->seq, &q->pos,
342-
(q->strand? 1 : -1) * q->gap, &n_cigar, 0);
343-
q->n_cigar = n_cigar;
344-
}
345-
if (s->type != BWA_TYPE_NO_MATCH && s->cigar) { // update cigar again
346-
free(s->cigar);
347-
s->cigar = bwa_refine_gapped_core(bns->l_pac, ntpac, s->len, s->strand? s->rseq : s->seq, &s->pos,
348-
(s->strand? 1 : -1) * (s->n_gapo + s->n_gape), &s->n_cigar, 0);
349-
}
350-
}
351-
}
352-
#endif
353325
// generate MD tag
354326
str = (kstring_t*)calloc(1, sizeof(kstring_t));
355327
for (i = 0; i != n_seqs; ++i) {
356328
bwa_seq_t *s = seqs + i;
357329
if (s->type != BWA_TYPE_NO_MATCH) {
358330
int nm;
359331
s->md = bwa_cal_md1(s->n_cigar, s->cigar, s->len, s->pos, s->strand? s->rseq : s->seq,
360-
bns->l_pac, ntbns? ntpac : pacseq, str, &nm);
332+
bns->l_pac, pacseq, str, &nm);
361333
s->nm = nm;
362334
}
363335
}
364336
free(str->s); free(str);
365337

366338
// correct for trimmed reads
367-
if (!ntbns) // trimming is only enabled for Illumina reads
368-
for (i = 0; i < n_seqs; ++i) bwa_correct_trimmed(seqs + i);
339+
for (i = 0; i < n_seqs; ++i) bwa_correct_trimmed(seqs + i);
369340

370341
if (!_pacseq) free(pacseq);
371-
free(ntpac);
372342
}
373343

374344
int64_t pos_end(const bwa_seq_t *p)
@@ -587,7 +557,7 @@ void bwa_sai2sam_se_core(const char *prefix, const char *fn_sa, const char *fn_f
587557
bwa_seq_t *seqs;
588558
bwa_seqio_t *ks;
589559
clock_t t;
590-
bntseq_t *bns, *ntbns = 0;
560+
bntseq_t *bns;
591561
FILE *fp_sa;
592562
gap_opt_t opt;
593563

@@ -599,8 +569,6 @@ void bwa_sai2sam_se_core(const char *prefix, const char *fn_sa, const char *fn_f
599569

600570
m_aln = 0;
601571
fread(&opt, sizeof(gap_opt_t), 1, fp_sa);
602-
if (!(opt.mode & BWA_MODE_COMPREAD)) // in color space; initialize ntpac
603-
ntbns = bwa_open_nt(prefix);
604572
bwa_print_sam_SQ(bns);
605573
//bwa_print_sam_PG();
606574
// set ks
@@ -628,7 +596,7 @@ void bwa_sai2sam_se_core(const char *prefix, const char *fn_sa, const char *fn_f
628596
fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock();
629597

630598
fprintf(stderr, "[bwa_aln_core] refine gapped alignments... ");
631-
bwa_refine_gapped(bns, n_seqs, seqs, 0, ntbns);
599+
bwa_refine_gapped(bns, n_seqs, seqs, 0);
632600
fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock();
633601

634602
fprintf(stderr, "[bwa_aln_core] print alignments... ");
@@ -642,7 +610,6 @@ void bwa_sai2sam_se_core(const char *prefix, const char *fn_sa, const char *fn_f
642610

643611
// destroy
644612
bwa_seq_close(ks);
645-
if (ntbns) bns_destroy(ntbns);
646613
bns_destroy(bns);
647614
fclose(fp_sa);
648615
free(aln);

bwase.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ extern "C" {
1414
// Calculate the approximate position of the sequence from the specified bwt with loaded suffix array.
1515
void bwa_cal_pac_pos_core(const bntseq_t *bns, const bwt_t* bwt, bwa_seq_t* seq, const int max_mm, const float fnr);
1616
// Refine the approximate position of the sequence to an actual placement for the sequence.
17-
void bwa_refine_gapped(const bntseq_t *bns, int n_seqs, bwa_seq_t *seqs, ubyte_t *_pacseq, bntseq_t *ntbns);
17+
void bwa_refine_gapped(const bntseq_t *bns, int n_seqs, bwa_seq_t *seqs, ubyte_t *_pacseq);
1818
// Backfill certain alignment properties mainly centering around number of matches.
1919
void bwa_aln2seq(int n_aln, const bwt_aln1_t *aln, bwa_seq_t *s);
2020
// Calculate the end position of a read given a certain sequence.

bwtaln.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,7 @@ int bwa_aln(int argc, char *argv[])
252252
char *prefix;
253253

254254
opt = gap_init_opt();
255-
while ((c = getopt(argc, argv, "n:o:e:i:d:l:k:cLR:m:t:NM:O:E:q:f:b012IYB:")) >= 0) {
255+
while ((c = getopt(argc, argv, "n:o:e:i:d:l:k:LR:m:t:NM:O:E:q:f:b012IYB:")) >= 0) {
256256
switch (c) {
257257
case 'n':
258258
if (strstr(optarg, ".")) opt->fnr = atof(optarg), opt->max_diff = -1;
@@ -272,7 +272,6 @@ int bwa_aln(int argc, char *argv[])
272272
case 'L': opt->mode |= BWA_MODE_LOGGAP; break;
273273
case 'R': opt->max_top2 = atoi(optarg); break;
274274
case 'q': opt->trim_qual = atoi(optarg); break;
275-
case 'c': opt->mode &= ~BWA_MODE_COMPREAD; break;
276275
case 'N': opt->mode |= BWA_MODE_NONSTOP; opt->max_top2 = 0x7fffffff; break;
277276
case 'f': xreopen(optarg, "wb", stdout); break;
278277
case 'b': opt->mode |= BWA_MODE_BAM; break;
@@ -310,7 +309,6 @@ int bwa_aln(int argc, char *argv[])
310309
fprintf(stderr, " -q INT quality threshold for read trimming down to %dbp [%d]\n", BWA_MIN_RDLEN, opt->trim_qual);
311310
fprintf(stderr, " -f FILE file to write output to instead of stdout\n");
312311
fprintf(stderr, " -B INT length of barcode\n");
313-
// fprintf(stderr, " -c input sequences are in the color space\n");
314312
fprintf(stderr, " -L log-scaled gap penalty for long deletions\n");
315313
fprintf(stderr, " -N non-iterative mode: search for all n-difference hits (slooow)\n");
316314
fprintf(stderr, " -I the input is in the Illumina 1.3+ FASTQ-like format\n");

bwtaln.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,6 @@ typedef struct {
107107
} gap_opt_t;
108108

109109
#define BWA_PET_STD 1
110-
#define BWA_PET_SOLID 2
111110

112111
typedef struct {
113112
int max_isize, force_isize;

bwtindex.c

Lines changed: 3 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,11 @@ void bwa_pac_rev_core(const char *fn, const char *fn_rev);
4242
int bwa_index(int argc, char *argv[])
4343
{
4444
char *prefix = 0, *str, *str2, *str3;
45-
int c, algo_type = 0, is_color = 0, is_64 = 0;
45+
int c, algo_type = 0, is_64 = 0;
4646
clock_t t;
4747
int64_t l_pac;
4848

49-
while ((c = getopt(argc, argv, "6ca:p:")) >= 0) {
49+
while ((c = getopt(argc, argv, "6a:p:")) >= 0) {
5050
switch (c) {
5151
case 'a': // if -a is not set, algo_type will be determined later
5252
if (strcmp(optarg, "div") == 0) algo_type = 1;
@@ -55,7 +55,6 @@ int bwa_index(int argc, char *argv[])
5555
else err_fatal(__func__, "unknown algorithm: '%s'.", optarg);
5656
break;
5757
case 'p': prefix = strdup(optarg); break;
58-
case 'c': is_color = 1; break;
5958
case '6': is_64 = 1; break;
6059
default: return 1;
6160
}
@@ -67,7 +66,6 @@ int bwa_index(int argc, char *argv[])
6766
fprintf(stderr, "Options: -a STR BWT construction algorithm: bwtsw or is [auto]\n");
6867
fprintf(stderr, " -p STR prefix of the index [same as fasta name]\n");
6968
fprintf(stderr, " -6 index files named as <in.fasta>.64.* instead of <in.fasta>.* \n");
70-
// fprintf(stderr, " -c build color-space index\n");
7169
fprintf(stderr, "\n");
7270
fprintf(stderr, "Warning: `-a bwtsw' does not work for short genomes, while `-a is' and\n");
7371
fprintf(stderr, " `-a div' do not work not for long genomes. Please choose `-a'\n");
@@ -83,29 +81,13 @@ int bwa_index(int argc, char *argv[])
8381
str2 = (char*)calloc(strlen(prefix) + 10, 1);
8482
str3 = (char*)calloc(strlen(prefix) + 10, 1);
8583

86-
if (is_color == 0) { // nucleotide indexing
84+
{ // nucleotide indexing
8785
gzFile fp = xzopen(argv[optind], "r");
8886
t = clock();
8987
fprintf(stderr, "[bwa_index] Pack FASTA... ");
9088
l_pac = bns_fasta2bntseq(fp, prefix, 0);
9189
fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
9290
gzclose(fp);
93-
} else { // color indexing
94-
gzFile fp = xzopen(argv[optind], "r");
95-
strcat(strcpy(str, prefix), ".nt");
96-
t = clock();
97-
fprintf(stderr, "[bwa_index] Pack nucleotide FASTA... ");
98-
l_pac = bns_fasta2bntseq(fp, str, 0);
99-
fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
100-
gzclose(fp);
101-
{
102-
char *tmp_argv[3];
103-
tmp_argv[0] = argv[0]; tmp_argv[1] = str; tmp_argv[2] = prefix;
104-
t = clock();
105-
fprintf(stderr, "[bwa_index] Convert nucleotide PAC to color PAC... ");
106-
bwa_pac2cspac(3, tmp_argv);
107-
fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
108-
}
10991
}
11092
if (algo_type == 0) algo_type = l_pac > 50000000? 2 : 3; // set the algorithm for generating BWT
11193
{

0 commit comments

Comments
 (0)