forked from danny-wilson/omegaMap
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMLST.h
104 lines (98 loc) · 3.18 KB
/
MLST.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#ifndef _MLST_H_
#define _MLST_H_
#pragma warning(disable: 4786)
#include <myerror.h>
#include <vector.h>
#include <matrix.h>
#include <DNA.h>
using namespace myutils;
class MLST {
public:
int n; // number of sequences
int nloc; // number of loci
Vector<int> nhap; // nhap[l] (l=0..nloc-1) gives the number of unique alleles at locus l
Vector<DNA> allele; // allele[l] (l=0..nloc-1) stores the DNA sequences of the nhap[l] unique alleles at locus l
Matrix<int> count; // count[l][i] (l=0..nloc-1,i=0..nhap[l]-1) is the count of unique allele i at locus l
Matrix<int> haplotype; // haplotype[i] (i=0..n-1) gives the allelic profile for sequence i, so that
// haplotype[i][l] (l=0..nloc-1) is allele number at locus l, so that the DNA sequence
// is accessed using allele[l][haplotype[i][l]]. However, a short-cut would be, rather than
// using MLST.allele[l][haplotype[i][l]], to use MLST.seq(i,l).
public:
string& seq(const int i, const int l) {
return allele[l][haplotype[i][l]];
}
MLST() {};
MLST(const int nloc_in, const char* filename[]) {
nloc = nloc_in;
Vector<DNA*> temp(nloc);
int l;
for(l=0;l<nloc;l++) temp[l] = new DNA(filename[l]);
initialize(temp);
for(l=0;l<nloc;l++) delete temp[l];
}
MLST(Vector<DNA*> &temp) {
initialize(temp);
}
void initialize(Vector<DNA*> &temp) {
nloc = temp.size();
if(nloc<1) myutils::error("MLST::initialize(): must be at least one locus");
int l;
n = temp[0]->nseq;
for(l=1;l<nloc;l++) if(temp[l]->nseq!=n) myutils::error("MLST(): all loci should have the same number of sequences");
nhap.resize(nloc);
allele.resize(nloc);
haplotype = Matrix<int>(n,nloc,-1);
count = Matrix<int>(nloc,n,0);
Vector<int> convert(n);
int i,j;
for(l=0;l<nloc;l++) {
nhap[l] = 0;
for(i=0;i<n;i++)
for(j=0;j<=i;j++)
if(temp[l]->sequence[i]==temp[l]->sequence[j]) {
++count[l][j];
haplotype[i][l] = j;
break;
}
int check_total = 0;
for(i=0;i<n;i++) {
nhap[l] += (count[l][i]>0) ? 1 : 0;
check_total += count[l][i];
}
if(check_total!=n) myutils::error("MLST(): problem in counting haplotypes");
allele[l].resize(nhap[l],temp[l]->lseq);
int hap = 0;
for(i=0;i<n;i++) {
if(count[l][i]>0) {
allele[l][hap] = temp[l]->sequence[i];
count[l][hap] = count[l][i];
convert[i] = hap;
++hap;
}
}
if(hap!=nhap[l]) myutils::error("MLST(): hap and nhap disagree");
for(;hap<n;hap++) count[l][hap] = 0;
for(i=0;i<n;i++)
haplotype[i][l] = convert[haplotype[i][l]];
/*if(nhap.element[l]>1) {
double pi = allele[l].pi();
double H = allele[l].H();
}*/
}
/*cout << "Allelic profiles of the " << n << " haplotypes" << endl;
for(i=0;i<n;i++){
cout << "Hap" << i << ":";
for(j=0;j<nloc;j++) cout << " " << haplotype[i][j];
cout << endl;
}
cout << endl;
cout << "Frequency of the haplotypes at each locus" << endl;
for(l=0;l<nloc;l++) {
cout << "Loc" << l << ":";
for(i=0;i<nhap[l];i++) cout << " " << count[l][i];
cout << endl;
}
cout << endl;*/
}
};
#endif//_MLST_H_