-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathfrequency_estimator.h
155 lines (123 loc) · 4.14 KB
/
frequency_estimator.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
#ifndef __FREQUENCY_ESTIMATOR_H
#define __FREQUENCY_ESTIMATOR_H
#include <vector>
#include <cstring>
#include <cmath>
#include "MathGenMin.h"
#include "Error.h"
#include "bcf_filtered_reader.h"
#include "Eigen/Dense"
#include "Eigen/Core"
#include "Eigen/SVD"
class frequency_estimator : public VectorFunc {
public:
Eigen::MatrixXd* pEigVecs;
Eigen::BDCSVD<Eigen::MatrixXd>* pSVD;
bool skipIf;
bool skipInfo;
bool siteOnly;
bool nelderMead;
bool assumeHWD;
std::string field;
double gtError;
int32_t nsamples;
int32_t ndims;
double tol;
double maxLambda;
bcf_hdr_t* hdr;
bcf_hdr_t* wdr;
bcf1_t* iv;
float hwe0z;
float hwe1z;
float ibc0;
float ibc1;
float llknull;
int32_t* pls;
int32_t n_pls;
int8_t* ploidies;
float* ifs;
float* betas;
float theta;
int32_t* gt;
int32_t* gq;
double pooled_af;
bool isaf_computed;
frequency_estimator(Eigen::MatrixXd* _pEigVecs, double _tol = 1e-10, double maxLambda = 1.0);
frequency_estimator(Eigen::BDCSVD<Eigen::MatrixXd>* pSVD, double _tol = 1e-10, double maxLambda = 1.0);
~frequency_estimator();
bool set_hdr(bcf_hdr_t* _hdr, bcf_hdr_t* _wdr);
bool set_variant(bcf1_t* _iv, int8_t* ploidies, int32_t* _pl = NULL); //, std::vector<int32_t>* p_icols = NULL);
double estimate_pooled_af_em(int32_t maxiter=10);
void estimate_isaf_em(int32_t maxiter = 20);
void estimate_isaf_em_hwd(int32_t maxiter = 20);
void estimate_isaf_lrt();
void estimate_isaf_simplex();
bool score_test_hwe(bool use_isaf = true);
//bool lr_test_hwe(bool use_isaf = true);
bool update_variant();
bool update_gt_gq(bool update_gq = true);
virtual double Evaluate(Vector& v);
/*
double testHWE(Vector& v) {
int32_t nsamples = pEst->pBfr->get_nsamples();
double expGeno, isaf, isafQ, llk;
double* smEigVecs;
int32_t i, j;
double p0, p1, p2, U, sumU, sumU2;
BCFFilteredReader& bfr = *(pEst->pBfr);
llk = 0; pEst->meanISAF = 0; pEst->maxISAF = 0; pEst->minISAF = 1;
if ( pEst->ndims+1 != v.dim )
error("[E:%s:%d %s] Dimensions do not match %d vs %d", __FILE__, __LINE__, __PRETTY_FUNCTION__, pEst->ndims, v.dim);
sumU = sumU2 = 0;
for(i=0; i < nsamples; ++i) {
expGeno = v[0];
smEigVecs = pEst->pEigVecs->at(i);
for(j=0; j < pEst->ndims; ++j) {
expGeno += (v[j+1] * smEigVecs[j]);
}
isaf = expGeno/2.0;
//isaf = invLogit(expGeno/2.0);
if ( isaf < pEst->minMAF ) isaf = pEst->minMAF;
else if ( 1.0-isaf < pEst->minMAF ) isaf = 1.0-pEst->minMAF;
if ( pEst->maxISAF < isaf ) pEst->maxISAF = isaf;
if ( pEst->minISAF > isaf ) pEst->minISAF = isaf;
pEst->meanISAF += isaf;
isafQ = 1.0-isaf;
p0 = phredConv.toProb(bfr.get_likelihood_at(i*3));
p1 = phredConv.toProb(bfr.get_likelihood_at(i*3+1));
p2 = phredConv.toProb(bfr.get_likelihood_at(i*3+2));
U = (p0-2*p1+p2)/(isafQ*isafQ*p0+2*isaf*isafQ*p1+isaf*isaf*p2+1e-100);
sumU += U;
sumU2 += (U*U);
}
pEst->meanISAF /= nsamples;
return sumU/sqrt(sumU2);
}
virtual double Evaluate(Vector& v) {
int32_t nsamples = pEst->pBfr->get_nsamples();
double expGeno, isaf, isafQ, llk;
double* smEigVecs;
int32_t i, j;
BCFFilteredReader& bfr = *(pEst->pBfr);
llk = 0;
if ( pEst->ndims+1 != v.dim )
error("[E:%s:%d %s] Dimensions do not match %d vs %d", __FILE__, __LINE__, __PRETTY_FUNCTION__, pEst->ndims, v.dim);
for(i=0; i < nsamples; ++i) {
expGeno = v[0];
smEigVecs = pEst->pEigVecs->at(i);
for(j=0; j < pEst->ndims; ++j) {
expGeno += (v[j+1] * smEigVecs[j]);
}
isaf = expGeno/2.0;
//isaf = invLogit(expGeno/2.0);
if ( isaf < pEst->minMAF ) isaf = pEst->minMAF;
else if ( 1.0-isaf < pEst->minMAF ) isaf = 1.0-pEst->minMAF;
isafQ = 1.0-isaf;
llk += log(isafQ * isafQ * phredConv.toProb(bfr.get_likelihood_at(i*3)) + 2.0 * isaf * isafQ * phredConv.toProb(bfr.get_likelihood_at(i*3+1)) + isaf * isaf * phredConv.toProb(bfr.get_likelihood_at(i*3+2)));
}
return 0-llk;
}
};
*/
};
#endif