-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathalgSig.rb
executable file
·165 lines (127 loc) · 4.01 KB
/
algSig.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
#!/usr/bin/ruby
require 'extArray'
require 'extMath'
#-------------------------------------------
# update the d-score based on the values from
# the null encoder (pn) and the alt encoder (pa)
def calcD(pn,pa)
return -1*(Math.log2(pn/pa))
end
#-------------------------------------------
# define the null probability bet
# return based on what bit value came up
def probNull(bit)
if bit == 1
return @probNullVal
else #bit == 0
return @negProbNullVal
end
end
#-------------------------------------------
# define the A0 probability bet
def probA0(bit)
# not quite zero, to avoid paying infinitely large
# penalty when calculating score (div by zero)
if bit == 0
return 0.999999
else #bit == 1
# This value can be used to tweak coverage vs
# exclusivity of modules
return 0.000005
end
end
#-------------------------------------------
# define the A1 probability bet
def probA1(bit, sampleSum, remainingSamples, geneSum, remainingGenes, mutsRemaining, bitsRemaining)
ar = sampleSum.to_f/remainingSamples.to_f
ac = geneSum.to_f/remainingGenes.to_f
a = mutsRemaining.to_f/bitsRemaining.to_f
prob = nil
if a == 1 #avoid division by zero problem in prob calculation
prob = 1
else
prob = (ar*ac)/(a*(((1-ar)*(1-ac)/(1-a))+(ar*ac/a)))
end
#avoid division by zero problem in d calculation
if prob == 1
prob = 0.999999
elsif prob == 0
prob = 0.000001
end
if bit == 1
return prob
end
return 1-prob
end
#-------------------------------------------
# calculate the significance of the matrix's pattern
#
def sigCalc(matrix)
d = 0
mutsRemaining = matrix.rowSums.sum
bitsRemaining = matrix.numRows * matrix.numCols
geneSums = Array.new(matrix.rowSums)
#look at one sample at a time
matrix.colHeader.each_index{|sampIndex|
sample = matrix.getColByIndex(sampIndex)
foundOne = false
sampSum = sample.sumNum
#now, look at each bit in that sample
sample.each_index{|geneIndex|
bit = sample[geneIndex]
# We have already seen a one, and are betting the XOR pattern,
# or we know there are no more mutations here due to the sums
if foundOne == true || mutsRemaining == 0
d += calcD(probNull(bit),probA0(bit))
else #estimate the mutation probability
d += calcD(probNull(bit),probA1(bit, geneSums[geneIndex], (matrix.numCols-sampIndex), sampSum, sample.length-geneIndex, mutsRemaining,bitsRemaining))
end
if bit == 1
foundOne = true
geneSums[geneIndex] -= 1
sampSum -= 1
mutsRemaining -= 1
end
bitsRemaining -=1
}
}
return d
end
#-----------------------------------------------
# Have to adjust the final score by subtracting the number
# of bits of information that we use for sums, sorting, etc
def calcAdjustment(matrix,totalGenes)
#score adjustment for col/row sum
colAdj = 0
matrix.colSums.each{|sum|
colAdj += Math.log2star(sum)
}
rowAdj = 0
matrix.rowSums.each{|sum|
rowAdj += Math.log2star(sum)
}
# each row gets sorted by its sum
rowSortAdj = matrix.rowSums.calcUniqPermutationsLog2
colSortAdj = matrix.colSums.calcUniqPermutationsLog2
sortAdj = rowSortAdj + colSortAdj
# score adjustment for which set of N genes to use
# (think "multiple testing" via binomial coefficient)
testAdj = Math.binomCoefficientLog2(totalGenes,matrix.numRows)
return colAdj + rowAdj + testAdj +sortAdj
end
#-------------------------------------------
# takes a matrix of values, total number of genes assayed,
def calcSignificance(inMatrix,totalGenes,bgRate)
#total number of mutations in matrix
oneCount = inMatrix.rowSums.sum
#calculate the adjustment from the raw matrix, for simplicity (prob should be switched later)
penalty = calcAdjustment(inMatrix,totalGenes)
#sort the matrix (have already paid the penalty for this)
inMatrix.sortByRowSums!
inMatrix.sortByColSums!
@probNullVal = bgRate
@negProbNullVal = 1-@probNullVal
#calculate d for the matrix
d = sigCalc(inMatrix)
return d - penalty
end