10
10
import torch .nn .functional as F
11
11
from utils import *
12
12
13
+ def get_free_gpu ():
14
+ os .system ('nvidia-smi -q -d Memory |grep -A4 GPU|grep Free > ./tmp' )
15
+ memory_available = [int (x .split ()[2 ]) for x in open ('tmp' , 'r' ).readlines ()]
16
+ if len (memory_available ) > 0 :
17
+ id = int (np .argmax (memory_available ))
18
+ print ("setting to gpu:%d" % id )
19
+ torch .cuda .set_device (id )
20
+ return "cuda:%d" % id
21
+ else :
22
+ return
23
+
24
+ if torch .cuda .is_available ():
25
+ current_device = get_free_gpu ()
26
+ else :
27
+ current_device = 'cpu'
28
+
13
29
device = torch .device ("cuda" if torch .cuda .is_available () else "cpu" )
14
30
15
- def proba2matrix (sample , weight = None , proba = None ):
31
+ def proba2matrix (sample , weight = None , proba = None , intra = True ):
16
32
17
33
sample_left = sample
18
34
weight_left = weight
19
- sample_left -= np .min (sample_left )
20
- size = int (np .max (sample_left ) + 1 )
21
- m = np .zeros ((size , size ), dtype = 'float32' )
22
- if weight is not None :
23
- for i in range (sample_left .shape [- 1 ] - 1 ):
24
- for j in range ( i + 1 , sample_left .shape [- 1 ]):
25
- m [sample_left [: ,i ], sample_left [: ,j ]] += np .maximum (proba * weight_left , proba )
26
-
35
+ if intra :
36
+ sample_left -= np .min (sample_left )
37
+ size = int (np .max (sample_left ) + 1 )
38
+ m = np .zeros ((size , size ), dtype = 'float32' )
39
+ if weight is not None :
40
+ for i in range (sample_left .shape [- 1 ] - 1 ):
41
+ for j in range ( i + 1 , sample_left .shape [- 1 ]):
42
+ m [sample_left [: ,i ], sample_left [: ,j ]] += np .maximum (proba * weight_left , proba )
43
+
44
+ else :
45
+ for i in range (sample_left .shape [- 1 ] - 1 ):
46
+ for j in range (i + 1 , sample_left .shape [- 1 ]):
47
+ m [sample_left [:, i ], sample_left [:, j ]] += proba
48
+
49
+ m = m + m .T
27
50
else :
28
- for i in range (sample_left .shape [- 1 ] - 1 ):
29
- for j in range (i + 1 , sample_left .shape [- 1 ]):
30
- m [sample_left [:, i ], sample_left [:, j ]] += proba
31
-
32
- m = m [1 : ,1 :]
33
- m = m + m .T
51
+ size1 = int (np .max (sample_left [:, 0 ]) - np .min (sample_left [:, 0 ]) + 1 )
52
+ size2 = int (np .max (sample_left [:, 1 ]) - np .min (sample_left [:, 1 ]) + 1 )
53
+ m = np .zeros ((size1 , size2 ), dtype = 'float32' )
54
+ if weight is not None :
55
+ m [sample_left [:, 0 ] - np .min (sample_left [:, 0 ]), sample_left [:, 1 ]- np .min (sample_left [:, 1 ])] += np .maximum (proba * weight_left , proba )
56
+
57
+ else :
58
+ m [sample_left [:, 0 ] - np .min (sample_left [:, 0 ]), sample_left [:, 1 ] - np .min (sample_left [:, 1 ])] += proba
59
+
34
60
35
61
return m
36
62
37
63
38
64
39
65
66
+
40
67
def generate_pair_wise (chrom_id ):
41
68
samples = []
42
69
for i in range (chrom_range [chrom_id ,0 ] ,chrom_range [chrom_id ,1 ]):
@@ -49,9 +76,9 @@ def generate_pair_wise(chrom_id):
49
76
def predict (model , input ):
50
77
model .eval ()
51
78
output = []
52
- new_batch_size = int (1e5 )
79
+ new_batch_size = int (1e4 )
53
80
with torch .no_grad ():
54
- for j in range (math .ceil (len (input ) / new_batch_size )):
81
+ for j in trange (math .ceil (len (input ) / new_batch_size )):
55
82
x = input [j * new_batch_size :min ((j + 1 ) * new_batch_size , len (input ))]
56
83
x = np2tensor_hyper (x , dtype = torch .long )
57
84
x = pad_sequence (x , batch_first = True , padding_value = 0 ).to (device )
@@ -63,12 +90,14 @@ def predict(model, input):
63
90
config = get_config ()
64
91
min_dis = config ['min_distance' ]
65
92
temp_dir = config ['temp_dir' ]
93
+ res = config ['resolution' ]
66
94
vmin = - 0.0
67
95
vmax = 1.0
68
96
69
97
70
98
chrom_range = np .load (os .path .join (temp_dir ,"chrom_range.npy" ))
71
- classifier_model = torch .load (os .path .join (temp_dir , "model2load" ))
99
+ classifier_model = torch .load (os .path .join (temp_dir , "model2load" ), map_location = current_device )
100
+
72
101
print ("device" , classifier_model .layer_norm1 .weight .device )
73
102
device_info = classifier_model .layer_norm1 .weight .device
74
103
device_info = str (device_info ).split (":" )[- 1 ]
@@ -78,31 +107,85 @@ def predict(model, input):
78
107
79
108
origin = np .load (os .path .join (temp_dir , "intra_adj.npy" )).astype ('float32' )
80
109
# origin = np.load(os.path.join(temp_dir, "edge_list_adj.npy")).astype('float32')
81
- for i in range (len (chrom_range )):
110
+
111
+ import h5py
112
+
113
+ ## generating mcool file
114
+ f = h5py .File ("../denoised.mcool" , "w" )
115
+ grp = f .create_group ("resolutions" )
116
+ grp = grp .create_group ("%d" % res )
117
+ cooler_bin = grp .create_group ("bins" )
118
+ node2bin = np .load (os .path .join (temp_dir ,"node2bin.npy" ), allow_pickle = True ).item ()
119
+
120
+ chrom_list = []
121
+ chrom_start = []
122
+ chrom_end = []
123
+ chrom_name = config ['chrom_list' ]
124
+
125
+ for i in range (1 , int (np .max (list (node2bin .keys ())) + 1 )):
126
+ bin_ = node2bin [i ]
127
+ chrom , start = bin_ .split (":" )
128
+ chrom_list .append (chrom_name .index (chrom ))
129
+ chrom_start .append (int (start ))
130
+ chrom_end .append (int (start ) + res )
131
+
132
+ print (np .array (chrom_list ), np .array (chrom_start ))
133
+ cooler_bin .create_dataset ("chrom" , data = chrom_list )
134
+ cooler_bin .create_dataset ("start" , data = chrom_start )
135
+ cooler_bin .create_dataset ("end" , data = chrom_end )
136
+
137
+ cooler_chrom = grp .create_group ("chroms" )
138
+ cooler_chrom .create_dataset ("name" , data = [l .encode ('utf8' ) for l in chrom_name ], dtype = h5py .special_dtype (vlen = str ))
139
+
140
+ cooler_pixels = grp .create_group ("pixels" )
141
+ bin_id1 = []
142
+ bin_id2 = []
143
+ balanced = []
144
+
145
+
146
+
147
+ for i in range (len (chrom_name )):
82
148
pair_wise = generate_pair_wise (i )
149
+ print (pair_wise )
150
+ bin_id1 .append (np .copy (pair_wise [:, 0 ]) - 1 )
151
+ bin_id2 .append (np .copy (pair_wise [:, 1 ]) - 1 )
83
152
# print (pair_wise.shape, pair_wise)
84
153
proba = predict (classifier_model , pair_wise ).reshape ((- 1 ))
85
154
if task_mode == 'class' :
86
155
proba = torch .sigmoid (torch .from_numpy (proba )).numpy ()
87
156
else :
88
157
proba = F .softplus (torch .from_numpy (proba )).numpy ()
89
- print ( np .sum (proba >= 0.5 ) ,proba .shape )
158
+ # print ( np.sum(proba >= 0.5) ,proba.shape)
90
159
91
160
pair_wise_weight = np .array ([origin [e [0 ] - 1 ,e [1 ] - 1 ] for e in tqdm (pair_wise )])
92
161
93
162
my_proba = proba2matrix (pair_wise , None , proba )
94
- coverage = np .sqrt (np .sum (my_proba , axis = - 1 ))
95
- my_proba = my_proba / coverage .reshape ((- 1 , 1 ))
96
- my_proba = my_proba / coverage .reshape ((1 , - 1 ))
97
-
163
+ coverage1 = np .sqrt (np .mean (my_proba , axis = - 1 , keepdims = True ))
164
+ coverage2 = np .sqrt (np .mean (my_proba , axis = 0 , keepdims = True ))
165
+ my_proba = my_proba / (coverage1 + 1e-15 )
166
+ my_proba = my_proba / (coverage2 + 1e-15 )
167
+
98
168
origin_part = proba2matrix (pair_wise , None , pair_wise_weight )
169
+ gap1 = np .sum (origin_part , axis = - 1 ) == 0
170
+ gap2 = np .sum (origin_part , axis = 0 ) == 0
171
+ coverage1 = np .sqrt (np .mean (origin_part , axis = - 1 , keepdims = True ))
172
+ coverage2 = np .sqrt (np .mean (origin_part , axis = 0 , keepdims = True ))
173
+ origin_part = origin_part / (coverage1 + 1e-15 )
174
+ origin_part = origin_part / (coverage2 + 1e-15 )
175
+
176
+
99
177
my = my_proba * origin_part
100
-
101
- gap = np .sum (origin_part , axis = - 1 ) == 0
102
- my [gap ,:] = 0.0
103
- my [:, gap ] = 0.0
104
- my_proba [gap ,:] = 0.0
105
- my_proba [:, gap ] = 0.0
178
+ my = np .maximum (my_proba * origin_part , my_proba )
179
+ coverage1 = np .sqrt (np .mean (my , axis = - 1 , keepdims = True ))
180
+ coverage2 = np .sqrt (np .mean (my , axis = 0 , keepdims = True ))
181
+ my = my / (coverage1 + 1e-15 )
182
+ my = my / (coverage2 + 1e-15 )
183
+
184
+
185
+ my [gap1 ,:] = 0.0
186
+ my [:, gap2 ] = 0.0
187
+ my_proba [gap1 ,:] = 0.0
188
+ my_proba [:, gap2 ] = 0.0
106
189
107
190
my = transformer .fit_transform (my .reshape ((- 1 , 1 ))).reshape ((len (my ), - 1 ))
108
191
origin_part = transformer .fit_transform (origin_part .reshape ((- 1 , 1 ))).reshape ((len (origin_part ), - 1 ))
@@ -115,8 +198,14 @@ def predict(model, input):
115
198
ax = sns .heatmap (my , cmap = "Reds" , square = True , mask = mask ,cbar = False , vmin = vmin , vmax = vmax )
116
199
ax .get_xaxis ().set_visible (False )
117
200
ax .get_yaxis ().set_visible (False )
118
- plt .savefig ("../chr%d_denoise .png" % ( i + 1 ) , dpi = 300 )
201
+ plt .savefig ("../%s_denoise .png" % chrom_name [ i ] , dpi = 300 )
119
202
plt .close (fig )
203
+
204
+
205
+ min_ = np .min (pair_wise )
206
+ # print (pair_wise[:, 0] - min_, pair_wise[:, 1] - min_, my.shape)
207
+ value = my [pair_wise [:, 0 ] - min_ , pair_wise [:, 1 ] - min_ ]
208
+ balanced .append (value )
120
209
121
210
# fig = plt.figure(figsize=(5, 5))
122
211
# plt.subplots_adjust(left=0.0, right=1.0, top=1.0, bottom=0.0)
@@ -135,7 +224,13 @@ def predict(model, input):
135
224
ax = sns .heatmap (origin_part , cmap = "Reds" , square = True , mask = mask ,cbar = False , vmin = vmin , vmax = vmax )
136
225
ax .get_xaxis ().set_visible (False )
137
226
ax .get_yaxis ().set_visible (False )
138
- plt .savefig ("../chr%d_origin .png" % ( i + 1 ) , dpi = 300 )
227
+ plt .savefig ("../%s_origin .png" % chrom_name [ i ] , dpi = 300 )
139
228
plt .close (fig )
140
229
141
230
231
+ bin_id1 = np .concatenate (bin_id1 , axis = 0 )
232
+ bin_id2 = np .concatenate (bin_id2 , axis = 0 )
233
+ balanced = np .concatenate (balanced , axis = 0 )
234
+ cooler_pixels .create_dataset ("bin1_id" , data = bin_id1 )
235
+ cooler_pixels .create_dataset ("bin2_id" , data = bin_id2 )
236
+ cooler_pixels .create_dataset ("balanced" , data = balanced )
0 commit comments