Skip to content

Commit 1b8e1f2

Browse files
committed
denoise_contact.py update
The script will now output an mcool file as well.
1 parent 51382f4 commit 1b8e1f2

File tree

3 files changed

+128
-32
lines changed

3 files changed

+128
-32
lines changed

.DS_Store

0 Bytes
Binary file not shown.

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@
22
.DS_Store
33
.DS_Store
44
.DS_Store
5+
*.json

Code/denoise_contact.py

Lines changed: 127 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -10,33 +10,60 @@
1010
import torch.nn.functional as F
1111
from utils import *
1212

13+
def get_free_gpu():
14+
os.system('nvidia-smi -q -d Memory |grep -A4 GPU|grep Free > ./tmp')
15+
memory_available = [int(x.split()[2]) for x in open('tmp', 'r').readlines()]
16+
if len(memory_available) > 0:
17+
id = int(np.argmax(memory_available))
18+
print("setting to gpu:%d" % id)
19+
torch.cuda.set_device(id)
20+
return "cuda:%d" % id
21+
else:
22+
return
23+
24+
if torch.cuda.is_available():
25+
current_device = get_free_gpu()
26+
else:
27+
current_device = 'cpu'
28+
1329
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
1430

15-
def proba2matrix(sample, weight=None, proba=None):
31+
def proba2matrix(sample, weight=None, proba=None, intra=True):
1632

1733
sample_left = sample
1834
weight_left = weight
19-
sample_left -= np.min(sample_left)
20-
size = int(np.max(sample_left) + 1 )
21-
m = np.zeros((size, size), dtype='float32')
22-
if weight is not None:
23-
for i in range(sample_left.shape[-1] - 1):
24-
for j in range( i +1, sample_left.shape[-1]):
25-
m[sample_left[: ,i], sample_left[: ,j]] += np.maximum(proba * weight_left, proba)
26-
35+
if intra:
36+
sample_left -= np.min(sample_left)
37+
size = int(np.max(sample_left) + 1 )
38+
m = np.zeros((size, size), dtype='float32')
39+
if weight is not None:
40+
for i in range(sample_left.shape[-1] - 1):
41+
for j in range( i +1, sample_left.shape[-1]):
42+
m[sample_left[: ,i], sample_left[: ,j]] += np.maximum(proba * weight_left, proba)
43+
44+
else:
45+
for i in range(sample_left.shape[-1] - 1):
46+
for j in range(i + 1, sample_left.shape[-1]):
47+
m[sample_left[:, i], sample_left[:, j]] += proba
48+
49+
m = m + m.T
2750
else:
28-
for i in range(sample_left.shape[-1] - 1):
29-
for j in range(i + 1, sample_left.shape[-1]):
30-
m[sample_left[:, i], sample_left[:, j]] += proba
31-
32-
m = m[1: ,1:]
33-
m = m + m.T
51+
size1 = int(np.max(sample_left[:, 0]) - np.min(sample_left[:, 0]) + 1)
52+
size2 = int(np.max(sample_left[:, 1]) - np.min(sample_left[:, 1]) + 1)
53+
m = np.zeros((size1, size2), dtype='float32')
54+
if weight is not None:
55+
m[sample_left[:, 0] - np.min(sample_left[:, 0]), sample_left[:, 1]- np.min(sample_left[:, 1])] += np.maximum(proba * weight_left, proba)
56+
57+
else:
58+
m[sample_left[:, 0] - np.min(sample_left[:, 0]), sample_left[:, 1] - np.min(sample_left[:, 1])] += proba
59+
3460

3561
return m
3662

3763

3864

3965

66+
4067
def generate_pair_wise(chrom_id):
4168
samples = []
4269
for i in range(chrom_range[chrom_id ,0] ,chrom_range[chrom_id ,1]):
@@ -49,9 +76,9 @@ def generate_pair_wise(chrom_id):
4976
def predict(model, input):
5077
model.eval()
5178
output = []
52-
new_batch_size = int(1e5)
79+
new_batch_size = int(1e4)
5380
with torch.no_grad():
54-
for j in range(math.ceil(len(input) / new_batch_size)):
81+
for j in trange(math.ceil(len(input) / new_batch_size)):
5582
x = input[j * new_batch_size:min((j + 1) * new_batch_size, len(input))]
5683
x = np2tensor_hyper(x, dtype=torch.long)
5784
x = pad_sequence(x, batch_first=True, padding_value=0).to(device)
@@ -63,12 +90,14 @@ def predict(model, input):
6390
config = get_config()
6491
min_dis = config['min_distance']
6592
temp_dir = config['temp_dir']
93+
res = config['resolution']
6694
vmin = -0.0
6795
vmax = 1.0
6896

6997

7098
chrom_range = np.load(os.path.join(temp_dir,"chrom_range.npy"))
71-
classifier_model = torch.load(os.path.join(temp_dir, "model2load"))
99+
classifier_model = torch.load(os.path.join(temp_dir, "model2load"), map_location=current_device)
100+
72101
print ("device", classifier_model.layer_norm1.weight.device)
73102
device_info = classifier_model.layer_norm1.weight.device
74103
device_info = str(device_info).split(":")[-1]
@@ -78,31 +107,85 @@ def predict(model, input):
78107

79108
origin = np.load(os.path.join(temp_dir, "intra_adj.npy")).astype('float32')
80109
# origin = np.load(os.path.join(temp_dir, "edge_list_adj.npy")).astype('float32')
81-
for i in range(len(chrom_range)):
110+
111+
import h5py
112+
113+
## generating mcool file
114+
f = h5py.File("../denoised.mcool", "w")
115+
grp = f.create_group("resolutions")
116+
grp = grp.create_group("%d" % res)
117+
cooler_bin = grp.create_group("bins")
118+
node2bin = np.load(os.path.join(temp_dir,"node2bin.npy"), allow_pickle=True).item()
119+
120+
chrom_list = []
121+
chrom_start = []
122+
chrom_end = []
123+
chrom_name = config['chrom_list']
124+
125+
for i in range(1, int(np.max(list(node2bin.keys())) + 1)):
126+
bin_ = node2bin[i]
127+
chrom, start = bin_.split(":")
128+
chrom_list.append(chrom_name.index(chrom))
129+
chrom_start.append(int(start))
130+
chrom_end.append(int(start) + res)
131+
132+
print (np.array(chrom_list), np.array(chrom_start))
133+
cooler_bin.create_dataset("chrom", data = chrom_list)
134+
cooler_bin.create_dataset("start", data = chrom_start)
135+
cooler_bin.create_dataset("end", data = chrom_end)
136+
137+
cooler_chrom = grp.create_group("chroms")
138+
cooler_chrom.create_dataset("name", data = [l.encode('utf8') for l in chrom_name], dtype = h5py.special_dtype(vlen=str))
139+
140+
cooler_pixels = grp.create_group("pixels")
141+
bin_id1 = []
142+
bin_id2 = []
143+
balanced = []
144+
145+
146+
147+
for i in range(len(chrom_name)):
82148
pair_wise = generate_pair_wise(i)
149+
print (pair_wise)
150+
bin_id1.append(np.copy(pair_wise[:, 0]) - 1)
151+
bin_id2.append(np.copy(pair_wise[:, 1]) - 1)
83152
# print (pair_wise.shape, pair_wise)
84153
proba = predict(classifier_model, pair_wise).reshape((-1))
85154
if task_mode == 'class':
86155
proba = torch.sigmoid(torch.from_numpy(proba)).numpy()
87156
else:
88157
proba = F.softplus(torch.from_numpy(proba)).numpy()
89-
print ( np.sum(proba >= 0.5) ,proba.shape)
158+
# print ( np.sum(proba >= 0.5) ,proba.shape)
90159

91160
pair_wise_weight = np.array([origin[e[0 ] -1 ,e[1 ] -1] for e in tqdm(pair_wise)])
92161

93162
my_proba = proba2matrix(pair_wise, None, proba)
94-
coverage = np.sqrt(np.sum(my_proba, axis=-1))
95-
my_proba = my_proba / coverage.reshape((-1, 1))
96-
my_proba = my_proba / coverage.reshape((1, -1))
97-
163+
coverage1 = np.sqrt(np.mean(my_proba, axis=-1, keepdims=True))
164+
coverage2 = np.sqrt(np.mean(my_proba, axis=0, keepdims=True))
165+
my_proba = my_proba / (coverage1 + 1e-15)
166+
my_proba = my_proba / (coverage2 + 1e-15)
167+
98168
origin_part = proba2matrix(pair_wise, None, pair_wise_weight)
169+
gap1 = np.sum(origin_part, axis=-1) == 0
170+
gap2 = np.sum(origin_part, axis=0) == 0
171+
coverage1 = np.sqrt(np.mean(origin_part, axis=-1, keepdims=True))
172+
coverage2 = np.sqrt(np.mean(origin_part, axis=0, keepdims=True))
173+
origin_part = origin_part / (coverage1+ 1e-15)
174+
origin_part = origin_part / (coverage2 + 1e-15)
175+
176+
99177
my = my_proba * origin_part
100-
101-
gap = np.sum(origin_part, axis=-1) == 0
102-
my[gap ,:] = 0.0
103-
my[:, gap] = 0.0
104-
my_proba[gap ,:] = 0.0
105-
my_proba[:, gap] = 0.0
178+
my = np.maximum(my_proba * origin_part, my_proba)
179+
coverage1 = np.sqrt(np.mean(my, axis=-1, keepdims=True))
180+
coverage2 = np.sqrt(np.mean(my, axis=0, keepdims=True))
181+
my = my / (coverage1 + 1e-15)
182+
my = my / (coverage2 + 1e-15)
183+
184+
185+
my[gap1 ,:] = 0.0
186+
my[:, gap2] = 0.0
187+
my_proba[gap1 ,:] = 0.0
188+
my_proba[:, gap2] = 0.0
106189

107190
my = transformer.fit_transform(my.reshape((-1, 1))).reshape((len(my), -1))
108191
origin_part = transformer.fit_transform(origin_part.reshape((-1, 1))).reshape((len(origin_part), -1))
@@ -115,8 +198,14 @@ def predict(model, input):
115198
ax = sns.heatmap(my, cmap="Reds", square=True, mask=mask ,cbar=False, vmin=vmin, vmax=vmax)
116199
ax.get_xaxis().set_visible(False)
117200
ax.get_yaxis().set_visible(False)
118-
plt.savefig("../chr%d_denoise.png" %(i+1), dpi=300)
201+
plt.savefig("../%s_denoise.png" %chrom_name[i], dpi=300)
119202
plt.close(fig)
203+
204+
205+
min_ = np.min(pair_wise)
206+
# print (pair_wise[:, 0] - min_, pair_wise[:, 1] - min_, my.shape)
207+
value = my[pair_wise[:, 0] - min_, pair_wise[:, 1] - min_]
208+
balanced.append(value)
120209

121210
# fig = plt.figure(figsize=(5, 5))
122211
# plt.subplots_adjust(left=0.0, right=1.0, top=1.0, bottom=0.0)
@@ -135,7 +224,13 @@ def predict(model, input):
135224
ax = sns.heatmap(origin_part, cmap="Reds", square=True, mask=mask ,cbar=False, vmin=vmin, vmax=vmax)
136225
ax.get_xaxis().set_visible(False)
137226
ax.get_yaxis().set_visible(False)
138-
plt.savefig("../chr%d_origin.png" %(i+1), dpi=300)
227+
plt.savefig("../%s_origin.png" %chrom_name[i], dpi=300)
139228
plt.close(fig)
140229

141230

231+
bin_id1 = np.concatenate(bin_id1, axis=0)
232+
bin_id2 = np.concatenate(bin_id2, axis=0)
233+
balanced = np.concatenate(balanced, axis=0)
234+
cooler_pixels.create_dataset("bin1_id", data=bin_id1)
235+
cooler_pixels.create_dataset("bin2_id", data=bin_id2)
236+
cooler_pixels.create_dataset("balanced", data=balanced)

0 commit comments

Comments
 (0)