13
13
from past .builtins import zip
14
14
from builtins import super
15
15
from .matrixFile import MatrixFile
16
+ import math
17
+ import time
16
18
17
- from hicmatrix .utilities import toString
19
+ from hicmatrix .utilities import toString , toBytes
18
20
from hicmatrix .utilities import convertNansToOnes
19
21
from hicmatrix ._version import __version__
20
22
@@ -36,12 +38,15 @@ def __init__(self, pMatrixFile=None):
36
38
37
39
self .hic2cool_version = None
38
40
self .hicmatrix_version = None
41
+ self .scaleToOriginalRange = None
39
42
40
43
def getInformationCoolerBinNames (self ):
41
44
return cooler .Cooler (self .matrixFileName ).bins ().columns .values
42
45
43
46
def load (self ):
44
47
log .debug ('Load in cool format' )
48
+ self .minValue = None
49
+ self .maxValue = None
45
50
if self .matrixFileName is None :
46
51
log .info ('No matrix is initialized' )
47
52
@@ -58,7 +63,7 @@ def load(self):
58
63
log .info ('The following file was tried to open: {}' .format (self .matrixFileName ))
59
64
log .info ("The following nodes are available: {}" .format (cooler .fileops .list_coolers (self .matrixFileName .split ("::" )[0 ])))
60
65
exit ()
61
-
66
+ log . debug ( 'self.chrnameList {}' . format ( self . chrnameList ))
62
67
if self .chrnameList is None :
63
68
matrixDataFrame = cooler_file .matrix (balance = False , sparse = True , as_pixels = True )
64
69
used_dtype = np .int32
@@ -84,6 +89,9 @@ def load(self):
84
89
features [start_pos :start_pos + len (_features )] = _features
85
90
start_pos += len (_features )
86
91
i += size
92
+ del _data
93
+ del _instances
94
+ del _features
87
95
88
96
# log.debug('max feature {}'.format(np.max(features)))
89
97
# log.debug('max instance {}'.format(np.max(instances)))
@@ -92,13 +100,19 @@ def load(self):
92
100
# log.debug('cooler_file.info[\'nbins\'] {}'.format(type(cooler_file.info['nbins'])))
93
101
94
102
matrix = csr_matrix ((data , (instances , features )), shape = (np .int (cooler_file .info ['nbins' ]), np .int (cooler_file .info ['nbins' ])), dtype = count_dtype )
95
- # del data
96
- # del instances
97
- # del features
103
+ self .minValue = data .min ()
104
+ self .maxValue = data .max ()
105
+
106
+ del data
107
+ del instances
108
+ del features
98
109
else :
99
110
if len (self .chrnameList ) == 1 :
100
111
try :
112
+ # self.chrnameList[0]
101
113
matrix = cooler_file .matrix (balance = False , sparse = True ).fetch (self .chrnameList [0 ]).tocsr ()
114
+ self .minValue = matrix .data .min ()
115
+ self .maxValue = matrix .data .max ()
102
116
except ValueError :
103
117
exit ("Wrong chromosome format. Please check UCSC / ensembl notation." )
104
118
else :
@@ -175,11 +189,35 @@ def load(self):
175
189
elif self .correctionOperator == '/' :
176
190
matrix .data /= instances_factors
177
191
178
- cut_intervals = []
192
+ # if self.scaleToOriginalRange is not None:
193
+ min_value = matrix .data .min ()
194
+ max_value = matrix .data .max ()
195
+ # check whether the max is smaller than one or differs from the original max by more than one order of magnitude
196
+ if max_value < 1 or (np .absolute (int (math .log10 (max_value )) - int (math .log10 (self .maxValue ))) > 1 ):
197
+ desired_range_difference = self .maxValue - self .minValue
198
+
199
+ min_value = matrix .data .min ()
200
+ max_value = matrix .data .max ()
201
+
202
+ matrix .data = (matrix .data - min_value )
203
+ matrix .data /= (max_value - min_value )
204
+ matrix .data *= desired_range_difference
205
+ matrix .data += self .minValue
206
+ self .scaleToOriginalRange = True
207
+ # diff_scale_factor = matrix.data.max() / max_value
208
+ # if self.correctionOperator == '*':
209
+ # correction_factors *= diff_scale_factor
210
+ # if self.correctionOperator == '/':
211
+ # correction_factors /= diff_scale_factor
179
212
213
+ cut_intervals = []
214
+ time_start = time .time ()
215
+ log .debug ('Creating cut_intervals {}' .format (time_start ))
180
216
for values in cut_intervals_data_frame .values :
181
217
cut_intervals .append (tuple ([toString (values [0 ]), values [1 ], values [2 ], 1.0 ]))
182
-
218
+ log .debug ('Creating cut_intervals {} DONE' .format (time .time () - time_start ))
219
+ del cut_intervals_data_frame
220
+ del correction_factors_data_frame
183
221
# try to restore nan_bins.
184
222
try :
185
223
shape = matrix .shape [0 ] if matrix .shape [0 ] < matrix .shape [1 ] else matrix .shape [1 ]
@@ -266,6 +304,16 @@ def save(self, pFileName, pSymmetric=True, pApplyCorrection=True):
266
304
log .debug ('self.correctionOperator: {}' .format (self .correctionOperator ))
267
305
log .debug ('self.fileWasH5: {}' .format (self .fileWasH5 ))
268
306
307
+ if self .scaleToOriginalRange :
308
+ min_value = self .matrix .data .min ()
309
+ max_value = self .matrix .data .max ()
310
+ desired_range_difference = max_value - min_value
311
+
312
+ self .matrix .data = (self .matrix .data - self .minValue )
313
+ self .matrix .data /= (self .maxValue - self .minValue )
314
+ self .matrix .data *= desired_range_difference
315
+ self .matrix .data += min_value
316
+
269
317
if self .correctionOperator == '*' or self .correctionOperator is None :
270
318
self .matrix .data /= instances_factors
271
319
elif self .correctionOperator == '/' or self .fileWasH5 :
@@ -276,6 +324,11 @@ def save(self, pFileName, pSymmetric=True, pApplyCorrection=True):
276
324
277
325
self .matrix .eliminate_zeros ()
278
326
327
+ if self .correction_factors is not None and pApplyCorrection is False :
328
+ dtype_pixel ['weight' ] = np .float32
329
+ weight = convertNansToOnes (np .array (self .correction_factors ).flatten ())
330
+ bins_data_frame = bins_data_frame .assign (weight = weight )
331
+
279
332
instances , features = self .matrix .nonzero ()
280
333
281
334
matrix_data_frame = pd .DataFrame (instances , columns = ['bin1_id' ], dtype = np .int32 )
@@ -348,7 +401,7 @@ def save(self, pFileName, pSymmetric=True, pApplyCorrection=True):
348
401
metadata = self .hic_metadata ,
349
402
temp_dir = local_temp_dir )
350
403
351
- log .debug ('info {}' .format (info ))
404
+ # log.debug('info {}'.format(info))
352
405
if self .appendData == 'w' :
353
406
fileName = pFileName .split ('::' )[0 ]
354
407
with h5py .File (fileName , 'r+' ) as h5file :
0 commit comments