3
3
import os
4
4
from pathlib import Path
5
5
6
+ import dask
7
+ import dask .dataframe as dd
8
+ import large_image
6
9
import numpy as np
10
+ import pandas as pd
11
+
12
+ import histomicstk .segmentation .label as htk_seg_label
13
+ import histomicstk .segmentation .nuclear as htk_nuclear
7
14
8
15
try :
9
16
import joblib
20
27
logging .basicConfig (level = logging .CRITICAL )
21
28
22
29
30
def set_reference_values(args):
    """
    Set reference values and configuration parameters for feature extraction.

    Every attribute listed below is assigned unconditionally, so any value
    already present on ``args`` under the same name is overwritten.
    Attributes not listed here are left untouched.

    Args:
        args: Attribute-style parameter container (anything that supports
            ``setattr``, e.g. an ``argparse.Namespace``). It is modified in
            place.

    Returns:
        The same ``args`` object, with the reference values set.
    """
    # The table is built inside the function so each call gets fresh list
    # objects; callers mutating e.g. ``args.stain_1_vector`` cannot affect
    # a later call.
    reference_values = {
        'reference_mu_lab': [8.63234435, -0.11501964, 0.03868433],
        'reference_std_lab': [0.57506023, 0.10403329, 0.01364062],
        'foreground_threshold': 60,
        'min_radius': 6,
        'max_radius': 20,
        'min_nucleus_area': 80,
        'local_max_search_radius': 10,
        'nuclei_annotation_format': "boundary",
        'stain_1': "hematoxylin",
        'stain_1_vector': [-1.0, -1.0, -1.0],
        'stain_2': "eosin",
        'stain_2_vector': [-1.0, -1.0, -1.0],
        'stain_3': "null",
        'stain_3_vector': [-1.0, -1.0, -1.0],
        'ignore_border_nuclei': False,
        'cyto_width': 8,
        'cytoplasm_features': True,
        'fsd_bnd_pts': 128,
        'fsd_features': True,
        'fsd_freq_bins': 6,
        'gradient_features': True,
        'haralick_features': True,
        'morphometry_features': True,
        'intensity_features': True,
        'num_glcm_levels': 32,
        'min_fgnd_frac': .25,
        'analysis_roi': None,
    }

    for name, value in reference_values.items():
        setattr(args, name, value)

    return args
70
+
71
+
23
72
def gen_distinct_rgb_colors (n , seed = None ):
24
73
"""
25
74
Generates N visually distinct RGB colors
@@ -57,8 +106,94 @@ def gen_distinct_rgb_colors(n, seed=None):
57
106
return color_list
58
107
59
108
109
def process_feature_and_annotation(args):
    """
    Process nuclei feature extraction and annotation from an input image.

    The image named by ``args.inputImageFile`` is opened as a tile source,
    nuclei detection is scheduled once per tile through ``dask.delayed``,
    and the per-tile results are merged into a single annotation list and a
    single feature table.

    Args:
        args: Attribute-style parameter container (e.g. an
            ``argparse.Namespace``). It must provide ``inputImageFile``; the
            detection parameters are (re)set on it by
            ``set_reference_values``.

    Returns:
        tuple: ``(nuclei_annot_list, ddf)`` where ``nuclei_annot_list`` is
        the annotation list after overlap removal and ``ddf`` is a
        single-partition Dask DataFrame of the feature data with NaN
        replaced by 0. ``ddf`` is built from an empty frame when no
        annotations or no per-tile feature data are available.
    """

    print('>> Generating features and annotation')

    #
    # Set arguments required for nuclei feature extraction
    #
    args = set_reference_values(args)
    tile_overlap = (args.max_radius + 1) * 4
    it_kwargs = {'tile_overlap': {'x': tile_overlap, 'y': tile_overlap}}

    #
    # Read Input Image
    #
    print('\n >> Reading input image ... \n ')

    ts = large_image.getTileSource(args.inputImageFile)

    ts_metadata = ts.getMetadata()

    print(json.dumps(ts_metadata, indent=2))

    # No source colour statistics are computed here; both are passed to the
    # detector as None.
    src_mu_lab = None
    src_sigma_lab = None

    #
    # Detect and compute nuclei features in parallel using Dask
    #
    print('\n >> Detecting nuclei and computing features ...\n ')

    tile_result_list = []

    for tile in ts.tileIterator(**it_kwargs):

        # schedule nuclei detection for this tile (nothing runs yet)
        cur_result = dask.delayed(htk_nuclear.detect_tile_nuclei)(
            tile,
            args,
            src_mu_lab, src_sigma_lab,
            return_fdata=True,
        )

        tile_result_list.append(cur_result)

    # Execute all scheduled tile tasks; each result is unpacked below as an
    # (annotation list, feature data) pair.
    tile_result_list = dask.delayed(tile_result_list).compute()

    nuclei_annot_list = [annot
                         for annot_list, _ in tile_result_list
                         for annot in annot_list]

    # remove overlapping nuclei
    # NOTE(review): overlap removal is applied to the annotations only; the
    # feature rows collected below are not filtered. If this call drops
    # entries, the annotation and feature counts may differ -- confirm
    # against the HistomicsTK API.
    nuclei_annot_list = htk_seg_label.remove_overlap_nuclei(
        nuclei_annot_list, args.nuclei_annotation_format)

    # Tiles may yield None instead of feature data; skip those.
    feature_frames = [fdata
                      for _, fdata in tile_result_list
                      if fdata is not None]

    # pd.concat raises ValueError on an empty sequence, so only concatenate
    # when there is something to merge.
    if len(nuclei_annot_list) > 0 and feature_frames:
        nuclei_fdata = pd.concat(feature_frames, ignore_index=True)
    else:
        nuclei_fdata = pd.DataFrame()

    # Fill any instances with NaN as zero
    df = pd.DataFrame(nuclei_fdata).fillna(0)
    return nuclei_annot_list, dd.from_pandas(df, npartitions=1)
185
+
186
+
60
187
def read_feature_file (args ):
61
- import dask .dataframe as dd
188
+ """
189
+ Read nuclei feature data from a specified file.
190
+
191
+ Args:
192
+ args (dict): Configuration parameters including the input feature file path.
193
+
194
+ Returns:
195
+ dask.dataframe.DataFrame: A Dask DataFrame containing the nuclei feature data.
196
+ """
62
197
63
198
fname , feature_file_format = os .path .splitext (args .inputNucleiFeatureFile )
64
199
@@ -73,20 +208,11 @@ def read_feature_file(args):
73
208
else :
74
209
raise ValueError ('Extension of output feature file must be .csv or .h5' )
75
210
76
- return ddf
77
-
78
-
79
- def check_args (args ):
80
-
81
- if not os .path .isfile (args .inputImageFile ):
82
- raise OSError ('Input image file does not exist.' )
83
-
84
- if not os .path .isfile (args .inputModelFile ):
85
- raise OSError ('Input model file does not exist.' )
211
+ # Fill any instances with NaN as zero
212
+ return ddf .fillna (0 )
86
213
87
214
88
215
def main (args ):
89
- import pandas as pd
90
216
91
217
print ('\n >> CLI Parameters ...\n ' )
92
218
@@ -105,37 +231,38 @@ def main(args):
105
231
# read model file
106
232
#
107
233
print ('\n >> Loading classification model ...\n ' )
108
-
109
234
clf_model = joblib .load (args .inputModelFile )
110
235
111
- #
112
- # read feature file
113
- #
114
- print ('\n >> Loading nuclei feature file ...\n ' )
236
+ if args .inputNucleiFeatureFile and args .inputNucleiAnnotationFile :
115
237
116
- ddf = read_feature_file (args )
238
+ # read feature file
239
+ print ('\n >> Loading nuclei feature file ...\n ' )
117
240
118
- if len ( ddf . columns ) != clf_model . n_features_in_ :
241
+ ddf = read_feature_file ( args )
119
242
120
- raise ValueError ('The number of features of the classification model '
121
- 'and the input feature file do not match.' )
243
+ if len (ddf .columns ) != clf_model .n_features_in_ :
122
244
123
- #
124
- # read nuclei annotation file
125
- #
126
- print ('\n >> Loading nuclei annotation file ...\n ' )
245
+ raise ValueError ('The number of features of the classification model '
246
+ 'and the input feature file do not match.' )
247
+
248
+ #
249
+ # read nuclei annotation file
250
+ #
251
+ print ('\n >> Loading nuclei annotation file ...\n ' )
127
252
128
- with open (args .inputNucleiAnnotationFile ) as f :
253
+ with open (args .inputNucleiAnnotationFile ) as f :
129
254
130
- annotation_data = json .load (f )
131
- nuclei_annot_list = annotation_data .get (
132
- 'elements' , annotation_data .get (
133
- 'annotation' , {}).get ('elements' ))
255
+ annotation_data = json .load (f )
256
+ nuclei_annot_list = annotation_data .get (
257
+ 'elements' , annotation_data .get (
258
+ 'annotation' , {}).get ('elements' ))
134
259
135
- if len (nuclei_annot_list ) != len (ddf .index ):
260
+ if len (nuclei_annot_list ) != len (ddf .index ):
136
261
137
- raise ValueError ('The number of nuclei in the feature file and the '
138
- 'annotation file do not match' )
262
+ raise ValueError ('The number of nuclei in the feature file and the '
263
+ 'annotation file do not match' )
264
+ else :
265
+ nuclei_annot_list , ddf = process_feature_and_annotation (args )
139
266
140
267
#
141
268
# Perform nuclei classification
0 commit comments