Skip to content

Commit 9c79810

Browse files
authored
Nuclei classification can run without feature / annotation files (#1051)
1 parent 845b569 commit 9c79810

File tree

2 files changed

+164
-37
lines changed

2 files changed

+164
-37
lines changed

histomicstk/cli/NucleiClassification/NucleiClassification.py

100644100755
Lines changed: 160 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,14 @@
33
import os
44
from pathlib import Path
55

6+
import dask
7+
import dask.dataframe as dd
8+
import large_image
69
import numpy as np
10+
import pandas as pd
11+
12+
import histomicstk.segmentation.label as htk_seg_label
13+
import histomicstk.segmentation.nuclear as htk_nuclear
714

815
try:
916
import joblib
@@ -20,6 +27,48 @@
2027
logging.basicConfig(level=logging.CRITICAL)
2128

2229

30+
def set_reference_values(args):
    """
    Set reference values and configuration parameters for feature extraction.

    Every attribute listed below is assigned unconditionally, so a value
    already stored on ``args`` under the same name is overwritten.
    Attributes that are not listed are left untouched.

    Args:
        args: Object supporting attribute assignment (for example an
            ``argparse.Namespace``). A plain ``dict`` will not work because
            the values are written with ``setattr``.

    Returns:
        The same ``args`` object, mutated in place, with the reference
        values set.
    """
    # The mapping is rebuilt on every call so that each invocation hands out
    # fresh list objects instead of sharing mutable state between callers.
    reference_values = {
        'reference_mu_lab': [8.63234435, -0.11501964, 0.03868433],
        'reference_std_lab': [0.57506023, 0.10403329, 0.01364062],
        'foreground_threshold': 60,
        'min_radius': 6,
        'max_radius': 20,
        'min_nucleus_area': 80,
        'local_max_search_radius': 10,
        'nuclei_annotation_format': "boundary",
        'stain_1': "hematoxylin",
        'stain_1_vector': [-1.0, -1.0, -1.0],
        'stain_2': "eosin",
        'stain_2_vector': [-1.0, -1.0, -1.0],
        'stain_3': "null",
        'stain_3_vector': [-1.0, -1.0, -1.0],
        'ignore_border_nuclei': False,
        'cyto_width': 8,
        'cytoplasm_features': True,
        'fsd_bnd_pts': 128,
        'fsd_features': True,
        'fsd_freq_bins': 6,
        'gradient_features': True,
        'haralick_features': True,
        'morphometry_features': True,
        'intensity_features': True,
        'num_glcm_levels': 32,
        'min_fgnd_frac': .25,
        'analysis_roi': None,
    }

    for name, value in reference_values.items():
        setattr(args, name, value)

    return args
70+
71+
2372
def gen_distinct_rgb_colors(n, seed=None):
2473
"""
2574
Generates N visually distinct RGB colors
@@ -57,8 +106,94 @@ def gen_distinct_rgb_colors(n, seed=None):
57106
return color_list
58107

59108

109+
def process_feature_and_annotation(args):
    """
    Detect nuclei in the input image and compute their features.

    Used when no precomputed feature / annotation files are supplied: the
    image named by ``args.inputImageFile`` is tiled, nuclei are detected on
    every tile through Dask, and the per-tile results are merged.

    Args:
        args: Object with attribute access holding the CLI parameters. It is
            passed through ``set_reference_values``, which assigns the
            feature-extraction settings on it.

    Returns:
        tuple: ``(nuclei_annot_list, ddf)`` where ``nuclei_annot_list`` is
        the annotation list after overlap removal and ``ddf`` is a
        single-partition Dask DataFrame of the features with NaN replaced
        by zero.
    """
    print('>> Generating features and annotation')

    # Fill in the settings needed by the nuclei detection / feature code
    args = set_reference_values(args)
    overlap = (args.max_radius + 1) * 4
    iterator_kwargs = {'tile_overlap': {'x': overlap, 'y': overlap}}

    # Open the slide and report its metadata
    print('\n>> Reading input image ... \n')
    tile_source = large_image.getTileSource(args.inputImageFile)
    source_metadata = tile_source.getMetadata()
    print(json.dumps(source_metadata, indent=2))

    # No source colour statistics are supplied to the detector
    src_mu_lab = src_sigma_lab = None

    # Queue one delayed detection task per tile, then evaluate them together
    print('\n>> Detecting nuclei and computing features ...\n')
    pending_tiles = [
        dask.delayed(htk_nuclear.detect_tile_nuclei)(
            tile,
            args,
            src_mu_lab, src_sigma_lab,
            return_fdata=True,
        )
        for tile in tile_source.tileIterator(**iterator_kwargs)
    ]
    tile_results = dask.delayed(pending_tiles).compute()

    # Flatten the per-tile annotation lists and drop overlapping nuclei
    all_annotations = [
        annotation
        for tile_annotations, _ in tile_results
        for annotation in tile_annotations
    ]
    nuclei_annot_list = htk_seg_label.remove_overlap_nuclei(
        all_annotations, args.nuclei_annotation_format)

    # NOTE(review): only the annotation list has overlaps removed; the
    # feature rows below are concatenated from every tile as-is. Confirm the
    # two stay aligned row-for-row.
    if len(nuclei_annot_list) > 0:
        tile_features = [
            features for _, features in tile_results if features is not None
        ]
        nuclei_fdata = pd.concat(tile_features, ignore_index=True)
    else:
        nuclei_fdata = pd.DataFrame()

    # Fill any instances with NaN as zero
    filled = pd.DataFrame(nuclei_fdata).fillna(0)
    return nuclei_annot_list, dd.from_pandas(filled, npartitions=1)
185+
186+
60187
def read_feature_file(args):
61-
import dask.dataframe as dd
188+
"""
189+
Read nuclei feature data from a specified file.
190+
191+
Args:
192+
args (dict): Configuration parameters including the input feature file path.
193+
194+
Returns:
195+
dask.dataframe.DataFrame: A Dask DataFrame containing the nuclei feature data.
196+
"""
62197

63198
fname, feature_file_format = os.path.splitext(args.inputNucleiFeatureFile)
64199

@@ -73,20 +208,11 @@ def read_feature_file(args):
73208
else:
74209
raise ValueError('Extension of output feature file must be .csv or .h5')
75210

76-
return ddf
77-
78-
79-
def check_args(args):
80-
81-
if not os.path.isfile(args.inputImageFile):
82-
raise OSError('Input image file does not exist.')
83-
84-
if not os.path.isfile(args.inputModelFile):
85-
raise OSError('Input model file does not exist.')
211+
# Fill any instances with NaN as zero
212+
return ddf.fillna(0)
86213

87214

88215
def main(args):
89-
import pandas as pd
90216

91217
print('\n>> CLI Parameters ...\n')
92218

@@ -105,37 +231,38 @@ def main(args):
105231
# read model file
106232
#
107233
print('\n>> Loading classification model ...\n')
108-
109234
clf_model = joblib.load(args.inputModelFile)
110235

111-
#
112-
# read feature file
113-
#
114-
print('\n>> Loading nuclei feature file ...\n')
236+
if args.inputNucleiFeatureFile and args.inputNucleiAnnotationFile:
115237

116-
ddf = read_feature_file(args)
238+
# read feature file
239+
print('\n>> Loading nuclei feature file ...\n')
117240

118-
if len(ddf.columns) != clf_model.n_features_in_:
241+
ddf = read_feature_file(args)
119242

120-
raise ValueError('The number of features of the classification model '
121-
'and the input feature file do not match.')
243+
if len(ddf.columns) != clf_model.n_features_in_:
122244

123-
#
124-
# read nuclei annotation file
125-
#
126-
print('\n>> Loading nuclei annotation file ...\n')
245+
raise ValueError('The number of features of the classification model '
246+
'and the input feature file do not match.')
247+
248+
#
249+
# read nuclei annotation file
250+
#
251+
print('\n>> Loading nuclei annotation file ...\n')
127252

128-
with open(args.inputNucleiAnnotationFile) as f:
253+
with open(args.inputNucleiAnnotationFile) as f:
129254

130-
annotation_data = json.load(f)
131-
nuclei_annot_list = annotation_data.get(
132-
'elements', annotation_data.get(
133-
'annotation', {}).get('elements'))
255+
annotation_data = json.load(f)
256+
nuclei_annot_list = annotation_data.get(
257+
'elements', annotation_data.get(
258+
'annotation', {}).get('elements'))
134259

135-
if len(nuclei_annot_list) != len(ddf.index):
260+
if len(nuclei_annot_list) != len(ddf.index):
136261

137-
raise ValueError('The number of nuclei in the feature file and the '
138-
'annotation file do not match')
262+
raise ValueError('The number of nuclei in the feature file and the '
263+
'annotation file do not match')
264+
else:
265+
nuclei_annot_list, ddf = process_feature_and_annotation(args)
139266

140267
#
141268
# Perform nuclei classification

histomicstk/cli/NucleiClassification/NucleiClassification.xml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
<version>0.1.0</version>
77
<documentation-url>https://digitalslidearchive.github.io/HistomicsTK/</documentation-url>
88
<license>Apache 2.0</license>
9-
<contributor>Deepak Roy Chittajallu (Kitware), Neal Siekierski (Kitware)</contributor>
9+
<contributor>Deepak Roy Chittajallu (Kitware), Neal Siekierski (Kitware), Subin Erattakulangara (Kitware)</contributor>
1010
<acknowledgements>This work is part of the HistomicsTK project.</acknowledgements>
1111
<parameters>
1212
<label>IO</label>
@@ -29,21 +29,21 @@
2929
<name>inputNucleiFeatureFile</name>
3030
<label>Input Nuclei Feature File</label>
3131
<channel>input</channel>
32-
<index>2</index>
32+
<longflag>feature-file</longflag>
3333
<description>Input nuclei feature file (*.csv, *.h5) containing the features of all nuclei to be classified</description>
3434
</file>
3535
<file fileExtensions=".anot">
3636
<name>inputNucleiAnnotationFile</name>
3737
<label>Input Nuclei Annotation File</label>
3838
<channel>input</channel>
39-
<index>3</index>
39+
<longflag>annotation-file</longflag>
4040
<description>Input nuclei annotation file (*.anot) containing nuclei annotations in the same order as their features in the feature file</description>
4141
</file>
4242
<file fileExtensions=".anot" reference="inputImageFile">
4343
<name>outputNucleiAnnotationFile</name>
4444
<label>Output Nuclei Annotation File</label>
4545
<channel>output</channel>
46-
<index>4</index>
46+
<index>2</index>
4747
<description>Output nuclei annotation file (*.anot) (with the same nuclei as in the input nuclei annotation file, if provided) with nuclei sorted into groups based on class and accompanied by heatmaps of the classification probabilities</description>
4848
</file>
4949
</parameters>

0 commit comments

Comments
 (0)