Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions MaskHIT_Prep/03_get_patches_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,9 @@ def extract_tcga_info(path):
# TODO: This should work but need to check with real data.
df['id_patient'], df['type'] = zip(*df['file'].apply(extract_tcga_info))
else:
#TODO: This behavior seems dangerous. Shouldn't we have a separate script
#and describe the input file format so we don't need to update code
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes I agree, I think we can define id_patient in the dataset class itself.

#for each dataset?
df['id_patient'] = df['id_svs']
df['type'] = '01Z'

Expand Down
15 changes: 15 additions & 0 deletions MaskHIT_Prep/04_feature_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,21 @@ def main():

model = create_model(num_layers, True, 1)
model.fc = nn.Identity()

# User-defined feature extractor
if config.patch.pretrained_model_path:
# Load the pretrained model state dict
pretrained_dict = torch.load(config.patch.pretrained_model_path)

if isinstance(pretrained_dict, nn.Module):
# If it's a full model, convert to a state dict from it
pretrained_dict = pretrained_dict.state_dict()

# Filter out `fc` layer from the pretrained_dict
pretrained_dict = {k: v for k, v in pretrained_dict.items() if k not in ['fc.weight', 'fc.bias']}
# Load the filtered state dict into your model
model.load_state_dict(pretrained_dict, strict=False)

model.cuda()
model.eval()

Expand Down
1 change: 1 addition & 0 deletions MaskHIT_Prep/05_post_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ def get_counts_chunck(data, delta):
df_svs = df_svs.merge(df_sum, on='id_svs', how='inner')
df_svs = df_svs.loc[df_svs.valid > 25]
df_svs.to_pickle(svs_meta)
df_svs.head(2).to_csv(str(svs_meta).replace('.pickle', '_preview.txt'), index=False, sep='\t')

print("")
print("=" * 40)
Expand Down
34 changes: 29 additions & 5 deletions MaskHIT_Prep/configs/config_default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@
# (custom): more likely to need modification for each user
#---------------------------------------------------

#TODO: Clarify which values are minimally required.
#Suggestions:
# <SET_YOUR_OWN_VALUE> for required values.
# Use null for optional values. When using null, do not add a type hint.
# Instead, note the type in the comment: (optional, string)

study:
## Study Configuration
Expand All @@ -22,23 +28,29 @@ study:
# (1)
# Indicates a dataset class for json processing pipeline
# See datasets/ directory for available classes.
# Available: TCGA, GramStains, IBD
# Please implement a dataset file if necessary
# Currently available: TCGA, GramStains, IBD, RCCp
# Please implement a dataset file if necessary.
dataset_type: !!str <SET_YOUR_OWN_VALUE>

# (0,1)
# Path to the folder containing svs data
# Example: /pool2/data/WSI_TCGA/Colorectal
# Set to "tiles" if using tile preprocessing
# This can be left unset if you have a JSON file with each slide path
# that is properly parsed in your dataset class.
svs_dir: !!str <SET_YOUR_OWN_VALUE>

# (1)
# Path to the json file with dataset info.
# Please read README to prepare a JSON file.
json_path: !!str <SET_YOUR_OWN_VALUE>

# (1)
# Dataset field to use with stratification.
stratify_by: !!str status
# Dataset field to use with stratification (Optional[String]).
# Set null for no stratification.
#TODO: Change the default value from 'status' to <SET_YOUR_OWN_VALUE>,
#and add examples.
stratify_by: null

# (1)
# Number of folds for kfold cross validation
Expand Down Expand Up @@ -100,6 +112,7 @@ patch:
# (2, 3, 5)
# Path to the pickle file containing metadata for the .svs files
# Example: meta/dhmc_rcc_svs.pickle
# TODO: This is an output path, right? We should clarify that it is not an input.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, a meta-file is created by 01_get_svs_meta.py and then is used by other pre-processing scripts such as 02_patch_extraction.py

svs_meta: !!str <SET_YOUR_OWN_VALUE>

# (2, 4)
Expand All @@ -108,7 +121,17 @@ patch:

# (4, 5)
# Backbone model for feature extraction
backbone: resnet_18
# Currently, only models from the ResNet family are available.
# Default: resnet_18, using ImageNet features.
# Replace with another ResNet model if necessary.
backbone: !!str resnet_18

# (4, 5)
# Path to a custom feature extractor snapshot (optional, string)
# Optional: Set this only if you have a specialized feature extractor,
# possibly pretrained on a relevant dataset.
# Default: Uses the standard ImageNet pretrained model.
pretrained_model_path: null

# (4)
# Batch size for processing slide patches
Expand All @@ -122,6 +145,7 @@ patch:
# (2)
# Mask magnification for color filtering during patch extraction.
# A lower magnification will run faster but will be less precise
#TODO: Ask why this particular value was chosen.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure. Maybe we should give the user a choice to set this value themselves?

mag_mask: !!float 0.3125

feature:
Expand Down