@@ -1121,70 +1121,79 @@ def identify_gc_instances(
1121
1121
all_hgs = set (lt_to_hg .values ()) if lt_to_hg else set ()
1122
1122
key_hgs = query_information ["key_hgs" ]
1123
1123
1124
- # Create DenseHMM - using the pomegrenate library
1125
- gc_hg_probs = [gc_emission_prob_without_hit ]
1126
- bg_hg_probs = [1.0 - gc_emission_prob_without_hit ]
1127
- model_labels = ["background" ]
1128
- for hg in all_hgs :
1129
- model_labels .append (hg )
1130
- gc_hg_probs .append (gc_emission_prob_with_hit )
1131
- bg_hg_probs .append (1.0 - gc_emission_prob_with_hit )
1132
-
1133
- gc_cat = Categorical ([gc_hg_probs ])
1134
- bg_cat = Categorical ([bg_hg_probs ])
1135
-
1136
- model = DenseHMM ()
1137
- model .add_distributions ([gc_cat , bg_cat ])
1138
-
1139
- gc_to_gc = gc_to_gc_transition_prob
1140
- gc_to_bg = 1.0 - gc_to_gc_transition_prob
1141
- bg_to_bg = bg_to_bg_transition_prob
1142
- bg_to_gc = 1.0 - bg_to_bg_transition_prob
1143
-
1144
- start_to_gc = 0.5
1145
- start_to_bg = 0.5
1146
- gc_to_end = 0.5
1147
- bg_to_end = 0.5
1148
-
1149
- model .add_edge (model .start , gc_cat , start_to_gc )
1150
- model .add_edge (model .start , bg_cat , start_to_bg )
1151
- model .add_edge (gc_cat , model .end , gc_to_end )
1152
- model .add_edge (bg_cat , model .end , bg_to_end )
1153
- model .add_edge (gc_cat , gc_cat , gc_to_gc )
1154
- model .add_edge (gc_cat , bg_cat , gc_to_bg )
1155
- model .add_edge (bg_cat , gc_cat , bg_to_gc )
1156
- model .add_edge (bg_cat , bg_cat , bg_to_bg )
1157
-
1158
- # Test HMM model for multiprocessing safety
1159
- hmm_model_safe = True
1160
- if platform_type in ['linux' , 'macos' ]:
1161
- try :
1162
- import pickle
1163
- log_object .info ("Testing HMM model for multiprocessing safety..." )
1164
-
1165
- # Test 1: Check if model can be pickled/unpickled
1166
- test_model = pickle .dumps (model )
1167
- test_model = pickle .loads (test_model )
1168
-
1169
- # Test 2: Check if model can perform predictions after pickling
1170
- test_seq = numpy .array ([[[0 ]]]) # Simple test sequence
1171
- test_prediction = test_model .predict (test_seq )
1172
-
1173
- # Test 3: Check if model works with actual data structure
1174
- if len (model_labels ) > 1 :
1175
- test_hg_seq = numpy .array ([[[model_labels .index ("background" )]]])
1176
- test_hmm_pred = test_model .predict (test_hg_seq )
1177
-
1178
- log_object .info ("HMM model passed multiprocessing safety tests" )
1179
- hmm_model_safe = True
1180
-
1181
- except Exception as e :
1182
- log_object .warning (f"HMM model failed multiprocessing safety test: { e } " )
1183
- log_object .warning ("Falling back to single-threaded processing for HMM operations" )
1184
- hmm_model_safe = False
1185
- else :
1186
- # For other platforms, assume not safe
1187
- hmm_model_safe = False
1124
+ hmm_model_safe_or_doesnt_matter = True
1125
+ model = None
1126
+ model_labels = None
1127
+ if gc_delineation_mode == "HMM" :
1128
+ # Create DenseHMM - using the pomegranate library
1129
+ gc_hg_probs = [gc_emission_prob_without_hit ]
1130
+ bg_hg_probs = [1.0 - gc_emission_prob_without_hit ]
1131
+ model_labels = ["background" ]
1132
+ for hg in all_hgs :
1133
+ model_labels .append (hg )
1134
+ gc_hg_probs .append (gc_emission_prob_with_hit )
1135
+ bg_hg_probs .append (1.0 - gc_emission_prob_with_hit )
1136
+
1137
+ gc_cat = Categorical ([gc_hg_probs ])
1138
+ bg_cat = Categorical ([bg_hg_probs ])
1139
+
1140
+ model = DenseHMM ()
1141
+ model .add_distributions ([gc_cat , bg_cat ])
1142
+
1143
+ gc_to_gc = gc_to_gc_transition_prob
1144
+ gc_to_bg = 1.0 - gc_to_gc_transition_prob
1145
+ bg_to_bg = bg_to_bg_transition_prob
1146
+ bg_to_gc = 1.0 - bg_to_bg_transition_prob
1147
+
1148
+ start_to_gc = 0.5
1149
+ start_to_bg = 0.5
1150
+ gc_to_end = 0.5
1151
+ bg_to_end = 0.5
1152
+
1153
+ model .add_edge (model .start , gc_cat , start_to_gc )
1154
+ model .add_edge (model .start , bg_cat , start_to_bg )
1155
+ model .add_edge (gc_cat , model .end , gc_to_end )
1156
+ model .add_edge (bg_cat , model .end , bg_to_end )
1157
+ model .add_edge (gc_cat , gc_cat , gc_to_gc )
1158
+ model .add_edge (gc_cat , bg_cat , gc_to_bg )
1159
+ model .add_edge (bg_cat , gc_cat , bg_to_gc )
1160
+ model .add_edge (bg_cat , bg_cat , bg_to_bg )
1161
+
1162
+ # Test HMM model for multiprocessing safety
1163
+ if platform_type in ['linux' , 'macos' ]:
1164
+ try :
1165
+ import pickle
1166
+ log_object .info ("Testing HMM model for multiprocessing safety..." )
1167
+
1168
+ # Test 1: Check if model can be pickled/unpickled
1169
+ test_model = pickle .dumps (model )
1170
+ test_model = pickle .loads (test_model )
1171
+
1172
+ # Test 2: Check if model can perform predictions after pickling
1173
+ test_seq = numpy .array ([[[0 ]]]) # Simple test sequence
1174
+ test_prediction = test_model .predict (test_seq )
1175
+
1176
+ # Test 3: Check if model works with actual data structure
1177
+ if len (model_labels ) > 1 :
1178
+ test_hg_seq = numpy .array ([[[model_labels .index ("background" )]]])
1179
+ test_hmm_pred = test_model .predict (test_hg_seq )
1180
+
1181
+ msg = "HMM model passed multiprocessing safety tests"
1182
+ log_object .info (msg )
1183
+ sys .stdout .write (msg + '\n ' )
1184
+
1185
+ except Exception as e :
1186
+ msg = f"HMM model failed multiprocessing safety test: { e } "
1187
+ log_object .warning (msg )
1188
+ sys .stdout .write (msg + '\n ' )
1189
+ msg = "Falling back to single-threaded processing for HMM operations"
1190
+ log_object .warning (msg )
1191
+ sys .stdout .write (msg + '\n ' )
1192
+ hmm_model_safe_or_doesnt_matter = False
1193
+ else :
1194
+ # For other platforms, assume not safe
1195
+ hmm_model_safe_or_doesnt_matter = False
1196
+
1188
1197
1189
1198
gc_hmm_evalues_file = (
1190
1199
work_dir + "GeneCluster_NewInstances_HMMEvalues.txt"
@@ -1305,7 +1314,7 @@ def identify_gc_instances(
1305
1314
sys .stdout .write (msg + '\n ' )
1306
1315
1307
1316
# Use multiprocessing for Linux/macOS with safe HMM model, otherwise use single-threaded processing
1308
- if platform_type in ['linux' , 'macos' ] and hmm_model_safe :
1317
+ if platform_type in ['linux' , 'macos' ] and hmm_model_safe_or_doesnt_matter :
1309
1318
try :
1310
1319
p = multiprocessing .Pool (threads )
1311
1320
for _ in tqdm .tqdm (
@@ -1392,8 +1401,9 @@ def _identify_gc_instances_worker(input_args):
1392
1401
assert sample_lt_to_evalue is not None
1393
1402
assert boundary_genes is not None
1394
1403
assert lt_to_hg is not None
1395
- assert model_labels is not None
1396
- assert model is not None
1404
+ if gc_delineation_mode == "HMM" :
1405
+ assert model_labels is not None
1406
+ assert model is not None
1397
1407
1398
1408
if single_query_mode :
1399
1409
with open (gc_info_dir + sample + ".bgcs.txt" , "w" ) as gc_sample_listing_handle :
0 commit comments