Skip to content

Commit 87a5b4c

Browse files
committed
update to v1.6.4
1 parent 1363b3c commit 87a5b4c

File tree

1 file changed

+77
-67
lines changed

1 file changed

+77
-67
lines changed

src/zol/fai.py

Lines changed: 77 additions & 67 deletions
Original file line number | Diff line number | Diff line change
@@ -1121,70 +1121,79 @@ def identify_gc_instances(
11211121
all_hgs = set(lt_to_hg.values()) if lt_to_hg else set()
11221122
key_hgs = query_information["key_hgs"]
11231123

1124-
# Create DenseHMM - using the pomegranate library
1125-
gc_hg_probs = [gc_emission_prob_without_hit]
1126-
bg_hg_probs = [1.0 - gc_emission_prob_without_hit]
1127-
model_labels = ["background"]
1128-
for hg in all_hgs:
1129-
model_labels.append(hg)
1130-
gc_hg_probs.append(gc_emission_prob_with_hit)
1131-
bg_hg_probs.append(1.0 - gc_emission_prob_with_hit)
1132-
1133-
gc_cat = Categorical([gc_hg_probs])
1134-
bg_cat = Categorical([bg_hg_probs])
1135-
1136-
model = DenseHMM()
1137-
model.add_distributions([gc_cat, bg_cat])
1138-
1139-
gc_to_gc = gc_to_gc_transition_prob
1140-
gc_to_bg = 1.0 - gc_to_gc_transition_prob
1141-
bg_to_bg = bg_to_bg_transition_prob
1142-
bg_to_gc = 1.0 - bg_to_bg_transition_prob
1143-
1144-
start_to_gc = 0.5
1145-
start_to_bg = 0.5
1146-
gc_to_end = 0.5
1147-
bg_to_end = 0.5
1148-
1149-
model.add_edge(model.start, gc_cat, start_to_gc)
1150-
model.add_edge(model.start, bg_cat, start_to_bg)
1151-
model.add_edge(gc_cat, model.end, gc_to_end)
1152-
model.add_edge(bg_cat, model.end, bg_to_end)
1153-
model.add_edge(gc_cat, gc_cat, gc_to_gc)
1154-
model.add_edge(gc_cat, bg_cat, gc_to_bg)
1155-
model.add_edge(bg_cat, gc_cat, bg_to_gc)
1156-
model.add_edge(bg_cat, bg_cat, bg_to_bg)
1157-
1158-
# Test HMM model for multiprocessing safety
1159-
hmm_model_safe = True
1160-
if platform_type in ['linux', 'macos']:
1161-
try:
1162-
import pickle
1163-
log_object.info("Testing HMM model for multiprocessing safety...")
1164-
1165-
# Test 1: Check if model can be pickled/unpickled
1166-
test_model = pickle.dumps(model)
1167-
test_model = pickle.loads(test_model)
1168-
1169-
# Test 2: Check if model can perform predictions after pickling
1170-
test_seq = numpy.array([[[0]]]) # Simple test sequence
1171-
test_prediction = test_model.predict(test_seq)
1172-
1173-
# Test 3: Check if model works with actual data structure
1174-
if len(model_labels) > 1:
1175-
test_hg_seq = numpy.array([[[model_labels.index("background")]]])
1176-
test_hmm_pred = test_model.predict(test_hg_seq)
1177-
1178-
log_object.info("HMM model passed multiprocessing safety tests")
1179-
hmm_model_safe = True
1180-
1181-
except Exception as e:
1182-
log_object.warning(f"HMM model failed multiprocessing safety test: {e}")
1183-
log_object.warning("Falling back to single-threaded processing for HMM operations")
1184-
hmm_model_safe = False
1185-
else:
1186-
# For other platforms, assume not safe
1187-
hmm_model_safe = False
1124+
hmm_model_safe_or_doesnt_matter = True
1125+
model = None
1126+
model_labels = None
1127+
if gc_delineation_mode == "HMM":
1128+
# Create DenseHMM - using the pomegranate library
1129+
gc_hg_probs = [gc_emission_prob_without_hit]
1130+
bg_hg_probs = [1.0 - gc_emission_prob_without_hit]
1131+
model_labels = ["background"]
1132+
for hg in all_hgs:
1133+
model_labels.append(hg)
1134+
gc_hg_probs.append(gc_emission_prob_with_hit)
1135+
bg_hg_probs.append(1.0 - gc_emission_prob_with_hit)
1136+
1137+
gc_cat = Categorical([gc_hg_probs])
1138+
bg_cat = Categorical([bg_hg_probs])
1139+
1140+
model = DenseHMM()
1141+
model.add_distributions([gc_cat, bg_cat])
1142+
1143+
gc_to_gc = gc_to_gc_transition_prob
1144+
gc_to_bg = 1.0 - gc_to_gc_transition_prob
1145+
bg_to_bg = bg_to_bg_transition_prob
1146+
bg_to_gc = 1.0 - bg_to_bg_transition_prob
1147+
1148+
start_to_gc = 0.5
1149+
start_to_bg = 0.5
1150+
gc_to_end = 0.5
1151+
bg_to_end = 0.5
1152+
1153+
model.add_edge(model.start, gc_cat, start_to_gc)
1154+
model.add_edge(model.start, bg_cat, start_to_bg)
1155+
model.add_edge(gc_cat, model.end, gc_to_end)
1156+
model.add_edge(bg_cat, model.end, bg_to_end)
1157+
model.add_edge(gc_cat, gc_cat, gc_to_gc)
1158+
model.add_edge(gc_cat, bg_cat, gc_to_bg)
1159+
model.add_edge(bg_cat, gc_cat, bg_to_gc)
1160+
model.add_edge(bg_cat, bg_cat, bg_to_bg)
1161+
1162+
# Test HMM model for multiprocessing safety
1163+
if platform_type in ['linux', 'macos']:
1164+
try:
1165+
import pickle
1166+
log_object.info("Testing HMM model for multiprocessing safety...")
1167+
1168+
# Test 1: Check if model can be pickled/unpickled
1169+
test_model = pickle.dumps(model)
1170+
test_model = pickle.loads(test_model)
1171+
1172+
# Test 2: Check if model can perform predictions after pickling
1173+
test_seq = numpy.array([[[0]]]) # Simple test sequence
1174+
test_prediction = test_model.predict(test_seq)
1175+
1176+
# Test 3: Check if model works with actual data structure
1177+
if len(model_labels) > 1:
1178+
test_hg_seq = numpy.array([[[model_labels.index("background")]]])
1179+
test_hmm_pred = test_model.predict(test_hg_seq)
1180+
1181+
msg = "HMM model passed multiprocessing safety tests"
1182+
log_object.info(msg)
1183+
sys.stdout.write(msg + '\n')
1184+
1185+
except Exception as e:
1186+
msg = f"HMM model failed multiprocessing safety test: {e}"
1187+
log_object.warning(msg)
1188+
sys.stdout.write(msg + '\n')
1189+
msg = "Falling back to single-threaded processing for HMM operations"
1190+
log_object.warning(msg)
1191+
sys.stdout.write(msg + '\n')
1192+
hmm_model_safe_or_doesnt_matter = False
1193+
else:
1194+
# For other platforms, assume not safe
1195+
hmm_model_safe_or_doesnt_matter = False
1196+
11881197

11891198
gc_hmm_evalues_file = (
11901199
work_dir + "GeneCluster_NewInstances_HMMEvalues.txt"
@@ -1305,7 +1314,7 @@ def identify_gc_instances(
13051314
sys.stdout.write(msg + '\n')
13061315

13071316
# Use multiprocessing for Linux/macOS with safe HMM model, otherwise use single-threaded processing
1308-
if platform_type in ['linux', 'macos'] and hmm_model_safe:
1317+
if platform_type in ['linux', 'macos'] and hmm_model_safe_or_doesnt_matter:
13091318
try:
13101319
p = multiprocessing.Pool(threads)
13111320
for _ in tqdm.tqdm(
@@ -1392,8 +1401,9 @@ def _identify_gc_instances_worker(input_args):
13921401
assert sample_lt_to_evalue is not None
13931402
assert boundary_genes is not None
13941403
assert lt_to_hg is not None
1395-
assert model_labels is not None
1396-
assert model is not None
1404+
if gc_delineation_mode == "HMM":
1405+
assert model_labels is not None
1406+
assert model is not None
13971407

13981408
if single_query_mode:
13991409
with open(gc_info_dir + sample + ".bgcs.txt", "w") as gc_sample_listing_handle:

0 commit comments

Comments
 (0)