From 530a15e59dbd6864a88935fd52b51cc089bef717 Mon Sep 17 00:00:00 2001 From: Jenke Scheen Date: Tue, 8 Oct 2024 18:22:15 +0200 Subject: [PATCH] hotfix - need to offset nextstrain fitness indexing --- choppa/nextstrain.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/choppa/nextstrain.py b/choppa/nextstrain.py index 3c944ea..c118d4a 100644 --- a/choppa/nextstrain.py +++ b/choppa/nextstrain.py @@ -377,7 +377,9 @@ def count_mutations_events(metadata_df, gene): for mutation, count in mutations.items(): rows.append((position, mutation, count)) mutation_count_df = pd.DataFrame(rows, columns=["position", "mutation", "count"]) - mutation_count_df["position"] = mutation_count_df["position"].astype(int) + mutation_count_df["position"] = ( + mutation_count_df["position"].astype(int) - 1 + ) # correct for offset in NextStrain mutation_count_df = mutation_count_df.sort_values(by="position").reset_index( drop=True ) @@ -438,8 +440,10 @@ def finalize_dataframe( "frequency": frequency, } ) + # add all rows into a single dataframe ready for usage with choppa. choppa_nextstrain_df = pd.DataFrame(choppa_nextstrain_data) + if outfile: choppa_nextstrain_df.to_csv(outfile) return choppa_nextstrain_df