Skip to content

Commit

Permalink
add some more checks to CSV reader
Browse files Browse the repository at this point in the history
  • Loading branch information
JenkeScheen committed Apr 5, 2024
1 parent 0a2e20f commit 01a489c
Showing 1 changed file with 11 additions and 1 deletion.
12 changes: 11 additions & 1 deletion choppa/IO/input.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,14 +57,24 @@ def read_fitness_csv(
fitness_colname,
confidence_colname,
]]

# check that there aren't any NaNs and that fitness (and confidence) data is numeric
if fitness_df.isnull().values.any():
raise ValueError(f"Found missing values in input CSV: {fitness_df[fitness_df.isnull().any(axis=1)]}")
if len(fitness_df[pd.to_numeric(fitness_df[fitness_colname], errors='coerce').isnull()]) > 0:
raise ValueError(f"Found non-numeric fitness values in input CSV: {fitness_df[pd.to_numeric(fitness_df[fitness_colname], errors='coerce').isnull()]}")
if confidence_colname is not None:
if len(fitness_df[pd.to_numeric(fitness_df[confidence_colname], errors='coerce').isnull()]) > 0:
raise ValueError(f"Found non-numeric confidence values in input CSV: {fitness_df[pd.to_numeric(fitness_df[confidence_colname], errors='coerce').isnull()]}")

logger.info(f"Successfully read fitness data:\n{fitness_df}")

return fitness_df

def df_to_basedict(fitness_df):
"""
Converts a `pandas` fitness dataframe (read by `FitnessFactory.read_fitness_csv`) into
a fitness basedict.
a `fitness basedict` which is essentially just an `OrderedDict`.
"""


Expand Down

0 comments on commit 01a489c

Please sign in to comment.