From 6e37d49239da11a8f1aa8f5142bbcbb5fe8b43ae Mon Sep 17 00:00:00 2001
From: Sourcery AI
Date: Tue, 26 Sep 2023 02:34:12 +0000
Subject: [PATCH] 'Refactored by Sourcery'

---
 etl.py                   | 22 +++++++---------------
 update_shootings_data.py | 26 +++++++++++++-------------
 2 files changed, 20 insertions(+), 28 deletions(-)

diff --git a/etl.py b/etl.py
index 732d23e..7f37577 100644
--- a/etl.py
+++ b/etl.py
@@ -20,8 +20,7 @@ ds = pd.read_csv(ds_url)
 
 
 def shape_check(df, cols):
-    check = df.shape[1] == cols
-    return(check)
+    return df.shape[1] == cols
 
 if shape_check(fe, 23) == False:
     print('data frame has incorrect number of columns')
@@ -58,8 +57,8 @@ def try_pop(lst, index = -1):
     names = names.map(lambda x: str(x).rsplit(' '))
     last_name = names.map(lambda x: try_pop(x, index = -1))
     first_name = names.map(lambda x: try_pop(x, index = 0))
-    middle_name = names.map(lambda x: str(' '.join(x)))
-    
+    middle_name = names.map(lambda x: ' '.join(x))
+
     return(last_name, first_name, middle_name, suffixes)
 
 
@@ -104,22 +103,15 @@ def try_pop(lst, index = -1):
 def shootings_df_process(df_name, col_mappings):
     df = eval(df_name)
     length = df.shape[0]
-    
+
     cols_names = list(col_mappings[df_name].keys())
     cols_select = list(col_mappings[df_name].values())
 
     # create an empty dataframe as a destination for data
     df_output = pd.DataFrame(columns = col_mapping[df_name].keys())
-    
-    cnt = 0
-    for col in cols_select:
-        if col == None:
-            col_values = [None] * length
-        else:
-            col_values = df[col]
-        
+
+    for cnt, col in enumerate(cols_select):
+        col_values = [None] * length if col is None else df[col]
         df_output[cols_names[cnt]] = col_values
-        cnt += 1
-    
+
     return(df_output)
diff --git a/update_shootings_data.py b/update_shootings_data.py
index bec8426..c049489 100644
--- a/update_shootings_data.py
+++ b/update_shootings_data.py
@@ -20,27 +20,27 @@ def log_activity(log_input, log_file):
     # http://goo.gl/IIee2
     import logging
     from time import strftime
-    
+
     timestamp = strftime("%Y-%m-%d-%H:%M:%S")
-    
+
     # will create or add to the log file
     logging.basicConfig(filename = log_file, level = logging.DEBUG)
-    
-    logging.info(timestamp + ' ' + log_input) # add in a new line character for easier reading
+
+    logging.info(f'{timestamp} {log_input}')
 
 
 def load_google_shootings_csv(url, log_str, log_file, num_cols_check):
     from pandas import read_csv
-    
+
     df = read_csv(url)
 
-    if(df.shape[0] >= 0 and df.shape[1] == num_cols_check):
-        outcome = log_str + ' success'
+    if (df.shape[0] >= 0 and df.shape[1] == num_cols_check):
+        outcome = f'{log_str} success'
     else:
-        outcome = log_str + ' failure'
-    
+        outcome = f'{log_str} failure'
+
     log_activity(outcome, log_file)
-    
+
     return(df)
 
 # def action_success_failure(logical_test):
@@ -84,11 +84,11 @@ def create_connect_db(db_name, db_schema, log_file):
 def update_db(db_name, tbl_to_update, df, if_exists = 'replace', log_file):
     from sqlite3 import connect
     from pandas import to_sql
-    
+
     con = lite.connect(db_name)
     tbl_to_update.to_sql(name = tbl_to_update, con = con, flavor = 'sqlite', if_exists = if_exists, chunksize = 50)
-    log_activity(tbl_to_update + ' updated using method ' + if_exists, log_file)
-    
+    log_activity(f'{tbl_to_update} updated using method {if_exists}', log_file)
+
     con.close()
 
 update_db('police_shootings.sqlite', 'raw_fatal_encounters', df_fe, 'replace', 'activity.log')
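Note on the etl.py hunk: the refactor only simplifies the loop, and the unchanged context lines still mix two names (the parameter is col_mappings, but the empty output frame is built from col_mapping[df_name]) and look the source DataFrame up with eval(df_name). A minimal sketch of how the helper could be written instead, assuming the caller passes the DataFrame and a single {output column: source column or None} mapping directly; the signature and names here are illustrative, not taken from the repository:

    import pandas as pd

    def shootings_df_process(df, col_mapping):
        # col_mapping maps each output column to a source column,
        # or to None when the source data has no matching field
        length = len(df)
        data = {
            out_col: [None] * length if src_col is None else df[src_col].tolist()
            for out_col, src_col in col_mapping.items()
        }
        return pd.DataFrame(data)

Passing the frame and its mapping in as arguments keeps the helper usable on any DataFrame and removes the coupling to module-level variable names that eval(df_name) and the stray col_mapping reference create.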
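Note on the update_shootings_data.py hunk: the patch touches only the logging lines, and the surrounding update_db context will not run as written. A required parameter (log_file) follows one with a default (if_exists = 'replace'), which is a SyntaxError; lite.connect refers to a name the shown imports do not define (only connect is imported from sqlite3), unless lite is bound elsewhere in the module; to_sql is called on tbl_to_update, which the final call passes as a string, rather than on the DataFrame df; and from pandas import to_sql and the flavor argument are not part of current pandas, where to_sql is a DataFrame method and flavor has been removed. A minimal sketch of a version that would run, assuming pandas, the standard-library sqlite3, and the script's own log_activity and df_fe; the reordered parameters are illustrative only:

    import sqlite3

    def update_db(db_name, tbl_to_update, df, log_file, if_exists='replace'):
        # write the DataFrame into the named table of the SQLite database
        con = sqlite3.connect(db_name)
        try:
            df.to_sql(name=tbl_to_update, con=con, if_exists=if_exists, chunksize=50)
            log_activity(f'{tbl_to_update} updated using method {if_exists}', log_file)
        finally:
            # release the connection even if the write fails
            con.close()

    update_db('police_shootings.sqlite', 'raw_fatal_encounters', df_fe, 'activity.log')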