n8sty · SourceryAI · Sep 26, 2023 · SourceryAI · Sep 26, 2023 · SourceryAI
diff --git a/etl.py b/etl.py
@@ -20,8 +20,7 @@
 ds = pd.read_csv(ds_url)
 
 def shape_check(df, cols):
-    check = df.shape[1] == cols
-    return(check)
+    return df.shape[1] == cols
 
-if shape_check(fe, 23) == False:
+if not shape_check(fe, 23):
     print('data frame has incorrect number of columns')
@@ -58,8 +57,8 @@ def try_pop(lst, index = -1):
     names = names.map(lambda x: str(x).rsplit(' '))
     last_name = names.map(lambda x: try_pop(x, index = -1))
     first_name = names.map(lambda x: try_pop(x, index = 0))
-    middle_name = names.map(lambda x: str(' '.join(x)))
-    
+    middle_name = names.map(lambda x: ' '.join(x))
+
     return(last_name, first_name, middle_name, suffixes)
 
 
@@ -104,22 +103,21 @@ def try_pop(lst, index = -1):
 def shootings_df_process(df_name, col_mappings):
+    """Build an output frame laid out as described by col_mappings[df_name].
+
+    df_name is evaluated with eval() to obtain the source data frame.
+    col_mappings[df_name] maps each output column name to a source column
+    label, or to None for an output column filled with None values.
+    """
+    # NOTE(review): eval() executes arbitrary code; pass only trusted names.
     df = eval(df_name)
     length = df.shape[0]
-    
+
     cols_names = list(col_mappings[df_name].keys())
     cols_select = list(col_mappings[df_name].values())
     # create an empty dataframe as a destination for data
-    df_output = pd.DataFrame(columns = col_mapping[df_name].keys())
-
-    cnt = 0
-    for col in cols_select:
-        if col == None:
-            col_values = [None] * length
-        else:
-            col_values = df[col]
-
-        df_output[cols_names[cnt]] = col_values
-        cnt += 1
-
+    df_output = pd.DataFrame(columns = cols_names)
+
+    for col_name, col in zip(cols_names, cols_select):
+        df_output[col_name] = [None] * length if col is None else df[col]
     return(df_output)
 
 

diff --git a/update_shootings_data.py b/update_shootings_data.py
@@ -20,27 +20,27 @@ def log_activity(log_input, log_file):
     # http://goo.gl/IIee2
     import logging
     from time import strftime            
-    
+
     timestamp = strftime("%Y-%m-%d-%H:%M:%S")
-    
+
     # will create or add to the log file
     logging.basicConfig(filename = log_file, level = logging.DEBUG)
-    
-    logging.info(timestamp + ' ' + log_input)  # add in a new line character for easier reading
+
+    logging.info(f'{timestamp} {log_input}')
 
 
 def load_google_shootings_csv(url, log_str, log_file, num_cols_check):
     from pandas import read_csv
-    
+
     df = read_csv(url)
-
-    if(df.shape[0] >= 0 and df.shape[1] == num_cols_check):
-        outcome = log_str + ' success'
+    # an empty frame is logged as a failed load; the frame is returned either way
+    if df.shape[0] > 0 and df.shape[1] == num_cols_check:
+        outcome = f'{log_str} success'
     else:
-        outcome = log_str + ' failure'
-        
+        outcome = f'{log_str} failure'
+
     log_activity(outcome, log_file)
-    
+
     return(df)
 
 # def action_success_failure(logical_test):
@@ -84,11 +84,19 @@ def create_connect_db(db_name, db_schema, log_file):
-def update_db(db_name, tbl_to_update, df, if_exists = 'replace', log_file):
+def update_db(db_name, tbl_to_update, df, if_exists = 'replace', log_file = None):
+    """Write df to the table tbl_to_update of the SQLite database db_name.
+
+    if_exists is passed through to DataFrame.to_sql.  The update is recorded
+    with log_activity unless log_file is None.
+    """
     from sqlite3 import connect
-    from pandas import to_sql    
-    
-    con = lite.connect(db_name)
-    tbl_to_update.to_sql(name = tbl_to_update, con = con, flavor = 'sqlite',  if_exists = if_exists, chunksize = 50)
-    log_activity(tbl_to_update + ' updated using method ' + if_exists, log_file)
-    
-    con.close()
+
+    con = connect(db_name)
+    try:
+        # df holds the rows; tbl_to_update is only the destination table name
+        df.to_sql(name = tbl_to_update, con = con, if_exists = if_exists, chunksize = 50)
+    finally:
+        # release the connection even when the write fails
+        con.close()
+    if log_file is not None:
+        log_activity(f'{tbl_to_update} updated using method {if_exists}', log_file)
 
 update_db('police_shootings.sqlite', 'raw_fatal_encounters', df_fe, 'replace', 'activity.log')