Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sourcery Starbot ⭐ refactored n8sty/policeShootings #1

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 7 additions & 15 deletions etl.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,7 @@
ds = pd.read_csv(ds_url)

def shape_check(df, cols):
check = df.shape[1] == cols
return(check)
return df.shape[1] == cols
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

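Function shape_check refactored with the following changes:

- Return the comparison `df.shape[1] == cols` directly instead of first assigning it to a temporary `check` variable.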


if shape_check(fe, 23) == False:
print('data frame has incorrect number of columns')
Expand Down Expand Up @@ -58,8 +57,8 @@ def try_pop(lst, index = -1):
names = names.map(lambda x: str(x).rsplit(' '))
last_name = names.map(lambda x: try_pop(x, index = -1))
first_name = names.map(lambda x: try_pop(x, index = 0))
middle_name = names.map(lambda x: str(' '.join(x)))
middle_name = names.map(lambda x: ' '.join(x))

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function name_processor refactored with the following changes:

- Remove the redundant `str()` call around `' '.join(x)`, which already returns a string.

return(last_name, first_name, middle_name, suffixes)


Expand Down Expand Up @@ -104,22 +103,15 @@ def try_pop(lst, index = -1):
def shootings_df_process(df_name, col_mappings):
df = eval(df_name)
length = df.shape[0]

cols_names = list(col_mappings[df_name].keys())
cols_select = list(col_mappings[df_name].values())
# create an empty dataframe as a destination for data
df_output = pd.DataFrame(columns = col_mapping[df_name].keys())

cnt = 0
for col in cols_select:
if col == None:
col_values = [None] * length
else:
col_values = df[col]


for cnt, col in enumerate(cols_select):
col_values = [None] * length if col is None else df[col]
df_output[cols_names[cnt]] = col_values
cnt += 1

Comment on lines -107 to -122
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

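Function shootings_df_process refactored with the following changes:

- Replace the manually incremented `cnt` counter with `enumerate`.
- Collapse the `if`/`else` assignment of `col_values` into a conditional expression.
- Compare against `None` with `is None` rather than `== None`.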

return(df_output)


Expand Down
26 changes: 13 additions & 13 deletions update_shootings_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,27 +20,27 @@ def log_activity(log_input, log_file):
# http://goo.gl/IIee2
import logging
from time import strftime

timestamp = strftime("%Y-%m-%d-%H:%M:%S")

# will create or add to the log file
logging.basicConfig(filename = log_file, level = logging.DEBUG)
logging.info(timestamp + ' ' + log_input) # add in a new line character for easier reading

logging.info(f'{timestamp} {log_input}')
Comment on lines -23 to +29
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function log_activity refactored with the following changes:

- Build the log message with an f-string instead of string concatenation.

This removes the following comments (why?):

# add in a new line character for easier reading



def load_google_shootings_csv(url, log_str, log_file, num_cols_check):
from pandas import read_csv

df = read_csv(url)

if(df.shape[0] >= 0 and df.shape[1] == num_cols_check):
outcome = log_str + ' success'
if (df.shape[0] >= 0 and df.shape[1] == num_cols_check):
outcome = f'{log_str} success'
else:
outcome = log_str + ' failure'
outcome = f'{log_str} failure'

log_activity(outcome, log_file)

Comment on lines -34 to +43
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

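Function load_google_shootings_csv refactored with the following changes:

- Build the `outcome` strings with f-strings instead of string concatenation.
- Add a space between `if` and the opening parenthesis of the condition.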

return(df)

# def action_success_failure(logical_test):
Expand Down Expand Up @@ -84,11 +84,11 @@ def create_connect_db(db_name, db_schema, log_file):
def update_db(db_name, tbl_to_update, df, if_exists = 'replace', log_file):
from sqlite3 import connect
from pandas import to_sql

con = lite.connect(db_name)
tbl_to_update.to_sql(name = tbl_to_update, con = con, flavor = 'sqlite', if_exists = if_exists, chunksize = 50)
log_activity(tbl_to_update + ' updated using method ' + if_exists, log_file)
log_activity(f'{tbl_to_update} updated using method {if_exists}', log_file)

Comment on lines -87 to +91
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function update_db refactored with the following changes:

- Build the log message with an f-string instead of string concatenation.

con.close()

update_db('police_shootings.sqlite', 'raw_fatal_encounters', df_fe, 'replace', 'activity.log')
Expand Down