Skip to content

Commit

Permalink
attempt number two at fixing stat yardage
Browse files Browse the repository at this point in the history
  • Loading branch information
akeaswaran committed Sep 1, 2024
1 parent d578302 commit 6ab746d
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 17 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
# Versions should comply with PEP440. For a discussion on single-sourcing
# the version across setup.py and the project code, see
# https://packaging.python.org/en/latest/single_source_version.html
version="0.0.36.2.14",
version="0.0.36.2.15",
description="Retrieve Sports data in Python",
long_description=long_description,
long_description_content_type="text/markdown",
Expand Down
39 changes: 24 additions & 15 deletions sportsdataverse/cfb/cfb_pbp.py
Original file line number Diff line number Diff line change
Expand Up @@ -818,6 +818,7 @@ def play_text_dupe_checker(row):
pbp_txt["plays"]["type.text"],
)
del pbp_txt["plays"]["clock.mm"]

pbp_txt["plays"] = pbp_txt["plays"].replace({np.nan: None})
return pbp_txt

Expand Down Expand Up @@ -1634,6 +1635,25 @@ def __add_rush_pass_flags(self, play_df):
True,
False,
)
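# Workaround for bugged games (2024 week 1): completed passes that come through
# with statYardage == 0 get it recomputed from start/end yardsToEndzone. When the
# start and end team ids differ, the end value is flipped (100 - end) first.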
play_df['statYardage'] = np.select(
[
(play_df["pass"] == True)
& (play_df.text.str.contains(" complete to ", case=False))
& (play_df['statYardage'] == 0)
& (play_df['start.team.id'] != play_df['end.team.id']),

(play_df["pass"] == True)
& (play_df.text.str.contains(" complete to ", case=False))
& (play_df['statYardage'] == 0)
],
[
play_df['start.yardsToEndzone'] - (100 - play_df['end.yardsToEndzone']),

play_df['start.yardsToEndzone'] - play_df['end.yardsToEndzone']
],
default = play_df['statYardage']
)
# --- Sacks----
play_df["sack_vec"] = np.where(
(
Expand Down Expand Up @@ -2705,8 +2725,8 @@ def __add_yardage_cols(self, play_df):
& (play_df.text.str.contains(" complete to", case=False))#,
& (play_df.text.str.contains(" for .* y\w*ds?", regex = True, case = False)),

(play_df["pass"] == True)
& (play_df.text.str.contains(" complete to", case=False)) & (play_df.downs_turnover == True),
# (play_df["pass"] == True)
# & (play_df.text.str.contains(" complete to", case=False)) & (play_df.downs_turnover == True),

(play_df["pass"] == True)
& (play_df.text.str.contains(" complete to", case=False)),
Expand Down Expand Up @@ -2734,9 +2754,9 @@ def __add_yardage_cols(self, play_df):
.str.extract(r"(\d+)")[0]
.astype(float),

play_df['start.yardsToEndzone'] - (100 - play_df['end.yardsToEndzone']),
play_df['statYardage'], # play_df['start.yardsToEndzone'] - (100 - play_df['end.yardsToEndzone']),

play_df['start.yardsToEndzone'] - play_df['end.yardsToEndzone'],
# play_df['start.yardsToEndzone'] - play_df['end.yardsToEndzone'],

0.0,

Expand All @@ -2748,17 +2768,6 @@ def __add_yardage_cols(self, play_df):
],
default = None,
)
play_df['statYardage'] = np.select(
[
(play_df["pass"] == True)
& (play_df.text.str.contains(" complete to ", case=False))
& (play_df['statYardage'] == 0)
],
[
play_df['yds_receiving']
],
default = play_df['statYardage']
)

play_df["yds_int_return"] = None
play_df["yds_int_return"] = np.select(
Expand Down
35 changes: 34 additions & 1 deletion tests/cfb/test_pbp.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,4 +445,37 @@ def test_neb_24wk1():
# (plays['text'].str.contains(" pass complete ")) & (plays['text'].str.contains('Raiola '))
# ]
# LOGGER.info(bad_yards_play[["id", "text", "yds_passing", "yds_receiving", "statYardage", "start.yardsToEndzone", "end.yardsToEndzone", "yds_sacked"]].to_json(orient = "records", indent = 2))



# def test_okst_24wk1():
# test = CFBPlayProcess(gameId = 401634212)
# test.espn_cfb_pbp()
# json_dict_stuff = test.run_processing_pipeline()

# plays = test.plays_json

# LOGGER.info(
# plays[
# (plays["drive.id"] == "4016342128")
# ]["text"].to_json(orient = "records", indent = 2)
# )
# bad_yards_play = plays[
# (plays['text'].str.contains('Bowman ')) & (plays['pass'] == True)
# ]
# # LOGGER.info(bad_yards_play[["start.down", "start.distance", "text", "yds_passing", "yds_receiving", "statYardage", "start.yardsToEndzone", "start.team.id", "end.yardsToEndzone", "end.team.id", "yds_sacked", "downs_turnover", "dropback"]].to_json(orient = "records", indent = 2))
# # LOGGER.info(f"Bowman dropbacks: {len(bad_yards_play[(bad_yards_play.dropback == True)])}")
# # LOGGER.info(f"Bowman Q1 dropbacks: {len(bad_yards_play[(bad_yards_play.period == 1)])}")
# # LOGGER.info(f"Bowman Q2 dropbacks: {len(bad_yards_play[(bad_yards_play.period == 2)])}")
# # LOGGER.info(f"Bowman Q3 dropbacks: {len(bad_yards_play[(bad_yards_play.period == 3)])}")
# # LOGGER.info(f"Bowman Q4 dropbacks: {len(bad_yards_play[(bad_yards_play.period == 4)])}")

# # LOGGER.info(f"Bowman non-dropbacks:")
# # LOGGER.info(bad_yards_play[(bad_yards_play.dropback == False)][["start.down", "start.distance", "text", "yds_passing", "yds_receiving", "statYardage", "start.yardsToEndzone", "start.team.id", "end.yardsToEndzone", "end.team.id", "yds_sacked", "downs_turnover", "dropback"]].to_json(orient = "records", indent = 2))


# drive_agg = bad_yards_play.sort_values(by="game_play_number").groupby(by = ['drive.id'], as_index=False, group_keys = False).agg(dropback = ('dropback', sum)).to_json(orient = "records", indent = 2)
# LOGGER.info(f"Bowman dropbacks by drive: {drive_agg}")

# box = test.create_box_score()
# LOGGER.info(box['pass'][0])
# assert box['pass'][0]['Yds'] == 267.0 # PBP seems to be missing a 6-yd completion? - 01-Sept-2024

0 comments on commit 6ab746d

Please sign in to comment.