Skip to content

Commit

Permalink
Fix start_date adjustment to real subset
Browse files (browse the repository at this point in the history)
  • Loading branch information
kvantricht committed Aug 21, 2024
1 parent f1573cb commit 750930b
Showing 1 changed file with 7 additions and 1 deletion.
8 changes: 7 additions & 1 deletion presto/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,18 +392,24 @@ def process_parquet(df: pd.DataFrame) -> pd.DataFrame:
]
bands100m = ["METEO-precipitation_flux", "METEO-temperature_mean"]

# ----------------------------------------------------------------------------
# PLACEHOLDER for substituting start_date with one derived from crop calendars
# df['start_date'] = seasons.get_season_start(df[['lat','lon']])

# For now, in the absence of a relevant start_date, we compute each timestamp's offset
# relative to end_date so that we can take the 12 months counted back from end_date
df["valid_date_ind"] = (
(((df["timestamp"] - df["end_date"]).dt.days + 365) / 30).round().astype(int)
)

# once the start date is settled, we take 12 months from that as input to Presto
df_pivot = df[(df["valid_date_ind"] >= 0) & (df["valid_date_ind"] < 12)].pivot(
index=index_columns, columns="valid_date_ind", values=feature_columns
)

# Now reassign start_date to the start of the subset actually used (364 days before end_date)
df["start_date"] = df["end_date"] - pd.Timedelta(days=364)
# ----------------------------------------------------------------------------

if df_pivot.empty:
raise ValueError("Left with an empty DataFrame!")

Expand Down

0 comments on commit 750930b

Please sign in to comment.