Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Point the API to HuggingFace datasets #2119

Merged
merged 1 commit into from
Jan 22, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions changelog_entry.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
- bump: minor
  changes:
    added:
      - Pointing the API directly to HuggingFace data downloads.
15 changes: 10 additions & 5 deletions policyengine_api/jobs/calculate_economy_simulation_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,13 @@

reform_impacts_service = ReformImpactsService()

# Dataset locations expressed as "hf://" URI strings. Per this change's
# changelog entry, they point the API directly at HuggingFace data downloads
# in place of the dataset classes previously imported from
# policyengine_us_data.

# UK datasets. ENHANCED_FRS is passed as `dataset=` to the UK Microsimulation.
# NOTE(review): FRS is not referenced in the visible parts of this file —
# confirm whether it is used elsewhere.
ENHANCED_FRS = "hf://policyengine/policyengine-uk-data/enhanced_frs_2022_23.h5"
FRS = "hf://policyengine/policyengine-uk-data/frs_2022_23.h5"

# US datasets. ENHANCED_CPS is stored in sim_options["dataset"] for enhanced
# CPS runs; POOLED_CPS is the fallback filter dataset for region filtering
# when sim_options has no "dataset" entry.
# NOTE(review): CPS is not referenced in the visible parts of this file —
# confirm whether it is used elsewhere.
ENHANCED_CPS = "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5"
CPS = "hf://policyengine/policyengine-us-data/cps_2023.h5"
POOLED_CPS = "hf://policyengine/policyengine-us-data/pooled_3_year_cps_2023.h5"


class CalculateEconomySimulationJob(BaseJob):
def __init__(self):
Expand Down Expand Up @@ -247,6 +254,7 @@ def _create_simulation_uk(

simulation = Microsimulation(
reform=reform,
dataset=ENHANCED_FRS,
)
simulation.default_calculation_period = time_period
if region != "uk":
Expand Down Expand Up @@ -283,24 +291,21 @@ def _create_simulation_us(
# for running a simulation with the "enhanced_us" region
if dataset in DATASETS or region == "enhanced_us":
print(f"Running an enhanced CPS simulation")
from policyengine_us_data import EnhancedCPS_2024

sim_options["dataset"] = EnhancedCPS_2024
sim_options["dataset"] = ENHANCED_CPS

# Handle region settings; need to be mindful not to place
# legacy enhanced_us region in this block
if region not in ["us", "enhanced_us"]:
print(f"Filtering US dataset down to region {region}")

from policyengine_us_data import Pooled_3_Year_CPS_2023

# This is only run to allow for filtering by region
# Check to see if we've declared a dataset and use that
# to filter down by region
if "dataset" in sim_options:
filter_dataset = sim_options["dataset"]
else:
filter_dataset = Pooled_3_Year_CPS_2023
filter_dataset = POOLED_CPS

# Run sim to filter by region
region_sim = Microsimulation(
Expand Down
Loading