|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +Refresh Bill Status from OpenStates API |
| 4 | +
|
| 5 | +Updates legislative stage, last action, and status for tracked bills |
| 6 | +in the processed_bills table. |
| 7 | +
|
| 8 | +Usage: |
| 9 | + export $(grep -v '^#' .env | xargs) |
| 10 | +
|
| 11 | + # Refresh all scored, non-skipped bills (highest score first) |
| 12 | + python scripts/refresh_bill_status.py |
| 13 | +
|
| 14 | + # Refresh specific state |
| 15 | + python scripts/refresh_bill_status.py --state NY |
| 16 | +
|
| 17 | + # Limit number of API calls (respect 250/day limit) |
| 18 | + python scripts/refresh_bill_status.py --limit 50 |
| 19 | +
|
| 20 | + # Dry run — show what would be updated |
| 21 | + python scripts/refresh_bill_status.py --dry-run |
| 22 | +
|
| 23 | + # Include all bills (not just scored ones) |
| 24 | + python scripts/refresh_bill_status.py --all |
| 25 | +""" |
| 26 | + |
| 27 | +import os |
| 28 | +import sys |
| 29 | +import json |
| 30 | +import argparse |
| 31 | +import time |
| 32 | +import requests |
| 33 | + |
# ============== Configuration ==============

# OpenStates v3 credentials and endpoint. The key is read from the environment
# once, at import time, and is None when the variable is unset.
OPENSTATES_API_KEY = os.getenv("OPENSTATES_API_KEY")
OPENSTATES_BASE_URL = "https://v3.openstates.org"

# OpenStates action classification -> internal stage key.
# This table is a plain lookup; which stage wins when several apply is
# decided by STAGE_ORDER (see classify_stage).
STAGE_CLASSIFICATIONS = dict([
    ("introduction", "introduced"),
    ("filing", "introduced"),
    ("referral-committee", "in_committee"),
    ("committee-passage", "passed_committee"),
    ("reading-1", "first_reading"),
    ("reading-2", "second_reading"),
    ("reading-3", "third_reading"),
    ("passage", "passed_chamber"),
    ("executive-receipt", "sent_to_governor"),
    ("executive-signature", "signed"),
    ("became-law", "signed"),
    ("executive-veto", "vetoed"),
    ("executive-veto-line-item", "vetoed"),
    ("failure", "dead"),
    ("withdrawal", "dead"),
])

# Rank of every stage key (a larger number means further along).
# "dead" is ranked below everything else.
STAGE_ORDER = dict(
    prefiled=0,
    introduced=1,
    in_committee=2,
    passed_committee=3,
    first_reading=4,
    second_reading=5,
    third_reading=6,
    passed_chamber=7,
    passed_both=8,
    sent_to_governor=9,
    signed=10,
    vetoed=11,
    dead=-1,
)

# Human-readable label for every stage key.
STAGE_LABELS = dict(
    prefiled="Pre-filed",
    introduced="Introduced",
    in_committee="In Committee",
    passed_committee="Passed Committee",
    first_reading="First Reading",
    second_reading="Second Reading",
    third_reading="Third Reading",
    passed_chamber="Passed One Chamber",
    passed_both="Passed Both Chambers",
    sent_to_governor="Sent to Governor",
    signed="Signed into Law",
    vetoed="Vetoed",
    dead="Dead/Withdrawn",
)
| 92 | + |
| 93 | + |
def openstates_request(endpoint, params=None, max_retries=3, timeout=30):
    """Make a GET request to the OpenStates API v3, retrying on rate limits.

    Args:
        endpoint: Path appended to OPENSTATES_BASE_URL (e.g. "/bills").
        params: Optional dict of query-string parameters.
        max_retries: How many times to retry after an HTTP 429 response.
            The request is therefore sent at most ``max_retries + 1`` times;
            the pause before retry number k is ``15 * k`` seconds.
        timeout: Seconds to wait on a single request before giving up, so a
            stalled connection cannot hang the whole run.

    Returns:
        The decoded JSON body, or None when the resource does not exist
        (HTTP 404) or the API is still rate limiting after every retry.

    Raises:
        ValueError: If OPENSTATES_API_KEY is not set.
        requests.HTTPError: For any other unsuccessful status code.
        requests.RequestException: On connection errors or timeouts.
    """
    if not OPENSTATES_API_KEY:
        raise ValueError("OPENSTATES_API_KEY environment variable not set")

    headers = {"X-API-KEY": OPENSTATES_API_KEY}
    url = f"{OPENSTATES_BASE_URL}{endpoint}"
    query = params or {}
    # A negative value still results in exactly one request being sent.
    retries = max(max_retries, 0)

    for attempt in range(retries + 1):
        response = requests.get(url, headers=headers, params=query, timeout=timeout)

        if response.status_code == 429:
            if attempt == retries:
                # Out of retries: report and let the caller skip this item.
                print(f" Still rate limited after {max_retries} retries, skipping")
                return None
            wait = 15 * (attempt + 1)
            print(f" Rate limited, waiting {wait}s...")
            time.sleep(wait)
            continue

        # A missing resource is an expected outcome on every attempt,
        # including the last one, so it never raises.
        if response.status_code == 404:
            return None

        response.raise_for_status()
        return response.json()
| 123 | + |
| 124 | + |
def classify_stage(actions):
    """Determine a bill's legislative stage from its list of actions.

    Each action is expected to be a dict with an optional "classification"
    list of strings and an optional "organization" dict whose own
    "classification" names the chamber (e.g. "upper" or "lower"). Missing or
    null values for any of these are tolerated.

    Passage is tracked per chamber: once passage has been seen in two
    distinct, known chambers the stage becomes "passed_both". A passage
    action whose chamber is unknown still counts as "passed_chamber" but is
    not counted as a separate chamber.

    Args:
        actions: Iterable of action dicts, or None.

    Returns:
        A stage key; "introduced" when no action maps to a known stage.
    """
    stage = "introduced"
    chambers_passed = set()

    for action in actions or []:
        # `or` (rather than a .get default) also covers keys that are present
        # but null in the API payload.
        classifications = action.get("classification") or []
        org = action.get("organization") or {}
        chamber = org.get("classification") or ""  # "upper" or "lower"

        for cls in classifications:
            mapped = STAGE_CLASSIFICATIONS.get(cls)
            if not mapped:
                continue

            if mapped == "passed_chamber":
                # Only a known chamber may count towards "both chambers";
                # otherwise "" would be treated as a chamber of its own.
                if chamber:
                    chambers_passed.add(chamber)
                if len(chambers_passed) >= 2:
                    stage = "passed_both"
                elif STAGE_ORDER.get(mapped, 0) > STAGE_ORDER.get(stage, 0):
                    stage = mapped
            elif mapped == "dead":
                # "dead" ranks below every stage, so it is set explicitly;
                # any later mapped action can move the bill forward again.
                stage = "dead"
            elif STAGE_ORDER.get(mapped, 0) > STAGE_ORDER.get(stage, 0):
                stage = mapped

    return stage
| 156 | + |
| 157 | + |
def _normalize_bill_identifier(identifier):
    """Return *identifier* upper-cased, without spaces or zero-padding.

    For example "hb 05133" and "HB5133" both become "HB5133". None is
    treated as an empty identifier.
    """
    # Kept local so this block does not rely on a file-level import.
    import re

    compact = (identifier or "").replace(" ", "").upper()
    return re.sub(r'([A-Z]+)0*(\d+)', r'\1\2', compact)


def search_bill_on_openstates(state_name, bill_number):
    """Search OpenStates for a bill by jurisdiction and bill number.

    Args:
        state_name: Jurisdiction passed to the API (e.g. "New York").
        bill_number: Bill identifier as stored locally (e.g. "HB05133").

    Returns:
        The matching search result (a dict that includes the bill's actions),
        or None when the search returns nothing. Results are matched on their
        identifier ignoring case, spaces and leading zeros; if none matches,
        the first search result is returned as a fallback.
    """
    # Only surrounding whitespace is removed for the query itself; spacing
    # and zero-padding differences are handled when comparing identifiers.
    clean_num = bill_number.strip()

    params = {
        "jurisdiction": state_name,
        "q": clean_num,
        "per_page": 5,
        "include": "actions",
    }

    data = openstates_request("/bills", params)
    results = data.get("results") if data else None
    if not results:
        return None

    # Normalise the search key once instead of once per result.
    wanted = _normalize_bill_identifier(clean_num)
    for result in results:
        if _normalize_bill_identifier(result.get("identifier")) == wanted:
            return result

    # No identifier matched: fall back to the API's first hit.
    # NOTE(review): this may be a different bill than the one requested.
    return results[0]
| 191 | + |
| 192 | + |
def get_bill_detail(openstates_id):
    """Retrieve one bill record, including its actions, by OpenStates id."""
    endpoint = f"/bills/{openstates_id}"
    query = {"include": "actions"}
    return openstates_request(endpoint, query)
| 196 | + |
| 197 | + |
# Two-letter postal abbreviation -> full jurisdiction name
# (the 50 states plus the District of Columbia).
ABBR_TO_STATE = dict(
    AL="Alabama",
    AK="Alaska",
    AZ="Arizona",
    AR="Arkansas",
    CA="California",
    CO="Colorado",
    CT="Connecticut",
    DE="Delaware",
    FL="Florida",
    GA="Georgia",
    HI="Hawaii",
    ID="Idaho",
    IL="Illinois",
    IN="Indiana",
    IA="Iowa",
    KS="Kansas",
    KY="Kentucky",
    LA="Louisiana",
    ME="Maine",
    MD="Maryland",
    MA="Massachusetts",
    MI="Michigan",
    MN="Minnesota",
    MS="Mississippi",
    MO="Missouri",
    MT="Montana",
    NE="Nebraska",
    NV="Nevada",
    NH="New Hampshire",
    NJ="New Jersey",
    NM="New Mexico",
    NY="New York",
    NC="North Carolina",
    ND="North Dakota",
    OH="Ohio",
    OK="Oklahoma",
    OR="Oregon",
    PA="Pennsylvania",
    RI="Rhode Island",
    SC="South Carolina",
    SD="South Dakota",
    TN="Tennessee",
    TX="Texas",
    UT="Utah",
    VT="Vermont",
    VA="Virginia",
    WA="Washington",
    WV="West Virginia",
    WI="Wisconsin",
    WY="Wyoming",
    DC="District of Columbia",
)
| 214 | + |
| 215 | + |
def main():
    """Refresh the status fields of tracked bills from OpenStates.

    Parses the command-line flags, loads candidate rows from the
    ``processed_bills`` table, looks each bill up on OpenStates and, when the
    latest action or its date differs from the stored values, writes back
    ``last_action``, ``last_action_date`` and ``status`` (skipped with
    ``--dry-run``).

    Returns:
        Process exit code: 1 when required configuration or the supabase
        package is missing, otherwise 0.
    """
    parser = argparse.ArgumentParser(description="Refresh bill status from OpenStates")
    parser.add_argument("--state", help="Filter to specific state (e.g., NY)")
    parser.add_argument("--limit", type=int, default=100, help="Max bills to refresh (default: 100)")
    parser.add_argument("--dry-run", action="store_true", help="Show changes without writing")
    parser.add_argument("--all", action="store_true", help="Include all bills, not just scored ones")
    args = parser.parse_args()

    if not OPENSTATES_API_KEY:
        print("Error: OPENSTATES_API_KEY environment variable not set")
        return 1

    try:
        from supabase import create_client
    except ImportError:
        print("Error: supabase package not installed")
        return 1

    url = os.environ.get("SUPABASE_URL")
    key = os.environ.get("SUPABASE_KEY")
    if not url or not key:
        print("Error: SUPABASE_URL and SUPABASE_KEY required")
        return 1

    supabase = create_client(url, key)

    # Fetch bills to refresh
    query = supabase.table("processed_bills").select(
        "id, bill_id, state, bill_number, title, status, last_action, last_action_date, confidence_score, legiscan_url"
    )

    if not args.all:
        query = query.gt("confidence_score", 0)

    # Rows with a skip reason are never refreshed, even with --all.
    query = query.is_("skipped_reason", "null")

    if args.state:
        query = query.eq("state", args.state)

    # Highest-scoring bills first, so --limit keeps the most relevant ones.
    query = query.order("confidence_score", desc=True)

    result = query.execute()
    bills = result.data[:args.limit]

    print("Bill Status Refresh")
    print("===================")
    print(f"Bills to refresh: {len(bills)} (of {len(result.data)} total)")
    if args.state:
        print(f"State filter: {args.state}")
    print(f"Dry run: {args.dry_run}")
    print()

    updated = 0
    skipped = 0
    errors = 0

    for i, bill in enumerate(bills):
        # Rate limiting: 10 req/min on free tier. Pausing before every lookup
        # except the first keeps the gap between API calls even when the
        # previous iteration left early (bill not found) or raised.
        if i > 0:
            time.sleep(7)

        state = bill["state"]
        bn = bill["bill_number"]
        # Unknown abbreviations are passed through to the API unchanged.
        state_name = ABBR_TO_STATE.get(state, state)

        print(f"[{i+1}/{len(bills)}] {state} {bn}...", end=" ", flush=True)

        try:
            # Search for the bill on OpenStates by state + bill number
            detail = search_bill_on_openstates(state_name, bn)

            if not detail:
                print("not found on OpenStates")
                skipped += 1
                continue

            # Extract actions and classify stage
            actions = detail.get("actions", [])
            stage = classify_stage(actions) if actions else "introduced"

            # Get latest action info; `or` also covers null values in the
            # payload, so the [:50] slice below cannot fail on None.
            latest_action = detail.get("latest_action_description") or ""
            latest_action_date = detail.get("latest_action_date") or None

            # Determine if anything changed
            old_action = bill.get("last_action", "")
            old_date = bill.get("last_action_date", "")

            stage_label = STAGE_LABELS.get(stage, stage)

            if latest_action == old_action and latest_action_date == old_date:
                print(f"{stage_label} (no change)")
                skipped += 1
            else:
                print(f"{stage_label} | {latest_action[:50]} ({latest_action_date})")

                if not args.dry_run:
                    update_data = {
                        "last_action": latest_action,
                        "last_action_date": latest_action_date,
                        "status": stage_label,
                    }

                    (
                        supabase.table("processed_bills")
                        .update(update_data)
                        .eq("id", bill["id"])
                        .execute()
                    )

                # Counted in dry-run mode too: the number of bills that
                # would be (or were) updated.
                updated += 1

        except Exception as e:
            # Best effort: one failing bill must not abort the whole run.
            print(f"ERROR: {e}")
            errors += 1

    print()
    print("Done!")
    print(f" Updated: {updated}")
    print(f" No change: {skipped}")
    print(f" Errors: {errors}")
    return 0
| 335 | + |
| 336 | + |
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    exit_code = main()
    sys.exit(exit_code)
0 commit comments