Skip to content

Commit 9f592a7

Browse files
Merge pull request #158 from PolicyEngine/feature/bill-activity-feed
Add legislative activity feed and bill status refresh
2 parents 4a3b28d + 0925d86 commit 9f592a7

4 files changed

Lines changed: 1003 additions & 93 deletions

File tree

scripts/refresh_bill_status.py

Lines changed: 338 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,338 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Refresh Bill Status from OpenStates API
4+
5+
Updates legislative stage, last action, and status for tracked bills
6+
in the processed_bills table.
7+
8+
Usage:
9+
export $(grep -v '^#' .env | xargs)
10+
11+
# Refresh all scored, non-skipped bills (highest score first)
12+
python scripts/refresh_bill_status.py
13+
14+
# Refresh specific state
15+
python scripts/refresh_bill_status.py --state NY
16+
17+
# Limit number of API calls (respect 250/day limit)
18+
python scripts/refresh_bill_status.py --limit 50
19+
20+
# Dry run — show what would be updated
21+
python scripts/refresh_bill_status.py --dry-run
22+
23+
# Include all bills (not just scored ones)
24+
python scripts/refresh_bill_status.py --all
25+
"""
26+
27+
import os
28+
import sys
29+
import json
30+
import argparse
31+
import time
32+
import requests
33+
34+
# ============== Configuration ==============

# API key is read from the environment at import time; None when unset.
OPENSTATES_API_KEY = os.getenv("OPENSTATES_API_KEY")
# Root URL that endpoint paths are appended to.
OPENSTATES_BASE_URL = "https://v3.openstates.org"
38+
39+
# Internal stage key -> the action classifications that map onto it.
# Precedence between stages is decided by STAGE_ORDER, not by position here.
_CLASSIFICATIONS_BY_STAGE = (
    ("introduced", ("introduction", "filing")),
    ("in_committee", ("referral-committee",)),
    ("passed_committee", ("committee-passage",)),
    ("first_reading", ("reading-1",)),
    ("second_reading", ("reading-2",)),
    ("third_reading", ("reading-3",)),
    ("passed_chamber", ("passage",)),
    ("sent_to_governor", ("executive-receipt",)),
    ("signed", ("executive-signature", "became-law")),
    ("vetoed", ("executive-veto", "executive-veto-line-item")),
    ("dead", ("failure", "withdrawal")),
)

# Action classification -> internal stage key (flattened view of the above).
STAGE_CLASSIFICATIONS = {
    classification: stage_key
    for stage_key, group in _CLASSIFICATIONS_BY_STAGE
    for classification in group
}
58+
59+
# Stages listed from earliest to furthest along; a stage's rank is its index.
_STAGE_PROGRESSION = (
    "prefiled",
    "introduced",
    "in_committee",
    "passed_committee",
    "first_reading",
    "second_reading",
    "third_reading",
    "passed_chamber",
    "passed_both",
    "sent_to_governor",
    "signed",
    "vetoed",
)

# Numeric rank per stage (higher = further along).
STAGE_ORDER = {stage_key: rank for rank, stage_key in enumerate(_STAGE_PROGRESSION)}
# "dead" ranks below every other stage.
STAGE_ORDER["dead"] = -1
75+
76+
# Human-readable label for each internal stage key.
STAGE_LABELS = {
    # Before any floor activity
    "prefiled": "Pre-filed",
    "introduced": "Introduced",
    # Committee work
    "in_committee": "In Committee",
    "passed_committee": "Passed Committee",
    # Floor readings
    "first_reading": "First Reading",
    "second_reading": "Second Reading",
    "third_reading": "Third Reading",
    # Chamber passage
    "passed_chamber": "Passed One Chamber",
    "passed_both": "Passed Both Chambers",
    # Executive action
    "sent_to_governor": "Sent to Governor",
    "signed": "Signed into Law",
    "vetoed": "Vetoed",
    # Terminal failure
    "dead": "Dead/Withdrawn",
}
92+
93+
94+
def openstates_request(endpoint, params=None, max_retries=3, timeout=30):
    """Make a GET request to the OpenStates API v3 with retry on rate limit.

    Args:
        endpoint: Path appended to OPENSTATES_BASE_URL (e.g. "/bills").
        params: Optional dict of query-string parameters.
        max_retries: How many times to wait and retry after an HTTP 429.
            One final attempt is made after the last wait, so up to
            ``max_retries + 1`` requests are sent.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the whole run.

    Returns:
        The decoded JSON body, or None when the API answers 404 or is
        still rate limiting after all retries.

    Raises:
        ValueError: If OPENSTATES_API_KEY is not set.
        requests.HTTPError: For any other non-success status code.
        requests.RequestException: For connection failures and timeouts.
    """
    if not OPENSTATES_API_KEY:
        raise ValueError("OPENSTATES_API_KEY environment variable not set")

    headers = {"X-API-KEY": OPENSTATES_API_KEY}
    url = f"{OPENSTATES_BASE_URL}{endpoint}"
    query = params or {}
    retries = max(max_retries, 0)

    # `retries` back-off rounds plus one last attempt, all handled the same
    # way so a 404 on the last attempt also yields None instead of raising.
    for attempt in range(retries + 1):
        response = requests.get(url, headers=headers, params=query, timeout=timeout)

        if response.status_code == 429:
            if attempt == retries:
                print(f" Still rate limited after {max_retries} retries, skipping")
                return None
            # Linear back-off: 15s, 30s, 45s, ...
            wait = 15 * (attempt + 1)
            print(f" Rate limited, waiting {wait}s...")
            time.sleep(wait)
            continue

        if response.status_code == 404:
            return None

        response.raise_for_status()
        return response.json()

    return None
123+
124+
125+
def classify_stage(actions):
    """Determine the legislative stage from a list of bill actions.

    Each action's ``classification`` entries are mapped through
    STAGE_CLASSIFICATIONS; the stage only moves forward according to
    STAGE_ORDER. Passage is tracked per chamber, and once two distinct
    chambers have passed the bill the stage becomes "passed_both".
    A "dead" classification always sets the stage to "dead"; because "dead"
    has the lowest rank, any later classified action replaces it again.

    Args:
        actions: Iterable of action dicts. Each may carry a
            ``classification`` list and an ``organization`` dict whose
            ``classification`` names the chamber.

    Returns:
        A stage key; "introduced" when no action maps to a later stage.
    """
    stage = "introduced"
    chambers_passed = set()

    for action in actions:
        # Tolerate keys that are missing or explicitly null.
        classifications = action.get("classification") or []
        organization = action.get("organization") or {}
        chamber = organization.get("classification") or ""  # "upper" or "lower"

        for cls in classifications:
            mapped = STAGE_CLASSIFICATIONS.get(cls)
            if not mapped:
                continue

            if mapped == "dead":
                stage = "dead"
                continue

            if mapped == "passed_chamber":
                # An unknown chamber must not count as a second chamber.
                if chamber:
                    chambers_passed.add(chamber)
                if len(chambers_passed) >= 2:
                    mapped = "passed_both"

            # Only ever advance; never fall back from a later stage.
            if STAGE_ORDER.get(mapped, 0) > STAGE_ORDER.get(stage, 0):
                stage = mapped

    return stage
156+
157+
158+
def search_bill_on_openstates(state_name, bill_number):
    """Search for a bill by state + identifier on OpenStates.

    Args:
        state_name: Value sent as the ``jurisdiction`` query parameter.
        bill_number: Bill identifier as stored locally (e.g. "HB05133").

    Returns:
        The matching search result (requested with actions included), or
        None when the search returns nothing. A result matches when its
        identifier equals ``bill_number`` after removing spaces, upper-casing
        and dropping zeros that directly follow the letter prefix. If no
        result matches, the first search result is returned as a fallback.
    """
    # Function-scope import: `re` is not among this file's top-level imports.
    import re

    def _normalize(identifier):
        # "HB 05133" / "hb5133" -> "HB5133"
        compact = (identifier or "").replace(" ", "").upper()
        return re.sub(r'([A-Z]+)0*(\d+)', r'\1\2', compact)

    # The query is sent as stored, with surrounding whitespace trimmed;
    # zero-stripping is applied only when comparing identifiers below.
    clean_num = bill_number.strip()

    params = {
        "jurisdiction": state_name,
        "q": clean_num,
        "per_page": 5,
        "include": "actions",
    }

    data = openstates_request("/bills", params)
    if not data or not data.get("results"):
        return None

    results = data["results"]
    search_norm = _normalize(clean_num)

    # Find best match by identifier
    for result in results:
        if _normalize(result.get("identifier")) == search_norm:
            return result

    # If no exact match, return first result as fallback.
    # NOTE(review): this may be a different bill than the one requested —
    # confirm callers are happy to record its status.
    return results[0]
191+
192+
193+
def get_bill_detail(openstates_id):
    """Look up a single bill by its OpenStates ID, requesting its actions."""
    endpoint = f"/bills/{openstates_id}"
    query = {"include": "actions"}
    return openstates_request(endpoint, query)
196+
197+
198+
# Two-letter postal abbreviation -> full name (50 states plus DC).
ABBR_TO_STATE = {
    "AL": "Alabama",
    "AK": "Alaska",
    "AZ": "Arizona",
    "AR": "Arkansas",
    "CA": "California",
    "CO": "Colorado",
    "CT": "Connecticut",
    "DE": "Delaware",
    "FL": "Florida",
    "GA": "Georgia",
    "HI": "Hawaii",
    "ID": "Idaho",
    "IL": "Illinois",
    "IN": "Indiana",
    "IA": "Iowa",
    "KS": "Kansas",
    "KY": "Kentucky",
    "LA": "Louisiana",
    "ME": "Maine",
    "MD": "Maryland",
    "MA": "Massachusetts",
    "MI": "Michigan",
    "MN": "Minnesota",
    "MS": "Mississippi",
    "MO": "Missouri",
    "MT": "Montana",
    "NE": "Nebraska",
    "NV": "Nevada",
    "NH": "New Hampshire",
    "NJ": "New Jersey",
    "NM": "New Mexico",
    "NY": "New York",
    "NC": "North Carolina",
    "ND": "North Dakota",
    "OH": "Ohio",
    "OK": "Oklahoma",
    "OR": "Oregon",
    "PA": "Pennsylvania",
    "RI": "Rhode Island",
    "SC": "South Carolina",
    "SD": "South Dakota",
    "TN": "Tennessee",
    "TX": "Texas",
    "UT": "Utah",
    "VT": "Vermont",
    "VA": "Virginia",
    "WA": "Washington",
    "WV": "West Virginia",
    "WI": "Wisconsin",
    "WY": "Wyoming",
    "DC": "District of Columbia",
}
214+
215+
216+
# Pause between bills; the original note cites 10 req/min on the free tier.
_SECONDS_BETWEEN_BILLS = 7


def _refresh_bill(supabase, bill, dry_run):
    """Refresh one bill and report the outcome.

    Looks the bill up on OpenStates, derives its stage, and writes
    ``last_action``, ``last_action_date`` and ``status`` to the
    processed_bills row when the latest action or its date differs from the
    stored values (unless ``dry_run`` is set).

    Returns:
        "not_found", "unchanged" or "updated". In a dry run, "updated" means
        the row would have been written.
    """
    state = bill["state"]
    state_name = ABBR_TO_STATE.get(state, state)

    # Search for the bill on OpenStates by state + bill number
    detail = search_bill_on_openstates(state_name, bill["bill_number"])
    if not detail:
        print("not found on OpenStates")
        return "not_found"

    # Extract actions and classify stage
    actions = detail.get("actions") or []
    stage = classify_stage(actions) if actions else "introduced"
    stage_label = STAGE_LABELS.get(stage, stage)

    # Null and empty values are normalised on both sides so they compare
    # equal and the slice below never sees None.
    latest_action = detail.get("latest_action_description") or ""
    latest_action_date = detail.get("latest_action_date") or None
    old_action = bill.get("last_action") or ""
    old_date = bill.get("last_action_date") or None

    if latest_action == old_action and latest_action_date == old_date:
        print(f"{stage_label} (no change)")
        return "unchanged"

    print(f"{stage_label} | {latest_action[:50]} ({latest_action_date})")

    if not dry_run:
        update_data = {
            "last_action": latest_action,
            "last_action_date": latest_action_date,
            "status": stage_label,
        }
        supabase.table("processed_bills") \
            .update(update_data) \
            .eq("id", bill["id"]) \
            .execute()

    return "updated"


def main():
    """Refresh tracked bills in processed_bills from OpenStates.

    Parses the command-line options, loads the candidate bills from
    Supabase (highest confidence_score first), refreshes each one, and
    prints a summary.

    Returns:
        Process exit code: 1 when the API key, the supabase package or the
        Supabase credentials are missing; 0 otherwise, including when
        individual bills fail.
    """
    parser = argparse.ArgumentParser(description="Refresh bill status from OpenStates")
    parser.add_argument("--state", help="Filter to specific state (e.g., NY)")
    parser.add_argument("--limit", type=int, default=100, help="Max bills to refresh (default: 100)")
    parser.add_argument("--dry-run", action="store_true", help="Show changes without writing")
    parser.add_argument("--all", action="store_true", help="Include all bills, not just scored ones")
    args = parser.parse_args()

    if not OPENSTATES_API_KEY:
        print("Error: OPENSTATES_API_KEY environment variable not set")
        return 1

    try:
        from supabase import create_client
    except ImportError:
        print("Error: supabase package not installed")
        return 1

    url = os.environ.get("SUPABASE_URL")
    key = os.environ.get("SUPABASE_KEY")
    if not url or not key:
        print("Error: SUPABASE_URL and SUPABASE_KEY required")
        return 1

    supabase = create_client(url, key)

    # Fetch bills to refresh
    query = supabase.table("processed_bills") \
        .select("id, bill_id, state, bill_number, title, status, last_action, last_action_date, confidence_score, legiscan_url")

    if not args.all:
        query = query.gt("confidence_score", 0)

    query = query.is_("skipped_reason", "null")

    if args.state:
        query = query.eq("state", args.state)

    query = query.order("confidence_score", desc=True)

    result = query.execute()
    candidates = result.data or []
    # Clamp so a negative --limit selects nothing instead of slicing from
    # the end of the list.
    bills = candidates[:max(args.limit, 0)]

    print("Bill Status Refresh")
    print("===================")
    print(f"Bills to refresh: {len(bills)} (of {len(candidates)} total)")
    if args.state:
        print(f"State filter: {args.state}")
    print(f"Dry run: {args.dry_run}")
    print()

    counts = {"updated": 0, "unchanged": 0, "not_found": 0, "errors": 0}
    total = len(bills)

    for position, bill in enumerate(bills, start=1):
        print(f"[{position}/{total}] {bill['state']} {bill['bill_number']}...", end=" ", flush=True)

        try:
            outcome = _refresh_bill(supabase, bill, args.dry_run)
        except Exception as e:
            # Per-bill boundary: report the failure and keep going.
            print(f"ERROR: {e}")
            outcome = "errors"
        counts[outcome] += 1

        # Sleep after every bill except the last, including not-found ones,
        # since each lookup has already spent an API request.
        if position < total:
            time.sleep(_SECONDS_BETWEEN_BILLS)

    print()
    print("Done!")
    print(f" Updated: {counts['updated']}")
    print(f" No change: {counts['unchanged']}")
    print(f" Not found: {counts['not_found']}")
    print(f" Errors: {counts['errors']}")
    return 0
335+
336+
337+
# Run as a script: use main()'s return value as the process exit status.
if __name__ == "__main__":
    exit_code = main()
    raise SystemExit(exit_code)

0 commit comments

Comments
 (0)