Skip to content

Commit 0ca4a89

Browse files
revisions based on Mjumbe's review
1 parent 2bcea54 commit 0ca4a89

File tree

1 file changed

5 additions and 6 deletions (+5 / -6 lines changed)

1 file changed

5 additions and 6 deletions (+5 / -6 lines changed)

airflow/plugins/operators/scrape_state_geoportal.py

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
import gzip
22
import logging
3-
4-
# import os
3+
import os
54
from typing import ClassVar, List
65

76
import pandas as pd # type: ignore
@@ -12,8 +11,7 @@
1211

1312
from airflow.models import BaseOperator # type: ignore
1413

15-
API_BUCKET = "gs://calitp-state-geoportal-scrape"
16-
# API_BUCKET = os.environ["CALITP_BUCKET__STATE_GEOPORTAL_DATA_PRODUCTS"]
14+
API_BUCKET = os.environ["CALITP_BUCKET__STATE_GEOPORTAL_DATA_PRODUCTS"]
1715

1816

1917
class StateGeoportalAPIExtract(PartitionedGCSArtifact):
@@ -88,7 +86,8 @@ def fetch_from_state_geoportal(self):
8886
params["resultOffset"] = offset
8987

9088
# Make the request
91-
response = requests.get(validated_url, params=params).raise_for_status()
89+
response = requests.get(validated_url, params=params)
90+
response.raise_for_status()
9291
data = response.json()
9392

9493
# Break the loop if there are no more features
@@ -187,7 +186,7 @@ def execute(self, **kwargs):
187186
df = pd.json_normalize(api_content)
188187

189188
if self.product == "state_highway_network":
190-
# Select columns to keep, have to be explicit because there are duplicate values after normalizing
189+
# Select columns to keep, have to be explicit before renaming because there are duplicate values after normalizing
191190
df = df[
192191
[
193192
"properties.Route",

0 commit comments

Comments
 (0)