diff --git a/deployment/dataproc/Dockerfile b/deployment/dataproc/Dockerfile
new file mode 100644
index 000000000..20fc8bade
--- /dev/null
+++ b/deployment/dataproc/Dockerfile
@@ -0,0 +1,17 @@
+# Image with the tools the dataproc scripts call: curl for the downloads and
+# osmctools for osmconvert.
+FROM ubuntu:20.04
+
+# WORKDIR creates the directory if it does not already exist.
+WORKDIR /usr/local/src
+
+# ca-certificates is listed explicitly: curl needs it for HTTPS downloads and
+# it is not installed automatically once recommended packages are skipped.
+RUN apt-get update \
+    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+        ca-certificates \
+        curl \
+        osmctools \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY . /usr/local/src
diff --git a/deployment/dataproc/combine-and-clip.sh b/deployment/dataproc/combine-and-clip.sh
new file mode 100755
index 000000000..8d088b69a
--- /dev/null
+++ b/deployment/dataproc/combine-and-clip.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+#
+# Clips each state's extract (STATE.pbf) to the CAC bounding box, writing
+# STATE.o5m, then merges the clipped extracts into a single cac.pbf.
+#
+# Reads and writes files in the current directory; the STATE.pbf inputs are
+# the files download-osm-data.sh produces.
+
+# Stop at the first failure so a missing or unreadable input is not silently
+# carried forward into the combined output.
+set -euo pipefail
+
+# Clipping box handed to osmconvert's -b option.
+BOUNDS=-76.209582,38.441753,-74.243725,40.725449
+
+declare -a STATES=("pennsylvania" "new-jersey" "delaware")
+
+for STATE in "${STATES[@]}"; do
+    echo "extracting OSM data for the region served by CAC for ${STATE}..."
+    osmconvert "${STATE}.pbf" -b="${BOUNDS}" \
+        --complete-ways --complex-ways --complete-boundaries --complete-multipolygons \
+        -o="${STATE}.o5m"
+done
+
+echo "combining state region extracts..."
+
+# One .o5m path per state, kept as an array so each is passed as its own argument.
+INPUTS=("${STATES[@]/%/.o5m}")
+
+osmconvert "${INPUTS[@]}" -o=cac.pbf
+
+echo "all done!"
diff --git a/deployment/dataproc/docker-compose.yml b/deployment/dataproc/docker-compose.yml
new file mode 100644
index 000000000..ea7acca97
--- /dev/null
+++ b/deployment/dataproc/docker-compose.yml
@@ -0,0 +1,10 @@
+version: "3"
+services:
+  osm:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    volumes:
+      # Bind-mount this directory over the image's working directory so files
+      # the scripts write there land on the host.
+      - ./:/usr/local/src
diff --git a/deployment/dataproc/download-osm-data.sh b/deployment/dataproc/download-osm-data.sh
new file mode 100755
index 000000000..f7be1d8b5
--- /dev/null
+++ b/deployment/dataproc/download-osm-data.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+#
+# Downloads the latest Geofabrik OSM extract for each state and saves it as
+# STATE.pbf in the current directory.
+
+# Stop at the first failed download rather than continuing with a partial set.
+set -euo pipefail
+
+declare -a STATES=("pennsylvania" "new-jersey" "delaware")
+
+for STATE in "${STATES[@]}"; do
+    echo "downloading OSM data for ${STATE}..."
+    # --fail: exit non-zero on an HTTP error instead of saving the error page
+    #         as the .pbf file.
+    # --location: follow redirects if the server issues one.
+    curl --fail --location \
+        "https://download.geofabrik.de/north-america/us/${STATE}-latest.osm.pbf" \
+        -o "${STATE}.pbf"
+done