Skip to content

Commit

Permalink
wip: debug long island coordinate reference system
Browse files Browse the repository at this point in the history
  • Loading branch information
jaanli committed Apr 21, 2024
1 parent b2e2af2 commit 9b35c4f
Show file tree
Hide file tree
Showing 4 changed files with 2,943 additions and 24 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

## Demo Notebooks

- Parsing the entirety of the 3.9 million rows and 21 columns of the Department of Buildings Permit Issuance dataset from NYC Open Data:
- Parsing the entirety of the 3.9 million rows and 60 columns of the Department of Buildings Permit Issuance dataset from NYC Open Data:
- in the `notebooks` directory: https://github.com/jaanli/new-york-real-estate/blob/main/notebooks/nyc.gov_department_of_buildings_permit_issuance_data_processing.ipynb or: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/jaanli/new-york-real-estate/blob/main/notebooks/nyc.gov_department_of_buildings_permit_issuance_data_processing.ipynb)

## Getting started
Expand Down
81 changes: 60 additions & 21 deletions data_processing/models/cityofnewyork.us/dob_permit_issuance.sql
Original file line number Diff line number Diff line change
@@ -1,27 +1,66 @@
{{ config(materialized='external', location=var('output_path') + '/' + this.name + '.parquet') }}

SELECT
"BOROUGH"::ENUM ('BRONX','BROOKLYN','MANHATTAN','QUEENS','STATEN I;iay;iaSLAND','STATEN ISLAND') AS "BOROUGH",
regexp_replace("Job doc. #", '0', '')::ENUM ('1','2','3','4','5','6','7','8','9','10','11','12') AS "Job doc. #",
"Job Type"::ENUM ('A1','A2','A3','DM','NB','SG') AS "Job Type",
"Self_Cert"::ENUM ('J','N','R','X','Y') AS "Self_Cert",
regexp_replace("Bldg Type", '0', '')::ENUM ('1','2') AS "Bldg Type",
"Residential"::ENUM ('YES') AS "Residential",
"Special District 2"::ENUM ('BPRK','GCP2','GW','HILI','IBZ','JAM','POPS') AS "Special District 2",
"Work Type"::ENUM ('BL','CC','EQ','FA','FB','FP','FS','MH','NB','OT','PL','SD','SP') AS "Work Type",
"Permit Status"::ENUM ('IN PROCESS','ISSUED','RE-ISSUED','REVOKED') AS "Permit Status",
"Filing Status"::ENUM ('INITIAL','RENEWAL') AS "Filing Status",
"Permit Type"::ENUM ('AL','DM','EQ','EW','FO','NB','PL','SG') AS "Permit Type",
regexp_replace("Permit Sequence #", '0', '')::ENUM ('1','2','3','4','5','6','7','8','9','10','11','12','13','14','15','16','17','18','19','20','21','22','23','24','25','26','27','28','29','30','31','32','33','34','35','36') AS "Permit Sequence #",
"Permit Subtype"::ENUM ('BL','CH','EA','FA','FB','FN','FP','FS','MH','OT','SC','SD','SF','SH','SP') AS "Permit Subtype",
"Oil Gas"::ENUM ('GAS','OIL') AS "Oil Gas",
"Site Fill"::ENUM ('NONE','NOT APPLICABLE','OFF-SITE','ON-SITE','USE UNDER 300 CU.YD') AS "Site Fill",
"Permittee's License Type"::ENUM ('5S','DM','FS','GC','HI','MP','N','NW','OB','OW','PE','RA','SI','T@') AS "Permittees License Type",
"Act as Superintendent"::ENUM ('A','N','Y') AS "Act as Superintendent",
regexp_replace("Owner's Business Type", '[^a-zA-Z0-9-/;: ]', '', 'g')::ENUM ('2022-05-09 00:00:00','CONDO/CO-OP','CORPORATION','DCAS','DOE','HHC','HPD','INDIVIDUAL','NY STATE','NYC AGENCY','NYCHA','NYCHA/HHC','OTHER','OTHER GOVT AGENCY','PARTNERSHIP') AS "Owners Business Type",
"Non-Profit"::ENUM ('8','N','Y','—') AS "Non-Profit",
"Owner’s House State"::ENUM ('AK','AZ','CA','CO','CT','DC','DE','FL','GA','IA','IL','IN','KS','KY','LA','MA','MD','ME','MI','MN','MO','NC','ND','NE','NH','NJ','NM','NV','NY','OH','OK','OR','PA','PR','RI','SC','SD','TN','TX','UT','VA','VT','WA') AS "Owners House State",
regexp_replace("COUNCIL_DISTRICT", '0', '')::ENUM ('1','2','3','4','5','6','7','8','9','10','11','12','13','14','15','16','17','18','19','20','21','22','23','24','25','26','27','28','29','30','31','32','33','34','35','36','37','38','39','40','41','42','43','44','45','46','47','48','49','50','51') AS "COUNCIL_DISTRICT",
"BOROUGH"::ENUM ('BRONX','BROOKLYN','MANHATTAN','QUEENS','STATEN I;iay;iaSLAND','STATEN ISLAND') AS "BOROUGH",
"Bin #" AS "Bin #",
"House #" AS "House #",
"Street Name" AS "Street Name",
"Job #" AS "Job #",
regexp_replace("Job doc. #", '0', '')::ENUM ('1','2','3','4','5','6','7','8','9','10','11','12') AS "Job doc. #",
"Job Type"::ENUM ('A1','A2','A3','DM','NB','SG') AS "Job Type",
"Self_Cert"::ENUM ('J','N','R','X','Y') AS "Self_Cert",
"Block" AS "Block",
"Lot" AS "Lot",
"Community Board" AS "Community Board",
"Zip Code" AS "Zip Code",
regexp_replace("Bldg Type", '0', '')::ENUM ('1','2') AS "Bldg Type",
"Residential"::ENUM ('YES') AS "Residential",
"Special District 1" AS "Special District 1",
"Special District 2"::ENUM ('BPRK','GCP2','GW','HILI','IBZ','JAM','POPS') AS "Special District 2",
"Work Type"::ENUM ('BL','CC','EQ','FA','FB','FP','FS','MH','NB','OT','PL','SD','SP') AS "Work Type",
"Permit Status"::ENUM ('IN PROCESS','ISSUED','RE-ISSUED','REVOKED') AS "Permit Status",
"Filing Status"::ENUM ('INITIAL','RENEWAL') AS "Filing Status",
"Permit Type"::ENUM ('AL','DM','EQ','EW','FO','NB','PL','SG') AS "Permit Type",
regexp_replace("Permit Sequence #", '0', '')::ENUM ('1','2','3','4','5','6','7','8','9','10','11','12','13','14','15','16','17','18','19','20','21','22','23','24','25','26','27','28','29','30','31','32','33','34','35','36') AS "Permit Sequence #",
"Permit Subtype"::ENUM ('BL','CH','EA','FA','FB','FN','FP','FS','MH','OT','SC','SD','SF','SH','SP') AS "Permit Subtype",
"Oil Gas"::ENUM ('GAS','OIL') AS "Oil Gas",
"Site Fill"::ENUM ('NONE','NOT APPLICABLE','OFF-SITE','ON-SITE','USE UNDER 300 CU.YD') AS "Site Fill",
"Filing Date" AS "Filing Date",
"Issuance Date" AS "Issuance Date",
"Expiration Date" AS "Expiration Date",
"Job Start Date" AS "Job Start Date",
"Permittee's First Name" AS "Permittee's First Name",
"Permittee's Last Name" AS "Permittee's Last Name",
"Permittee's Business Name" AS "Permittee's Business Name",
"Permittee's Phone #" AS "Permittee's Phone #",
"Permittee's License Type"::ENUM ('5S','DM','FS','GC','HI','MP','N','NW','OB','OW','PE','RA','SI','T@') AS "Permittees License Type",
"Permittee's License #" AS "Permittee's License #",
"Act as Superintendent"::ENUM ('A','N','Y') AS "Act as Superintendent",
"Permittee's Other Title" AS "Permittee's Other Title",
"HIC License" AS "HIC License",
"Site Safety Mgr's First Name" AS "Site Safety Mgr's First Name",
"Site Safety Mgr's Last Name" AS "Site Safety Mgr's Last Name",
"Site Safety Mgr Business Name" AS "Site Safety Mgr Business Name",
"Superintendent First & Last Name" AS "Superintendent First & Last Name",
"Superintendent Business Name" AS "Superintendent Business Name",
regexp_replace("Owner's Business Type", '[^a-zA-Z0-9-/;: ]', '', 'g')::ENUM ('2022-05-09 00:00:00','CONDO/CO-OP','CORPORATION','DCAS','DOE','HHC','HPD','INDIVIDUAL','NY STATE','NYC AGENCY','NYCHA','NYCHA/HHC','OTHER','OTHER GOVT AGENCY','PARTNERSHIP') AS "Owners Business Type",
"Non-Profit"::ENUM ('8','N','Y','—') AS "Non-Profit",
"Owner's Business Name" AS "Owner's Business Name",
"Owner's First Name" AS "Owner's First Name",
"Owner's Last Name" AS "Owner's Last Name",
"Owner's House #" AS "Owner's House #",
"Owner's House Street Name" AS "Owner's House Street Name",
"Owner’s House City" AS "Owner’s House City",
"Owner’s House State"::ENUM ('AK','AZ','CA','CO','CT','DC','DE','FL','GA','IA','IL','IN','KS','KY','LA','MA','MD','ME','MI','MN','MO','NC','ND','NE','NH','NJ','NM','NV','NY','OH','OK','OR','PA','PR','RI','SC','SD','TN','TX','UT','VA','VT','WA') AS "Owners House State",
"Owner’s House Zip Code" AS "Owner’s House Zip Code",
"Owner's Phone #" AS "Owner's Phone #",
"DOBRunDate" AS "DOBRunDate",
"PERMIT_SI_NO" AS "PERMIT_SI_NO",
"LATITUDE" AS "LATITUDE",
"LONGITUDE" AS "LONGITUDE",
regexp_replace("COUNCIL_DISTRICT", '0', '')::ENUM ('1','2','3','4','5','6','7','8','9','10','11','12','13','14','15','16','17','18','19','20','21','22','23','24','25','26','27','28','29','30','31','32','33','34','35','36','37','38','39','40','41','42','43','44','45','46','47','48','49','50','51') AS "COUNCIL_DISTRICT",
"CENSUS_TRACT" AS "CENSUS_TRACT",
"NTA_NAME" AS "NTA_NAME",
FROM read_csv('~/Downloads/DOB_Permit_Issuance_20240419.csv',
types={
'Bldg Type': 'VARCHAR',
Expand Down
Loading

0 comments on commit 9b35c4f

Please sign in to comment.