From 4d43247fe94a56e6c51391595b207ff97f30872f Mon Sep 17 00:00:00 2001 From: jaanli Date: Tue, 23 Apr 2024 06:54:47 -0600 Subject: [PATCH] wip: debug real property master acris data dictionary from department of finance --- README.md | 6 +- .../dof_acris_real_property_master.sql | 283 +++++++++++++++++- ...on_system_acris_real_property_master.ipynb | 102 ++++++- 3 files changed, 375 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index efcc78b..3535b44 100644 --- a/README.md +++ b/README.md @@ -81,4 +81,8 @@ ffmpeg -i trimmed_recording.mov -filter:v "setpts=PTS/5,fps=24" -an sped.mov # make gif ffmpeg -i sped.mov -vf "fps=20,scale=1080:-1:flags=lanczos,palettegen=stats_mode=diff" -y palette.png ffmpeg -i sped.mov -i palette.png -filter_complex "fps=20,scale=1080:-1:flags=lanczos[x];[x][1:v]paletteuse=dither=bayer:bayer_scale=5:diff_mode=rectangle" -y high_quality.gif -``` \ No newline at end of file +``` + +## SQL Formatting + +We rely on Name: dbt formatter (https://marketplace.visualstudio.com/items?itemName=henriblancke.vscode-dbt-formatter) for this. \ No newline at end of file
diff --git a/data_processing/models/cityofnewyork.us/dof_acris_real_property_master.sql b/data_processing/models/cityofnewyork.us/dof_acris_real_property_master.sql
index f1b9523..5b5d77a 100644
--- a/data_processing/models/cityofnewyork.us/dof_acris_real_property_master.sql
+++ b/data_processing/models/cityofnewyork.us/dof_acris_real_property_master.sql
@@ -1,5 +1,280 @@
-{{ config(materialized='external', location=var('output_path') + '/' + this.name + '.parquet') }}
+{{ config(
+    materialized = 'external',
+    location = var('output_path') + '/' + this.name + '.parquet'
+) }}
 
-SELECT * FROM read_csv_auto('~/Downloads/ACRIS_-_Real_Property_Master_20240422.csv',
- types={'DOC. DATE': 'VARCHAR'},
- dateformat='%m/%d/%Y')
\ No newline at end of file
+SELECT -- Reads the ACRIS Real Property Master CSV export, types "DOC. TYPE" / "DOC. AMOUNT" and adds a description for each document type code.
+    "DOCUMENT ID",
+    "RECORD TYPE",
+    "CRFN",
+    "BOROUGH",
+    "DOC. TYPE" :: enum( -- NOTE(review): a hard cast presumably errors on any code missing from this list; confirm that is the intended validation
+        'AGMT',
+        'AMFL',
+        'ASPM',
+        'ASST',
+        'ASTU',
+        'ASUM',
+        'BOND',
+        'BRUP',
+        'CERR',
+        'CNFL',
+        'CERT',
+        'CNTR',
+        'CODP',
+        'CONS',
+        'CONT',
+        'CTOR',
+        'DCTO',
+        'DECL',
+        'DEED',
+        'DEMM',
+        'DTL',
+        'EASE',
+        'FL',
+        'FTL',
+        'IDED',
+        'INIC',
+        'INIT',
+        'JUDG',
+        'LDMK',
+        'LEAS',
+        'LIC',
+        'LTPA',
+        'MAPS',
+        'MERG',
+        'MISC',
+        'MMTG',
+        'MTGE',
+        'PAT',
+        'PRFL',
+        'PSAT',
+        'PSGN',
+        'REL',
+        'RFL',
+        'RESO',
+        'RFTL',
+        'RLSE',
+        'RPAT',
+        'RTXL',
+        'SAGE',
+        'SAT',
+        'SMIS',
+        'SMTG',
+        'STP',
+        'SUBL',
+        'SUBO',
+        'TLS',
+        'TERM',
+        'TERT',
+        'UCC1',
+        'UCC3',
+        'VAC',
+        'WILL',
+        'ASGN',
+        'ASSTO',
+        'WSAT',
+        'RETT',
+        'RPTT',
+        'CDEC',
+        'AL&R',
+        'DEEDO',
+        'AMTX',
+        'AMND',
+        'AMTL',
+        'ATL',
+        'RPTT&RET',
+        'CORR',
+        'CORP',
+        'ZONE',
+        'AALR',
+        'CALR',
+        'ADEC',
+        'LOCC',
+        'TOLCC',
+        'DEVR',
+        'DECM',
+        'MLEA',
+        'MCON',
+        'M&CON',
+        'SPRD',
+        'TL&R',
+        'SUBM',
+        'PREL',
+        'ACON',
+        'CORRD',
+        'CORRM',
+        'CONDEED',
+        'REIT',
+        'TERL',
+        'ESTL',
+        'XXXX',
+        'CMTG',
+        'WFL',
+        'ESRM',
+        'NTXL',
+        'NAPP',
+        'TERA',
+        'RCRFL',
+        'DEED COR',
+        'DEED, LE',
+        'CORR, LE',
+        'DEED, TS',
+        'UCC ADEN',
+        'TERDECL',
+        'NAFTL',
+        'APPRT',
+        'AIRRIGHT',
+        'SI CORR',
+        'PWFL',
+        'PRCFL',
+        'DPFTL',
+        'DEEDP',
+        'TORREN',
+        'DEED, RC',
+        'SCDEC'
+    ) AS "DOC. TYPE",
+    CASE -- no ELSE branch: a code without a WHEN below yields a NULL description
+        "DOC. TYPE" -- from https://catalog.data.gov/dataset/acris-document-control-codes
+        WHEN 'AGMT' THEN 'AGREEMENT'
+        WHEN 'AMFL' THEN 'AMENDMENT OF FEDERAL LIEN'
+        WHEN 'ASPM' THEN 'ASSUMPTION OF MORTGAGE'
+        WHEN 'ASST' THEN 'ASSIGNMENT, MORTGAGE'
+        WHEN 'ASTU' THEN 'UNIT ASSIGNMENT'
+        WHEN 'ASUM' THEN 'UCC3 ASSUMPTION'
+        WHEN 'BOND' THEN 'BOND'
+        WHEN 'BRUP' THEN 'UCC3 BANKRUPTCY'
+        WHEN 'CERR' THEN 'CERTIFICATE OF REDUCTION'
+        WHEN 'CNFL' THEN 'CONTINUATION OF FEDERAL LIEN'
+        WHEN 'CERT' THEN 'CERTIFICATE'
+        WHEN 'CNTR' THEN 'CONTRACT OF SALE'
+        WHEN 'CODP' THEN 'CONDEMNATION PROCEEDINGS'
+        WHEN 'CONS' THEN 'CONSENT'
+        WHEN 'CONT' THEN 'UCC3 CONTINUATION'
+        WHEN 'CTOR' THEN 'COURT ORDER'
+        WHEN 'DCTO' THEN 'COURT ORDER ADVERSE POSS.'
+        WHEN 'DECL' THEN 'DECLARATION'
+        WHEN 'DEED' THEN 'DEED'
+        WHEN 'DEMM' THEN 'DECLARATION OF MODIFI OF MRT'
+        WHEN 'DTL' THEN 'DISCHARGE OF TAX LIEN'
+        WHEN 'EASE' THEN 'EASEMENT'
+        WHEN 'FL' THEN 'FEDERAL LIEN-IRS'
+        WHEN 'FTL' THEN 'FEDERAL LIEN, OTHER'
+        WHEN 'IDED' THEN 'IN REM DEED'
+        WHEN 'INIC' THEN 'INITIAL COOP UCC1'
+        WHEN 'INIT' THEN 'INITIAL UCC1'
+        WHEN 'JUDG' THEN 'JUDGMENT'
+        WHEN 'LDMK' THEN 'LANDMARK DESIGNATION'
+        WHEN 'LEAS' THEN 'LEASE'
+        WHEN 'LIC' THEN 'LICENSE'
+        WHEN 'LTPA' THEN 'LETTERS PATENT'
+        WHEN 'MAPS' THEN 'MAPS'
+        WHEN 'MERG' THEN 'MERGER'
+        WHEN 'MISC' THEN 'MISCELLANEOUS'
+        WHEN 'MMTG' THEN 'MASTER MORTGAGE'
+        WHEN 'MTGE' THEN 'MORTGAGE'
+        WHEN 'PAT' THEN 'POWER OF ATTORNEY'
+        WHEN 'PRFL' THEN 'PARTIAL RELEASE OF FED LIEN'
+        WHEN 'PSAT' THEN 'PARTIAL SATISFACTION'
+        WHEN 'PSGN' THEN 'UCC3 PARTIAL ASSIGNMENT'
+        WHEN 'REL' THEN 'RELEASE'
+        WHEN 'RFL' THEN 'RELEASE OF FEDERAL LIEN'
+        WHEN 'RESO' THEN 'RESOLUTION'
+        WHEN 'RFTL' THEN 'RELEASE OF FEDERAL TAX LIEN'
+        WHEN 'RLSE' THEN 'UCC3 RELEASE/UCC AMENDMENT'
+        WHEN 'RPAT' THEN 'REVOCATION OF POWER OF ATTORNE'
+        WHEN 'RTXL' THEN 'RELEASE OF ESTATE TAX LIEN'
+        WHEN 'SAGE' THEN 'SUNDRY AGREEMENT'
+        WHEN 'SAT' THEN 'SATISFACTION OF MORTGAGE'
+        WHEN 'SMIS' THEN 'SUNDRY MISCELLANEOUS'
+        WHEN 'SMTG' THEN 'SUNDRY MORTGAGE'
+        WHEN 'STP' THEN 'STREET PROCEDURE'
+        WHEN 'SUBL' THEN 'SUBORDINATION OF LEASE'
+        WHEN 'SUBO' THEN 'UCC3 SUBORDINATION'
+        WHEN 'TLS' THEN 'TAX LIEN SALE CERTIFICATE'
+        WHEN 'TERM' THEN 'UCC3 TERMINATION'
+        WHEN 'TERT' THEN 'TERMINATION OF TRUST'
+        WHEN 'UCC1' THEN 'UNIFORM COMMERCIAL CODE 1'
+        WHEN 'UCC3' THEN 'UNIFORM COMMERCIAL CODE 3'
+        WHEN 'VAC' THEN 'VACATE ORDER'
+        WHEN 'WILL' THEN 'CERTIFIED COPY OF WILL'
+        WHEN 'ASGN' THEN 'UCC3 ASSIGNMENT'
+        WHEN 'ASSTO' THEN 'ASSIGNMENT OF LEASE'
+        WHEN 'WSAT' THEN 'WITHHELD SATISFACTION'
+        WHEN 'RETT' THEN 'NYS REAL ESTATE TRANSFER TAX'
+        WHEN 'RPTT' THEN 'NYC REAL PROPERTY TRANSFER TAX'
+        WHEN 'CDEC' THEN 'CONDO DECLARATION'
+        WHEN 'AL&R' THEN 'ASSIGNMENT OF LEASES AND RENTS'
+        WHEN 'DEEDO' THEN 'DEED, OTHER'
+        WHEN 'AMTX' THEN 'ADDITIONAL MORTGAGE TAX'
+        WHEN 'AMND' THEN 'UCC3 AMENDMENT'
+        WHEN 'AMTL' THEN 'AMENDMENT OF TAX LIEN'
+        WHEN 'ATL' THEN 'ASSIGNMENT OF TAX LIEN'
+        WHEN 'RPTT&RET' THEN 'BOTH RPTT AND RETT'
+        WHEN 'CORR' THEN 'CORRECTION DOC-OFFICE USE ONLY'
+        WHEN 'CORP' THEN 'UCC 5 CORRECTION STATEMENT'
+        WHEN 'ZONE' THEN 'ZONING LOT DESCRIPTION'
+        WHEN 'AALR' THEN 'ASGN OF ASGN OF L&R'
+        WHEN 'CALR' THEN 'CANCEL/TERM ASGN L&R'
+        WHEN 'ADEC' THEN 'AMENDED CONDO DECLARATION'
+        WHEN 'LOCC' THEN 'LIEN OF COMMON CHARGES'
+        WHEN 'TOLCC' THEN 'TERM OF LIEN OF COMMON CHARGES'
+        WHEN 'DEVR' THEN 'DEVELOPMENT RIGHTS'
+        WHEN 'DECM' THEN 'DECLARATION OF MERGER'
+        WHEN 'MLEA' THEN 'MEMORANDUM OF LEASE'
+        WHEN 'MCON' THEN 'MEMORANDUM OF CONTRACT'
+        WHEN 'M&CON' THEN 'MORTGAGE AND CONSOLIDATION'
+        WHEN 'SPRD' THEN 'MORTGAGE SPREADER AGREEMENT'
+        WHEN 'TL&R' THEN 'TERMINATION OF ASSIGN OF L&R'
+        WHEN 'SUBM' THEN 'SUBORDINATION OF MORTGAGE'
+        WHEN 'PREL' THEN 'PARTIAL RELEASE OF MORTGAGE'
+        WHEN 'ACON' THEN 'ASSIGN/TERM OF CONTRACT/BID'
+        WHEN 'CORRD' THEN 'CORRECTION DEED'
+        WHEN 'CORRM' THEN 'CORRECTION MORTGAGE'
+        WHEN 'CONDEED' THEN 'CONFIRMATORY DEED'
+        WHEN 'REIT' THEN 'REAL ESTATE INV TRUST DEED'
+        WHEN 'TERL' THEN 'TERMINATION OF LEASE OR MEMO'
+        WHEN 'ESTL' THEN 'ESTOPPEL FOR OFFICE USE ONLY'
+        WHEN 'XXXX' THEN 'APPRT BREAKDWN OFFICE USE ONLY'
+        WHEN 'CMTG' THEN 'COLLATERAL MORTGAGE'
+        WHEN 'WFL' THEN 'WITHDRAWAL OF A FED LIEN'
+        WHEN 'ESRM' THEN 'ESTOPPAL REMOVAL OFFICE USE ON'
+        WHEN 'NTXL' THEN 'NOTICE OF ESTATE TAX LIEN'
+        WHEN 'NAPP' THEN 'NOTICE OF APPROPRIATION'
+        WHEN 'TERA' THEN 'TERMINATION OF AGREEMENT'
+        WHEN 'RCRFL' THEN 'REVOCATION OF CERTIF. OF RFL'
+        WHEN 'DEED COR' THEN 'CORRECT INDEX/DEED-OFFICE USE'
+        WHEN 'DEED, LE' THEN 'LIFE ESTATE DEED'
+        WHEN 'CORR, LE' THEN 'CORRECT LIFE ESTATE OFFICE USE'
+        WHEN 'DEED, TS' THEN 'TIMESHARE DEED'
+        WHEN 'UCC ADEN' THEN 'UCC COOPERATIVE ADDENDUM'
+        WHEN 'TERDECL' THEN 'TERM. OF CONDO DECLARATION'
+        WHEN 'NAFTL' THEN 'CERT NONATTCHMENT FED TAX LIEN'
+        WHEN 'APPRT' THEN 'APP. ORDER BREAKDWN OFFICE USE'
+        WHEN 'AIRRIGHT' THEN 'AIR RIGHTS'
+        WHEN 'SI CORR' THEN 'SI BILLING UPDATE OFFICE USE'
+        WHEN 'PWFL' THEN 'PARTIAL WITHDRAWL OF FED LIEN'
+        WHEN 'PRCFL' THEN 'PARTIAL REVOCATION OF CERT RFL'
+        WHEN 'DPFTL' THEN 'DISCHARGE OF PROPERTY FROM FTL'
+        WHEN 'DEEDP' THEN 'DEED, PRE RPT TAX'
+        WHEN 'TORREN' THEN 'TORREN'
+        WHEN 'DEED, RC' THEN 'DEED WITH RESTRICTIVE COVENANT'
+        WHEN 'SCDEC' THEN 'DECLARATION OF CONDO IN CONDO'
+    END AS "DOC. TYPE DESCRIPTION",
+    "DOC. DATE", -- stays VARCHAR: the types override in read_csv_auto below pins it to text
+    CAST(
+        "DOC. AMOUNT" AS DECIMAL(18, 3) -- exact decimal with explicit precision/scale (DuckDB's default for a bare DECIMAL)
+    ) AS "DOC. AMOUNT",
+    "RECORDED / FILED",
+    "MODIFIED DATE",
+    "REEL YEAR",
+    "REEL NBR",
+    "REEL PAGE",
+    "% TRANSFERRED",
+    "GOOD THROUGH DATE"
+FROM
+    read_csv_auto(
+        '{{ var("acris_real_property_master_csv", "~/Downloads/ACRIS_-_Real_Property_Master_20240422.csv") }}', -- dbt var override; the default is the previously hard-coded path
+        types = { 'DOC. DATE': 'VARCHAR' }, -- force text for this column instead of letting the reader infer a type
+        dateformat = '%m/%d/%Y' -- month/day/year format handed to the CSV reader for date parsing
+    )
diff --git a/notebooks/cityofnewyork.us_department_of_finance_automated_city_register_information_system_acris_real_property_master.ipynb b/notebooks/cityofnewyork.us_department_of_finance_automated_city_register_information_system_acris_real_property_master.ipynb index 9937fc9..88295f7 100644 --- a/notebooks/cityofnewyork.us_department_of_finance_automated_city_register_information_system_acris_real_property_master.ipynb +++ b/notebooks/cityofnewyork.us_department_of_finance_automated_city_register_information_system_acris_real_property_master.ipynb @@ -93,15 +93,17 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [ { - "name": "stdout", + "name": "stderr", "output_type": "stream", "text": [ - "The sql extension is already loaded. To reload it, use:\n", - " %reload_ext sql\n" + "/Users/me/jaanli/new-york-real-estate/.venv/lib/python3.12/site-packages/sql/traits.py:20: FutureWarning: named_parameters: boolean values are now deprecated. Value True will be treated as \"enabled\". \n", + "Please use a valid option: \"warn\", \"enabled\", or \"disabled\". 
\n", + "For more information, see the docs: https://jupysql.ploomber.io/en/latest/api/configuration.html#named-parameters\n", + " warnings.warn(\n" ] } ], @@ -857,7 +859,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -866,7 +868,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -879,7 +881,7 @@ " dtype='object')" ] }, - "execution_count": 21, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -890,7 +892,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -899,7 +901,7 @@ "" ] }, - "execution_count": 33, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" }, @@ -920,7 +922,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -1073,7 +1075,7 @@ "16178188 0 0 0.0 2023-07-31 " ] }, - "execution_count": 34, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -1082,6 +1084,84 @@ "df[df['DOC. AMOUNT'] > 10000000000]" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Real Property Parties\n", + "\n", + "So many fun events are happening related to new york city events, and we can use this data to help us plan related parties and associated festivities :)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2024-04-23 06:13:15-- https://data.cityofnewyork.us/api/views/636b-3b5g/rows.csv?fourfour=636b-3b5g&cacheBust=1712779124&date=20240423&accessType=DOWNLOAD\n", + "Resolving data.cityofnewyork.us (data.cityofnewyork.us)... 52.206.140.199, 52.206.68.26, 52.206.140.205\n", + "Connecting to data.cityofnewyork.us (data.cityofnewyork.us)|52.206.140.199|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: unspecified [text/csv]\n", + "Saving to: ‘rows.csv?fourfour=636b-3b5g&cacheBust=1712779124&date=20240423&accessType=DOWNLOAD’\n", + "\n", + "pe=DOWNLOAD [ <=> ] 162.01M 3.56MB/s ^C\n" + ] + } + ], + "source": [ + "!wget \"https://data.cityofnewyork.us/api/views/636b-3b5g/rows.csv?fourfour=636b-3b5g&cacheBust=1712779124&date=20240423&accessType=DOWNLOAD\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Data Dictionary\n", + "\n", + "From https://catalog.data.gov/dataset/acris-document-control-codes" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2024-04-23 06:15:13-- https://data.cityofnewyork.us/api/views/7isb-wh4c/rows.csv?accessType=DOWNLOAD\n", + "Resolving data.cityofnewyork.us (data.cityofnewyork.us)... 52.206.140.199, 52.206.68.26, 52.206.140.205\n", + "Connecting to data.cityofnewyork.us (data.cityofnewyork.us)|52.206.140.199|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: unspecified [text/csv]\n", + "Saving to: ‘rows.csv?accessType=DOWNLOAD’\n", + "\n", + "rows.csv?accessType [ <=> ] 9.03K --.-KB/s in 0s \n", + "\n", + "2024-04-23 06:15:13 (679 MB/s) - ‘rows.csv?accessType=DOWNLOAD’ saved [9251]\n", + "\n" + ] + } + ], + "source": [ + "!wget \"https://data.cityofnewyork.us/api/views/7isb-wh4c/rows.csv?accessType=DOWNLOAD\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "!mv rows.csv\\?accessType\\=DOWNLOAD dof_acris_control_codes.csv" + ] + }, { "cell_type": "code", "execution_count": null,