From 11c31f53c9965beca9cf8cbaf89c4f1ac16fb13b Mon Sep 17 00:00:00 2001 From: jaanli Date: Mon, 29 Apr 2024 18:01:38 -0400 Subject: [PATCH] wip: debug real property parties replication of bug --- ...on_system_acris_real_property_master.ipynb | 167 ++++++++++++++++-- 1 file changed, 153 insertions(+), 14 deletions(-) diff --git a/notebooks/cityofnewyork.us_department_of_finance_automated_city_register_information_system_acris_real_property_master.ipynb b/notebooks/cityofnewyork.us_department_of_finance_automated_city_register_information_system_acris_real_property_master.ipynb index 88295f7..01225e1 100644 --- a/notebooks/cityofnewyork.us_department_of_finance_automated_city_register_information_system_acris_real_property_master.ipynb +++ b/notebooks/cityofnewyork.us_department_of_finance_automated_city_register_information_system_acris_real_property_master.ipynb @@ -93,9 +93,16 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Deploy Shiny apps for free on Ploomber Cloud! 
Learn more: https://ploomber.io/s/signup\n" + ] + }, { "name": "stderr", "output_type": "stream", @@ -1090,31 +1097,163 @@ "source": [ "# Real Property Parties\n", "\n", - "So many fun events are happening related to new york city events, and we can use this data to help us plan related parties and associated festivities :)" + "So many fun events are happening in New York City, and we can use this data to help us plan related parties and associated festivities :)\n", + "\n", + "Source: \n", + "\n", + "https://data.cityofnewyork.us/City-Government/ACRIS-Real-Property-Parties/636b-3b5g/about_data" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2024-04-29 17:34:05-- https://data.cityofnewyork.us/api/views/636b-3b5g/rows.csv?accessType=DOWNLOAD\n", + "Resolving data.cityofnewyork.us (data.cityofnewyork.us)... 52.206.68.26, 52.206.140.205, 52.206.140.199\n", + "Connecting to data.cityofnewyork.us (data.cityofnewyork.us)|52.206.68.26|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: unspecified [text/csv]\n", + "Saving to: ‘rows.csv?accessType=DOWNLOAD.3’\n", + "\n", + "rows.csv?accessType [ <=> ] 4.01G 4.93MB/s in 16m 52s \n", + "\n", + "2024-04-29 17:50:57 (4.05 MB/s) - ‘rows.csv?accessType=DOWNLOAD.3’ saved [4301658835]\n", + "\n" + ] + } + ], + "source": [ + "!wget \"https://data.cityofnewyork.us/api/views/636b-3b5g/rows.csv?accessType=DOWNLOAD\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!mv rows.csv?accessType=DOWNLOAD ~/data" + ] + }, + { + "cell_type": "code", + "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "--2024-04-23 06:13:15-- https://data.cityofnewyork.us/api/views/636b-3b5g/rows.csv?fourfour=636b-3b5g&cacheBust=1712779124&date=20240423&accessType=DOWNLOAD\n", - "Resolving data.cityofnewyork.us (data.cityofnewyork.us)... 52.206.140.199, 52.206.68.26, 52.206.140.205\n", + "DOCUMENT ID,RECORD TYPE,PARTY TYPE,NAME,ADDRESS 1,ADDRESS 2,COUNTRY,CITY,STATE,ZIP,GOOD THROUGH DATE\n", + "2018073000132001,P,1,\"TAYLOR, EMILY\",\"360 FURMAN STREET, UNIT 933\",,US,BROOKLYN,NY,11201,08/31/2018 12:00:00 AM\n", + "2018082900859001,P,2,QUICKEN LOANS INC.,1050 WOODWARD AVE,,US,DETROIT,MI,48226,08/31/2018 12:00:00 AM\n", + "2018081600002001,P,2,\"MORTGAGE ELECTRONIC REGISTRATION SYSTEMS, INC\",,,,,,,08/31/2018 12:00:00 AM\n", + "2018081701275001,P,1,\"KIM, NANCY\",\"6210 WOODSIDE AVE, APT 211\",,US,WOODSIDE,NY,11377,08/31/2018 12:00:00 AM\n", + "2018082701133001,P,1,LAWRENCE SKEETE,198 LEFFERTS PLACE,,US,BROOKLYN,NY,11238,08/31/2018 12:00:00 AM\n", + "2018081600597003,P,2,HABITAT QUEENS PHASE II HOUSING DEVELOPMENT CORP,111 JOHN STREET - 23RD FLOOR,,US,NEW YORK,NY,10038,08/31/2018 12:00:00 AM\n", + "2018081700353004,P,2,\"JUBA, AMY LANDESS\",\"6355 METROWEST BOULEVARD, SUITE 180\",,US,ORLANDO,FL,32835,08/31/2018 12:00:00 AM\n", + "2018081100034001,P,1,\"FORRESTER, JESSICA\",\"80 WINTHROP 
STREET, APT L1\",,US,BROOKLYN,NY,11225,08/31/2018 12:00:00 AM\n", + "2018082100327004,P,1,\"MONTUORO, POONAM K\",153-22 83RD STREET,UNIT # 6D-U,US,HOWARD BEACH,NY,11414,08/31/2018 12:00:00 AM\n" + ] + } + ], + "source": [ + "!head ~/data/rows.csv" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Try downloading via the direct link listed in nycdb on GitHub\n", + "\n", + "https://github.com/nycdb/nycdb/blob/main/src/nycdb/datasets/acris.yml lists another direct link to try..." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2024-04-25 08:07:51-- https://data.cityofnewyork.us/api/views/636b-3b5g/rows.csv?accessType=DOWNLOAD\n", + "Resolving data.cityofnewyork.us (data.cityofnewyork.us)... 52.206.140.199, 52.206.140.205, 52.206.68.26\n", "Connecting to data.cityofnewyork.us (data.cityofnewyork.us)|52.206.140.199|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: unspecified [text/csv]\n", - "Saving to: ‘rows.csv?fourfour=636b-3b5g&cacheBust=1712779124&date=20240423&accessType=DOWNLOAD’\n", + "Saving to: ‘rows.csv?accessType=DOWNLOAD’\n", "\n", - "pe=DOWNLOAD [ <=> ] 162.01M 3.56MB/s ^C\n" + "rows.csv?accessType [ <=> ] 64.57M 244KB/s in 4m 36s \n", + "\n", + "\n", + "Cannot write to ‘rows.csv?accessType=DOWNLOAD’ (Success).\n" ] } ], "source": [ - "!wget \"https://data.cityofnewyork.us/api/views/636b-3b5g/rows.csv?fourfour=636b-3b5g&cacheBust=1712779124&date=20240423&accessType=DOWNLOAD\"" + "!wget \"https://data.cityofnewyork.us/api/views/636b-3b5g/rows.csv?accessType=DOWNLOAD\"" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "!mv rows.csv?accessType=DOWNLOAD ~/data/rows.csv" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%sql " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Try downloading via API link from beta.nyc\n", + "\n", + "Thanks to Zachary for pointing this out! https://dev.socrata.com/foundry/data.cityofnewyork.us/636b-3b5g" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2024-04-25 08:13:50-- https://data.cityofnewyork.us/api/views/636b-3b5g/rows.csv?accessType=DOWNLOAD&api_foundry=true\n", + "Resolving data.cityofnewyork.us (data.cityofnewyork.us)... 52.206.140.205, 52.206.140.199, 52.206.68.26\n", + "Connecting to data.cityofnewyork.us (data.cityofnewyork.us)|52.206.140.205|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: unspecified [text/csv]\n", + "Saving to: ‘rows.csv?accessType=DOWNLOAD&api_foundry=true’\n", + "\n", + "rows.csv?accessType [ <=> ] 196.27M 188KB/s in 13m 27s \n", + "\n", + "2024-04-25 08:27:18 (249 KB/s) - ‘rows.csv?accessType=DOWNLOAD&api_foundry=true’ saved [205799495]\n", + "\n" + ] + } + ], + "source": [ + "!wget \"https://data.cityofnewyork.us/api/views/636b-3b5g/rows.csv?accessType=DOWNLOAD&api_foundry=true\"" ] }, { @@ -1135,16 +1274,16 @@ "name": "stdout", "output_type": "stream", "text": [ - "--2024-04-23 06:15:13-- https://data.cityofnewyork.us/api/views/7isb-wh4c/rows.csv?accessType=DOWNLOAD\n", - "Resolving data.cityofnewyork.us (data.cityofnewyork.us)... 52.206.140.199, 52.206.68.26, 52.206.140.205\n", - "Connecting to data.cityofnewyork.us (data.cityofnewyork.us)|52.206.140.199|:443... connected.\n", + "--2024-04-29 17:33:33-- https://data.cityofnewyork.us/api/views/7isb-wh4c/rows.csv?accessType=DOWNLOAD\n", + "Resolving data.cityofnewyork.us (data.cityofnewyork.us)... 52.206.68.26, 52.206.140.205, 52.206.140.199\n", + "Connecting to data.cityofnewyork.us (data.cityofnewyork.us)|52.206.68.26|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: unspecified [text/csv]\n", - "Saving to: ‘rows.csv?accessType=DOWNLOAD’\n", + "Saving to: ‘rows.csv?accessType=DOWNLOAD.2’\n", "\n", "rows.csv?accessType [ <=> ] 9.03K --.-KB/s in 0s \n", "\n", - "2024-04-23 06:15:13 (679 MB/s) - ‘rows.csv?accessType=DOWNLOAD’ saved [9251]\n", + "2024-04-29 17:33:33 (2.15 GB/s) - ‘rows.csv?accessType=DOWNLOAD.2’ saved [9251]\n", "\n" ] } @@ -1186,7 +1325,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.2" + "version": "3.12.3" } }, "nbformat": 4,