-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add: split table into scraped and scraped_airlines
- Loading branch information
Showing
4 changed files
with
194 additions
and
112 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -78,10 +78,10 @@ | |
" <td>511</td>\n", | ||
" <td>typical</td>\n", | ||
" <td>None</td>\n", | ||
" <td>2023-08-03 19:44:53.852230</td>\n", | ||
" <td>2023-08-04 23:25:00.186922</td>\n", | ||
" <td>True</td>\n", | ||
" <td>False</td>\n", | ||
" <td>85</td>\n", | ||
" <td>84</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>1</th>\n", | ||
|
@@ -97,10 +97,10 @@ | |
" <td>627</td>\n", | ||
" <td>typical</td>\n", | ||
" <td>None</td>\n", | ||
" <td>2023-08-03 19:44:53.852262</td>\n", | ||
" <td>2023-08-04 23:25:00.186958</td>\n", | ||
" <td>True</td>\n", | ||
" <td>False</td>\n", | ||
" <td>85</td>\n", | ||
" <td>84</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>2</th>\n", | ||
|
@@ -116,10 +116,10 @@ | |
" <td>706</td>\n", | ||
" <td>typical</td>\n", | ||
" <td>None</td>\n", | ||
" <td>2023-08-03 19:44:53.852267</td>\n", | ||
" <td>2023-08-04 23:25:00.186963</td>\n", | ||
" <td>True</td>\n", | ||
" <td>False</td>\n", | ||
" <td>85</td>\n", | ||
" <td>84</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>3</th>\n", | ||
|
@@ -135,10 +135,10 @@ | |
" <td>586</td>\n", | ||
" <td>typical</td>\n", | ||
" <td>None</td>\n", | ||
" <td>2023-08-03 19:44:53.852273</td>\n", | ||
" <td>2023-08-04 23:25:00.186968</td>\n", | ||
" <td>True</td>\n", | ||
" <td>False</td>\n", | ||
" <td>85</td>\n", | ||
" <td>84</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>4</th>\n", | ||
|
@@ -154,29 +154,29 @@ | |
" <td>670</td>\n", | ||
" <td>typical</td>\n", | ||
" <td>None</td>\n", | ||
" <td>2023-08-03 19:44:53.852278</td>\n", | ||
" <td>2023-08-04 23:25:00.186973</td>\n", | ||
" <td>True</td>\n", | ||
" <td>False</td>\n", | ||
" <td>85</td>\n", | ||
" <td>84</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>5</th>\n", | ||
" <td>2023-10-28 11:30:00</td>\n", | ||
" <td>2023-10-28 21:40:00</td>\n", | ||
" <td>[Delta, Virgin Atlantic, KLM]</td>\n", | ||
" <td>970</td>\n", | ||
" <td>2023-10-28 07:00:00</td>\n", | ||
" <td>2023-10-28 15:25:00</td>\n", | ||
" <td>[KLMDelta]</td>\n", | ||
" <td>865</td>\n", | ||
" <td>MUC</td>\n", | ||
" <td>JFK</td>\n", | ||
" <td>1</td>\n", | ||
" <td>270.0</td>\n", | ||
" <td>DTW</td>\n", | ||
" <td>723</td>\n", | ||
" <td>290.0</td>\n", | ||
" <td>AMS</td>\n", | ||
" <td>706</td>\n", | ||
" <td>typical</td>\n", | ||
" <td>None</td>\n", | ||
" <td>2023-08-03 19:44:53.852282</td>\n", | ||
" <td>2023-08-04 23:25:00.186978</td>\n", | ||
" <td>True</td>\n", | ||
" <td>False</td>\n", | ||
" <td>85</td>\n", | ||
" <td>84</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>6</th>\n", | ||
|
@@ -192,10 +192,10 @@ | |
" <td>787</td>\n", | ||
" <td>typical</td>\n", | ||
" <td>None</td>\n", | ||
" <td>2023-08-03 19:44:53.852287</td>\n", | ||
" <td>2023-08-04 23:25:00.186982</td>\n", | ||
" <td>True</td>\n", | ||
" <td>False</td>\n", | ||
" <td>85</td>\n", | ||
" <td>84</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>7</th>\n", | ||
|
@@ -211,10 +211,10 @@ | |
" <td>812</td>\n", | ||
" <td>typical</td>\n", | ||
" <td>None</td>\n", | ||
" <td>2023-08-03 19:44:53.852291</td>\n", | ||
" <td>2023-08-04 23:25:00.186987</td>\n", | ||
" <td>True</td>\n", | ||
" <td>False</td>\n", | ||
" <td>85</td>\n", | ||
" <td>84</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>8</th>\n", | ||
|
@@ -230,10 +230,10 @@ | |
" <td>830</td>\n", | ||
" <td>typical</td>\n", | ||
" <td>None</td>\n", | ||
" <td>2023-08-03 19:44:53.852296</td>\n", | ||
" <td>2023-08-04 23:25:00.186992</td>\n", | ||
" <td>True</td>\n", | ||
" <td>False</td>\n", | ||
" <td>85</td>\n", | ||
" <td>84</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>9</th>\n", | ||
|
@@ -246,66 +246,66 @@ | |
" <td>0</td>\n", | ||
" <td>NaN</td>\n", | ||
" <td>None</td>\n", | ||
" <td>1316</td>\n", | ||
" <td>1317</td>\n", | ||
" <td>typical</td>\n", | ||
" <td>None</td>\n", | ||
" <td>2023-08-03 19:44:53.852300</td>\n", | ||
" <td>2023-08-04 23:25:00.186995</td>\n", | ||
" <td>True</td>\n", | ||
" <td>False</td>\n", | ||
" <td>85</td>\n", | ||
" <td>84</td>\n", | ||
" </tr>\n", | ||
" </tbody>\n", | ||
"</table>\n", | ||
"</div>" | ||
], | ||
"text/plain": [ | ||
" departure_datetime arrival_datetime airlines \\\n", | ||
"0 2023-10-28 14:05:00 2023-10-28 19:10:00 [Icelandair] \n", | ||
"1 2023-10-28 09:55:00 2023-10-28 20:20:00 [LOT] \n", | ||
"2 2023-10-28 11:50:00 2023-10-28 16:35:00 [KLMDelta] \n", | ||
"3 2023-10-28 11:00:00 2023-10-28 20:20:00 [Lufthansa, LOT] \n", | ||
"4 2023-10-28 10:25:00 2023-10-28 23:37:00 [Delta, Virgin Atlantic] \n", | ||
"5 2023-10-28 11:30:00 2023-10-28 21:40:00 [Delta, Virgin Atlantic, KLM] \n", | ||
"6 2023-10-28 11:25:00 2023-10-28 19:20:00 [Aer Lingus] \n", | ||
"7 2023-10-28 14:05:00 2023-10-28 19:15:00 [KLMDelta] \n", | ||
"8 2023-10-28 09:00:00 2023-10-28 14:25:00 [Lufthansa, Condor] \n", | ||
"9 2023-10-28 12:10:00 2023-10-28 15:00:00 [Lufthansa, United] \n", | ||
" departure_datetime arrival_datetime airlines \\\n", | ||
"0 2023-10-28 14:05:00 2023-10-28 19:10:00 [Icelandair] \n", | ||
"1 2023-10-28 09:55:00 2023-10-28 20:20:00 [LOT] \n", | ||
"2 2023-10-28 11:50:00 2023-10-28 16:35:00 [KLMDelta] \n", | ||
"3 2023-10-28 11:00:00 2023-10-28 20:20:00 [Lufthansa, LOT] \n", | ||
"4 2023-10-28 10:25:00 2023-10-28 23:37:00 [Delta, Virgin Atlantic] \n", | ||
"5 2023-10-28 07:00:00 2023-10-28 15:25:00 [KLMDelta] \n", | ||
"6 2023-10-28 11:25:00 2023-10-28 19:20:00 [Aer Lingus] \n", | ||
"7 2023-10-28 14:05:00 2023-10-28 19:15:00 [KLMDelta] \n", | ||
"8 2023-10-28 09:00:00 2023-10-28 14:25:00 [Lufthansa, Condor] \n", | ||
"9 2023-10-28 12:10:00 2023-10-28 15:00:00 [Lufthansa, United] \n", | ||
"\n", | ||
" travel_time origin destination layover_n layover_time layover_location \\\n", | ||
"0 665 MUC JFK 1 60.0 KEF \n", | ||
"1 985 MUC JFK 1 320.0 WAW \n", | ||
"2 645 MUC JFK 1 70.0 AMS \n", | ||
"3 920 MUC JFK 1 260.0 WAW \n", | ||
"4 1152 MUC JFK 1 385.0 ATL \n", | ||
"5 970 MUC JFK 1 270.0 DTW \n", | ||
"5 865 MUC JFK 1 290.0 AMS \n", | ||
"6 835 MUC JFK 1 220.0 DUB \n", | ||
"7 670 MUC JFK 1 95.0 AMS \n", | ||
"8 685 MUC JFK 1 105.0 FRA \n", | ||
"9 530 MUC JFK 0 NaN None \n", | ||
"\n", | ||
" price_eur price_trend price_value access_date one_way \\\n", | ||
"0 511 typical None 2023-08-03 19:44:53.852230 True \n", | ||
"1 627 typical None 2023-08-03 19:44:53.852262 True \n", | ||
"2 706 typical None 2023-08-03 19:44:53.852267 True \n", | ||
"3 586 typical None 2023-08-03 19:44:53.852273 True \n", | ||
"4 670 typical None 2023-08-03 19:44:53.852278 True \n", | ||
"5 723 typical None 2023-08-03 19:44:53.852282 True \n", | ||
"6 787 typical None 2023-08-03 19:44:53.852287 True \n", | ||
"7 812 typical None 2023-08-03 19:44:53.852291 True \n", | ||
"8 830 typical None 2023-08-03 19:44:53.852296 True \n", | ||
"9 1316 typical None 2023-08-03 19:44:53.852300 True \n", | ||
"0 511 typical None 2023-08-04 23:25:00.186922 True \n", | ||
"1 627 typical None 2023-08-04 23:25:00.186958 True \n", | ||
"2 706 typical None 2023-08-04 23:25:00.186963 True \n", | ||
"3 586 typical None 2023-08-04 23:25:00.186968 True \n", | ||
"4 670 typical None 2023-08-04 23:25:00.186973 True \n", | ||
"5 706 typical None 2023-08-04 23:25:00.186978 True \n", | ||
"6 787 typical None 2023-08-04 23:25:00.186982 True \n", | ||
"7 812 typical None 2023-08-04 23:25:00.186987 True \n", | ||
"8 830 typical None 2023-08-04 23:25:00.186992 True \n", | ||
"9 1317 typical None 2023-08-04 23:25:00.186995 True \n", | ||
"\n", | ||
" has_train days_advance \n", | ||
"0 False 85 \n", | ||
"1 False 85 \n", | ||
"2 False 85 \n", | ||
"3 False 85 \n", | ||
"4 False 85 \n", | ||
"5 False 85 \n", | ||
"6 False 85 \n", | ||
"7 False 85 \n", | ||
"8 False 85 \n", | ||
"9 False 85 " | ||
"0 False 84 \n", | ||
"1 False 84 \n", | ||
"2 False 84 \n", | ||
"3 False 84 \n", | ||
"4 False 84 \n", | ||
"5 False 84 \n", | ||
"6 False 84 \n", | ||
"7 False 84 \n", | ||
"8 False 84 \n", | ||
"9 False 84 " | ||
] | ||
}, | ||
"execution_count": 3, | ||
|
@@ -319,7 +319,7 @@ | |
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 6, | ||
"execution_count": 4, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
|
@@ -328,7 +328,7 @@ | |
"'https://www.google.com/travel/flights?q=Flights%20to%20JFK%20from%20MUC%20on%202023-10-28%20oneway&curr=EUR&gl=IT'" | ||
] | ||
}, | ||
"execution_count": 6, | ||
"execution_count": 4, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
|
@@ -339,29 +339,55 @@ | |
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 7, | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"outputs": [], | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"'https://www.google.com/travel/flights?q=Flights%20to%20JFK%20from%20%28MUC%2CFCO%29%20on%202023-10-28%20oneway&curr=EUR&gl=IT'" | ||
] | ||
}, | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"import private.private as private\n", | ||
"from src.flight_analysis.database import Database\n", | ||
"\n", | ||
"db = Database(\n", | ||
" db_host=private.DB_HOST,\n", | ||
" db_name=private.DB_NAME,\n", | ||
" db_user=private.DB_USER,\n", | ||
" db_pw=private.DB_PW,\n", | ||
" db_table=private.DB_TABLE,\n", | ||
")\n", | ||
"\n", | ||
"db\n", | ||
"base = \"https://www.google.com/travel/flights?q=Flights%20to%20JFK%20from%20MUC%20on%202023-10-28%20oneway&curr=EUR&gl=IT\"\n", | ||
"\n", | ||
"cursor = db.conn.cursor()\n", | ||
"cursor.execute(f\"SELECT * FROM {self.db_name}\")\n", | ||
"a = \"https://www.google.com/travel/flights?q=Flights%20to%20JFK%20from%20%28MUC%2CFCO%29%20on%202023-10-28%20oneway&curr=EUR&gl=IT\"\n", | ||
"a" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 7, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"ename": "AssertionError", | ||
"evalue": "", | ||
"output_type": "error", | ||
"traceback": [ | ||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | ||
"\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)", | ||
"\u001b[1;32m/Users/emanuelesalonico/Library/CloudStorage/[email protected]/My Drive/SYNC/Dev/flight-analysis/flight_analysis.ipynb Cell 6\u001b[0m in \u001b[0;36m<cell line: 4>\u001b[0;34m()\u001b[0m\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/emanuelesalonico/Library/CloudStorage/GoogleDrive-esalonico%40gmail.com/My%20Drive/SYNC/Dev/flight-analysis/flight_analysis.ipynb#X10sZmlsZQ%3D%3D?line=2'>3</a>\u001b[0m parser \u001b[39m=\u001b[39m StandardParser()\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/emanuelesalonico/Library/CloudStorage/GoogleDrive-esalonico%40gmail.com/My%20Drive/SYNC/Dev/flight-analysis/flight_analysis.ipynb#X10sZmlsZQ%3D%3D?line=3'>4</a>\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mopen\u001b[39m(\u001b[39m'\u001b[39m\u001b[39mtest\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m'\u001b[39m\u001b[39mrb\u001b[39m\u001b[39m'\u001b[39m) \u001b[39mas\u001b[39;00m fh:\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/emanuelesalonico/Library/CloudStorage/GoogleDrive-esalonico%40gmail.com/My%20Drive/SYNC/Dev/flight-analysis/flight_analysis.ipynb#X10sZmlsZQ%3D%3D?line=4'>5</a>\u001b[0m \u001b[39m# print(fh.read())\u001b[39;00m\n\u001b[0;32m----> <a href='vscode-notebook-cell:/Users/emanuelesalonico/Library/CloudStorage/GoogleDrive-esalonico%40gmail.com/My%20Drive/SYNC/Dev/flight-analysis/flight_analysis.ipynb#X10sZmlsZQ%3D%3D?line=5'>6</a>\u001b[0m output \u001b[39m=\u001b[39m parser\u001b[39m.\u001b[39;49mparse_message(fh, \u001b[39m\"\u001b[39;49m\u001b[39mmessage\u001b[39;49m\u001b[39m\"\u001b[39;49m)\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/emanuelesalonico/Library/CloudStorage/GoogleDrive-esalonico%40gmail.com/My%20Drive/SYNC/Dev/flight-analysis/flight_analysis.ipynb#X10sZmlsZQ%3D%3D?line=6'>7</a>\u001b[0m \u001b[39mprint\u001b[39m(output)\n", | ||
"File \u001b[0;32m~/miniforge3/envs/flight-analysis/lib/python3.10/site-packages/protobuf_inspector/types.py:75\u001b[0m, in \u001b[0;36mStandardParser.parse_message\u001b[0;34m(self, file, gtype, endgroup)\u001b[0m\n\u001b[1;32m 73\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mtype\u001b[39m \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m: \u001b[39mtype\u001b[39m \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mmessage\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 74\u001b[0m end \u001b[39m=\u001b[39m [\u001b[39mNone\u001b[39;00m]\n\u001b[0;32m---> 75\u001b[0m x \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mparse_message(file, \u001b[39mtype\u001b[39;49m, end)\n\u001b[1;32m 76\u001b[0m x \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mgroup (end \u001b[39m\u001b[39m%s\u001b[39;00m\u001b[39m) \u001b[39m\u001b[39m\"\u001b[39m \u001b[39m%\u001b[39m fg4(\u001b[39mstr\u001b[39m(end[\u001b[39m0\u001b[39m])) \u001b[39m+\u001b[39m x\n\u001b[1;32m 77\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mgroups_observed \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m\n", | ||
"File \u001b[0;32m~/miniforge3/envs/flight-analysis/lib/python3.10/site-packages/protobuf_inspector/types.py:59\u001b[0m, in \u001b[0;36mStandardParser.parse_message\u001b[0;34m(self, file, gtype, endgroup)\u001b[0m\n\u001b[1;32m 56\u001b[0m key, wire_type \u001b[39m=\u001b[39m read_identifier(file)\n\u001b[1;32m 57\u001b[0m \u001b[39mif\u001b[39;00m key \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m: \u001b[39mbreak\u001b[39;00m\n\u001b[0;32m---> 59\u001b[0m x \u001b[39m=\u001b[39m read_value(file, wire_type)\n\u001b[1;32m 60\u001b[0m \u001b[39massert\u001b[39;00m(\u001b[39mnot\u001b[39;00m (x \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m))\n\u001b[1;32m 62\u001b[0m \u001b[39mif\u001b[39;00m wire_type \u001b[39m==\u001b[39m \u001b[39m4\u001b[39m:\n", | ||
"File \u001b[0;32m~/miniforge3/envs/flight-analysis/lib/python3.10/site-packages/protobuf_inspector/core.py:38\u001b[0m, in \u001b[0;36mread_value\u001b[0;34m(file, wire_type)\u001b[0m\n\u001b[1;32m 36\u001b[0m \u001b[39mif\u001b[39;00m length \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m: \u001b[39mreturn\u001b[39;00m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m 37\u001b[0m c \u001b[39m=\u001b[39m file\u001b[39m.\u001b[39mread(length)\n\u001b[0;32m---> 38\u001b[0m \u001b[39massert\u001b[39;00m(\u001b[39mlen\u001b[39m(c) \u001b[39m==\u001b[39m length)\n\u001b[1;32m 39\u001b[0m \u001b[39mreturn\u001b[39;00m io\u001b[39m.\u001b[39mBytesIO(c)\n\u001b[1;32m 40\u001b[0m \u001b[39mif\u001b[39;00m wire_type \u001b[39m==\u001b[39m \u001b[39m3\u001b[39m \u001b[39mor\u001b[39;00m wire_type \u001b[39m==\u001b[39m \u001b[39m4\u001b[39m:\n", | ||
"\u001b[0;31mAssertionError\u001b[0m: " | ||
] | ||
} | ||
], | ||
"source": [ | ||
"from protobuf_inspector.types import StandardParser\n", | ||
"\n", | ||
"for row in cursor:\n", | ||
" print(row)\n", | ||
" break" | ||
"parser = StandardParser()\n", | ||
"with open('test', 'rb') as fh:\n", | ||
" # print(fh.read())\n", | ||
" output = parser.parse_message(fh, \"message\")\n", | ||
"print(output)" | ||
] | ||
} | ||
], | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.