From 88e7bbd962000d2019d745bd2852052ae47eb401 Mon Sep 17 00:00:00 2001 From: Emanuele Salonico Date: Sun, 1 Oct 2023 14:53:01 +0200 Subject: [PATCH] bug fix --- flight_analysis.ipynb | 383 ++-------------------------------- flight_analysis.py | 4 +- routes.ini | 11 +- src/flight_analysis/flight.py | 6 +- 4 files changed, 29 insertions(+), 375 deletions(-) diff --git a/flight_analysis.ipynb b/flight_analysis.ipynb index 994b512..ee679c2 100644 --- a/flight_analysis.ipynb +++ b/flight_analysis.ipynb @@ -13,392 +13,37 @@ "cell_type": "code", "execution_count": 2, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "None\n" - ] - } - ], + "outputs": [], "source": [ - "flights = Scrape(\"BER\", \"SIN\", \"2023-10-03\")\n", + "flights = Scrape(\"FMM\", \"FCO\", \"2023-12-29\")\n", "flights.run_scrape()" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "flights.data" + ] + }, + { + "cell_type": "code", + "execution_count": 4, "metadata": {}, "outputs": [ { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
departure_datetimearrival_datetimeairlinestravel_timeorigindestinationlayover_nlayover_timelayover_locationprice_eurprice_trendprice_valueaccess_dateone_wayhas_traindays_advance
02023-10-03 16:30:002023-10-04 15:45:00[Qatar Airways]0 days 17:15:00BERSIN10 days 03:30:00[DOH]821typicalNone2023-09-27 17:41:45.857643TrueFalse5
12023-10-03 15:20:002023-10-04 15:50:00[Air France]0 days 18:30:00BERSIN10 days 03:40:00[CDG]880typicalNone2023-09-27 17:41:45.857671TrueFalse5
22023-10-03 19:35:002023-10-04 17:50:00[Turkish Airlines]0 days 16:15:00BERSIN10 days 02:50:00[IST]1090typicalNone2023-09-27 17:41:45.857673TrueFalse6
32023-10-03 16:50:002023-10-05 20:00:00[Wizz Air, Vistara, IndiGo]1 days 21:10:00BERSIN3NaT[BUD, DMM, BOM]328typicalNone2023-09-27 17:41:45.857676TrueFalse5
42023-10-03 07:10:002023-10-04 20:00:00[easyJet, Wizz Air, IndiGo]1 days 06:50:00BERSIN3NaT[FCO, DMM, BOM]400typicalNone2023-09-27 17:41:45.857678TrueFalse5
52023-10-03 07:10:002023-10-04 17:45:00[easyJet, Wizz Air, IndiGo]1 days 04:35:00BERSIN3NaT[FCO, DMM, DEL]403typicalNone2023-09-27 17:41:45.857680TrueFalse5
62023-10-03 13:15:002023-10-04 17:45:00[Pegasus, IndiGo]0 days 22:30:00BERSIN3NaT[SAW, DOH, DEL]539typicalNone2023-09-27 17:41:45.857683TrueFalse5
72023-10-03 13:04:002023-10-05 09:55:00[Flex, Flight, Etihad]1 days 14:51:00BERSIN2NaT[FRA, AUH]581typicalNone2023-09-27 17:41:45.857685TrueTrue5
82023-10-03 08:55:002023-10-04 12:50:00[Finnair, Jetstar]0 days 21:55:00BERSIN2NaT[HEL, BKK]750typicalNone2023-09-27 17:41:45.857687TrueFalse5
92023-10-03 19:15:002023-10-04 19:30:00[Finnair]0 days 18:15:00BERSIN10 days 03:20:00[HEL]1203typicalNone2023-09-27 17:41:45.857690TrueFalse6
102023-10-03 10:40:002023-10-04 09:00:00[Turkish Airlines]0 days 16:20:00BERSIN10 days 02:40:00[IST]1300typicalNone2023-09-27 17:41:45.857692TrueFalse5
112023-10-03 17:30:002023-10-04 15:30:00[KLM]0 days 16:00:00BERSIN10 days 02:00:00[AMS]1708typicalNone2023-09-27 17:41:45.857694TrueFalse5
122023-10-03 09:35:002023-10-04 05:55:00[SWISS, Singapore Airlines]0 days 14:20:00BERSIN10 days 00:40:00[ZRH]2661typicalNone2023-09-27 17:41:45.857696TrueFalse5
\n", - "
" - ], "text/plain": [ - " departure_datetime arrival_datetime airlines \\\n", - "0 2023-10-03 16:30:00 2023-10-04 15:45:00 [Qatar Airways] \n", - "1 2023-10-03 15:20:00 2023-10-04 15:50:00 [Air France] \n", - "2 2023-10-03 19:35:00 2023-10-04 17:50:00 [Turkish Airlines] \n", - "3 2023-10-03 16:50:00 2023-10-05 20:00:00 [Wizz Air, Vistara, IndiGo] \n", - "4 2023-10-03 07:10:00 2023-10-04 20:00:00 [easyJet, Wizz Air, IndiGo] \n", - "5 2023-10-03 07:10:00 2023-10-04 17:45:00 [easyJet, Wizz Air, IndiGo] \n", - "6 2023-10-03 13:15:00 2023-10-04 17:45:00 [Pegasus, IndiGo] \n", - "7 2023-10-03 13:04:00 2023-10-05 09:55:00 [Flex, Flight, Etihad] \n", - "8 2023-10-03 08:55:00 2023-10-04 12:50:00 [Finnair, Jetstar] \n", - "9 2023-10-03 19:15:00 2023-10-04 19:30:00 [Finnair] \n", - "10 2023-10-03 10:40:00 2023-10-04 09:00:00 [Turkish Airlines] \n", - "11 2023-10-03 17:30:00 2023-10-04 15:30:00 [KLM] \n", - "12 2023-10-03 09:35:00 2023-10-04 05:55:00 [SWISS, Singapore Airlines] \n", - "\n", - " travel_time origin destination layover_n layover_time \\\n", - "0 0 days 17:15:00 BER SIN 1 0 days 03:30:00 \n", - "1 0 days 18:30:00 BER SIN 1 0 days 03:40:00 \n", - "2 0 days 16:15:00 BER SIN 1 0 days 02:50:00 \n", - "3 1 days 21:10:00 BER SIN 3 NaT \n", - "4 1 days 06:50:00 BER SIN 3 NaT \n", - "5 1 days 04:35:00 BER SIN 3 NaT \n", - "6 0 days 22:30:00 BER SIN 3 NaT \n", - "7 1 days 14:51:00 BER SIN 2 NaT \n", - "8 0 days 21:55:00 BER SIN 2 NaT \n", - "9 0 days 18:15:00 BER SIN 1 0 days 03:20:00 \n", - "10 0 days 16:20:00 BER SIN 1 0 days 02:40:00 \n", - "11 0 days 16:00:00 BER SIN 1 0 days 02:00:00 \n", - "12 0 days 14:20:00 BER SIN 1 0 days 00:40:00 \n", - "\n", - " layover_location price_eur price_trend price_value \\\n", - "0 [DOH] 821 typical None \n", - "1 [CDG] 880 typical None \n", - "2 [IST] 1090 typical None \n", - "3 [BUD, DMM, BOM] 328 typical None \n", - "4 [FCO, DMM, BOM] 400 typical None \n", - "5 [FCO, DMM, DEL] 403 typical None \n", - "6 [SAW, DOH, DEL] 539 typical None \n", - "7 [FRA, AUH] 581 typical None \n", - "8 [HEL, BKK] 750 typical None \n", - "9 [HEL] 1203 typical None \n", - "10 [IST] 1300 typical None \n", - "11 [AMS] 1708 typical None \n", - "12 [ZRH] 2661 typical None \n", - "\n", - " access_date one_way has_train days_advance \n", - "0 2023-09-27 17:41:45.857643 True False 5 \n", - "1 2023-09-27 17:41:45.857671 True False 5 \n", - "2 2023-09-27 17:41:45.857673 True False 6 \n", - "3 2023-09-27 17:41:45.857676 True False 5 \n", - "4 2023-09-27 17:41:45.857678 True False 5 \n", - "5 2023-09-27 17:41:45.857680 True False 5 \n", - "6 2023-09-27 17:41:45.857683 True False 5 \n", - "7 2023-09-27 17:41:45.857685 True True 5 \n", - "8 2023-09-27 17:41:45.857687 True False 5 \n", - "9 2023-09-27 17:41:45.857690 True False 6 \n", - "10 2023-09-27 17:41:45.857692 True False 5 \n", - "11 2023-09-27 17:41:45.857694 True False 5 \n", - "12 2023-09-27 17:41:45.857696 True False 5 " + "'https://www.google.com/travel/flights?q=Flights%20to%20FCO%20from%20FMM%20on%202023-12-29%20oneway&curr=EUR&gl=IT'" ] }, - "execution_count": 3, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], - "source": [ - "flights.data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], "source": [ "flights._url" ] diff --git a/flight_analysis.py b/flight_analysis.py index d8aea34..cc47a75 100644 --- a/flight_analysis.py +++ b/flight_analysis.py @@ -121,6 +121,8 @@ def get_routes_df(routes: list): final_df["uuid"] = [uuid.uuid4() for _ in range(final_df.shape[0])] final_df = final_df.set_index("uuid") + final_df.to_csv(f"{route}.csv") + return final_df @@ -182,7 +184,7 @@ def generate_layovers_df_from_flights(flights_df): # scrape routes into a dataframe scraped_flights = get_routes_df(routes) print(scraped_flights) - + # generate airline and layovers dataframe scraped_airlines = generate_airlines_df_from_flights(scraped_flights) scraped_layovers = generate_layovers_df_from_flights(scraped_flights) diff --git a/routes.ini b/routes.ini index c7d6432..7e961ea 100644 --- a/routes.ini +++ b/routes.ini @@ -1,7 +1,14 @@ [routes] ; Format: [origin, destination, range_of_days_from_today] -ber_sin = ["BER", "SIN", 3] +; ber_sin = ["BER", "SIN", 3] ; fco_muc = ["FCO", "MUC", 90] ; fmm_fco = ["FMM", "FCO", 90] -; fco_fmm = ["FCO", "FMM", 90] \ No newline at end of file +; fco_fmm = ["FCO", "FMM", 90] + +muc_ika = ["MUC", "IKA", 50] +ika_muc = ["IKA", "MUC", 50] +fco_muc = ["FCO", "MUC", 90] +muc_fco = ["MUC", "FCO", 90] +fmm_fco = ["FMM", "FCO", 90] +fco_fmm = ["FCO", "FMM", 90] diff --git a/src/flight_analysis/flight.py b/src/flight_analysis/flight.py index 9669c2d..4d663d7 100644 --- a/src/flight_analysis/flight.py +++ b/src/flight_analysis/flight.py @@ -19,7 +19,7 @@ def __init__(self, dl, roundtrip, queried_orig, queried_dest, price_trend, *args self._airline = None self._flight_time = None self._layover_n = None - self._layover_time = None + self._layover_time = timedelta() self._layover_location = None self._price = None self._price_trend = price_trend @@ -154,7 +154,7 @@ def _parse_layover_times_location(self, arg): """ From an argument (arg), returns the layover time and location as a tuple """ - layover_time = None + layover_time = timedelta() layover_location = None # layover time @@ -301,7 +301,7 @@ def convert_duration_str_to_timedelta(s): 5 hr 55 min --> 60*5 + 55 = 355 """ if s is None or not bool(re.search("hr|min", str(s))): - return None + return timedelta() h = 0 m = 0