# Post-patch gwr_feed.py definitions from the change "Switch to multithreaded
# mobile API calls" (leonghui, 2024-12-06, commit
# e2c53193d5314b36538ea0dbf405c94205041b09). Lines the patch removes
# (get_request_bodies and the sequential loop in get_item_listing) are omitted.
#
# Companion settings the same patch adds to gwr_feed_data.py (read here through
# ``query.config``):
#   X_APP_KEY = "69a273923b31ee667d3593235f91211be1a34232"
#   APP_VERSION = "4.52.0"
#   MOBILE_BASE_URL = "https://prod.mobileapi." + GWR_DOMAIN
#   MOBILE_SEARCH_URI = "/api/v3/train/ticket/search"
#   mobile_request_headers = {
#       "Accept-Encoding": "gzip",
#       "AppVersion": APP_VERSION,
#       "Content-Type": "application/json; charset=UTF-8",
#       "User-Agent": "okhttp/4.10.0",
#       "X-App-Key": X_APP_KEY,
#       "X-App-Platform": "Android",
#   }
#   FeedConfig.mobile_search_url: str = MOBILE_BASE_URL + MOBILE_SEARCH_URI
#   FeedConfig.mobile_headers: dict = field(
#       default_factory=lambda: mobile_request_headers)
from datetime import datetime, timedelta
import json
import concurrent.futures


def has_departed(message_dict: "dict | list | None") -> bool:
    """Return True when a journey message says the train has already departed.

    Args:
        message_dict: The ``messages`` value of a journey. A mapping with a
            ``"message-text"`` entry is inspected directly; a list/tuple of
            such mappings is accepted as well, and ``None``/empty means "no
            message". NOTE(review): the real shape of ``messages`` is not
            visible in this file - confirm against the mobile API response.

    Returns:
        True if any message text contains ``"already departed"``.
    """
    if not message_dict:
        return False

    if isinstance(message_dict, (list, tuple)):
        return any(has_departed(item) for item in message_dict)

    message_text = message_dict.get("message-text")
    return message_text is not None and "already departed" in str(message_text)


def mobile_worker(query: "DatetimeQuery", _date: datetime, result_dict: dict):
    """Query the mobile search endpoint for one date and record the fare.

    Posts a single-journey search for ``query.from_id`` -> ``query.to_id``
    leaving at ``_date`` and stores one entry in ``result_dict``:

    * ``result_dict[departure datetime] = "£<price> (<fare name>)"`` for the
      earliest journey that departs at or after ``_date`` and is not flagged
      as already departed, or
    * ``result_dict[_date] = "Not found"`` when no such journey exists.

    Args:
        query: Query object providing ``config`` (logger, session,
            mobile_search_url, mobile_headers), ``journey``, ``from_id`` and
            ``to_id``.
        _date: Requested departure time (compared as a naive datetime).
        result_dict: Mapping that receives the result for this date.
    """
    config = query.config
    logger = config.logger
    session = config.session
    url = config.mobile_search_url

    log_header = f"{query.journey} {_date}"

    data = {
        "destination-nlc": str(query.to_id),
        "journey-type": "single",
        "origin-nlc": str(query.from_id),
        "outward-time": _date.isoformat() + "Z",
        "outward-time-type": "leaving",
        "passenger-groups": [{"adults": 1, "children": 0, "number-of-railcards": 0}],
    }

    logger.debug(f"{log_header} - querying endpoint: {url}")
    search_response = session.post(
        url=url, headers=config.mobile_headers, data=json.dumps(data)
    )
    search_dict = search_response.json()

    # A response without "data"/"outward" is treated as "no journeys" instead of
    # failing on a chained .get() against None.
    journeys = (search_dict.get("data") or {}).get("outward") or []

    valid_journeys = [
        journey
        for journey in journeys
        # assume both query and results are using the same tz
        if datetime.fromisoformat(journey.get("departure-time")).replace(tzinfo=None)
        >= _date
        and not has_departed(journey.get("messages"))
    ]

    # skip if no results
    if not valid_journeys:
        result_dict[_date] = "Not found"
        return

    closest_journey = min(
        valid_journeys, key=lambda x: datetime.fromisoformat(x["departure-time"])
    )
    cheapest_fare = closest_journey.get("cheapest-price")
    journey_dt = datetime.fromisoformat(closest_journey["departure-time"])

    single_std_fares = (closest_journey.get("single-fares") or {}).get(
        "standard-class"
    ) or []
    matching_fare = next(
        (fare for fare in single_std_fares if fare.get("price") == cheapest_fare),
        None,
    )

    # Prices are divided by 100 for display. The text is built in two steps
    # because reusing double quotes inside a double-quoted f-string only parses
    # on Python 3.12+.
    fare_text = f"£{cheapest_fare / 100:.2f}"
    if matching_fare is not None:
        # No standard-class fare with the cheapest price: show the price alone.
        fare_name = matching_fare.get("fare-name")
        fare_text = f"{fare_text} ({fare_name})"

    result_dict[journey_dt] = fare_text


def get_request_body(query: "DatetimeQuery", _date):
    """Build the JSON body for the web journey-search endpoint.

    Args:
        query: Query object providing ``from_id`` and ``to_id``.
        _date: Requested departure datetime (serialised with ``isoformat()``).

    Returns:
        The request body as a dict with a single ``"data"`` entry.
    """
    return {
        "data": {
            "adults": 1,
            "destinationNlc": str(query.to_id),
            "originNlc": str(query.from_id),
            "outwardDateTime": _date.isoformat(),
            "outwardDepartAfter": True,
            "railcards": [],
        }
    }


def web_worker(query: "DatetimeQuery", _date: datetime, result_dict: dict):
    """Query the web journey endpoint for one date and record the fare.

    Stores ``result_dict[departure datetime] = "<currency> <price>
    [(<n> left)] (<fare group name>)"`` for the first journey departing at or
    after ``_date``, or ``result_dict[_date] = "Not found"`` when there is no
    such journey. Nothing is stored when the response is empty or the journey
    carries no fare selection dict.

    Args:
        query: Query object providing ``config`` and ``seats_left``.
        _date: Requested departure time.
        result_dict: Mapping that receives the result for this date.
    """
    body = get_request_body(query, _date)
    json_dict = get_response_dict(query.config.journey_url, query, body)

    if not json_dict:
        return

    journeys = json_dict["data"]["outwardservices"]

    filtered_journeys = None
    if journeys:
        # assume next journey is closest to requested time
        filtered_journeys = [
            journey
            for journey in journeys
            if datetime.fromisoformat(journey["departuredatetime"]) >= _date
        ]

    # NOTE(review): the source lost its indentation, so which ``if`` the
    # "Not found" branch belongs to is assumed to be the journey check (as in
    # mobile_worker) - confirm against the repository.
    if not filtered_journeys:
        result_dict[_date] = "Not found"
        return

    first_journey = filtered_journeys[0]
    departure_dt = datetime.fromisoformat(first_journey["departuredatetime"])
    fares = first_journey["cheapestfareselection"]

    if not isinstance(fares, dict):
        return

    fare_types = first_journey["otherfaregroups"]
    selected_fare = fares["cheapest"]

    selected_fare_type = [
        fare_type
        for fare_type in fare_types
        if fare_type["faregroupid"] == selected_fare["singlefaregroupid"]
    ][0]

    remaining_seats = selected_fare_type["availablespaces"]
    fare_type_name = selected_fare_type["faregroupname"]

    fare_price = "{:.2f}".format(selected_fare["singlefarecost"] / 100)

    fare_text = [
        query.config.currency,
        fare_price,
        f"({fare_type_name})",
    ]

    # 'availablespaces' appears to be defaulted to 9 so we will ignore that
    if query.seats_left and remaining_seats and remaining_seats != 9:
        fare_text.insert(2, f"({remaining_seats} left)")

    result_dict[departure_dt] = " ".join(fare_text)


def get_dates(query):
    """Return the departure datetimes to look up for ``query``.

    A ``DatetimeQuery`` yields ``query_dt`` repeated weekly for
    ``weeks_ahead + 1`` weeks. A ``CronQuery`` yields the next ``count``
    matches of ``job_str`` starting ``skip_weeks`` weeks from now. Any other
    query type yields an empty list.
    """
    if isinstance(query, DatetimeQuery):
        return [
            query.query_dt + timedelta(days=7 * week)
            for week in range(query.weeks_ahead + 1)
        ]

    if isinstance(query, CronQuery):
        # Imported here so the datetime-only path does not need croniter.
        from croniter import croniter

        base = datetime.now() + timedelta(days=7 * query.skip_weeks)
        schedule = croniter(query.job_str, base)
        return [schedule.get_next(datetime) for _ in range(query.count)]

    return []


def run_workers(worker_type, query, dates, result_dict: dict) -> None:
    """Run ``worker_type(query, date, results)`` for every date on a thread pool.

    Each worker writes into its own private dict; the partial results are then
    merged into ``result_dict`` in the order of ``dates`` so the outcome does
    not depend on thread timing. The first exception raised by a worker is
    re-raised in the calling thread.

    NOTE(review): all workers receive the same ``query`` and therefore share
    ``query.config.session`` - confirm the session tolerates concurrent use.
    """
    if not dates:
        # ThreadPoolExecutor rejects max_workers=0.
        return

    partial_results = [{} for _ in dates]

    with concurrent.futures.ThreadPoolExecutor(max_workers=len(dates)) as pool:
        # Pass the callable and its arguments separately; calling the worker
        # here would run it in this thread and submit its return value instead.
        futures = [
            pool.submit(worker_type, query, _date, partial)
            for _date, partial in zip(dates, partial_results)
        ]

    for future, partial in zip(futures, partial_results):
        future.result()  # surfaces worker exceptions to the caller
        result_dict.update(partial)


def get_pooled_results(query: "DatetimeQuery", worker_type):
    """Collect fares for every date of ``query`` concurrently.

    Args:
        query: The query to expand into dates (see ``get_dates``).
        worker_type: Callable ``(query, date, result_dict)`` that records the
            fare for one date, e.g. ``mobile_worker`` or ``web_worker``.

    Returns:
        The feed items produced by ``generate_items`` from the collected
        results.
    """
    dates = get_dates(query)

    result_dict = {}
    run_workers(worker_type, query, dates, result_dict)

    feed_items = generate_items(query, result_dict)

    return feed_items


# Default to using mobile API calls which are faster but do not return remaining seats
def get_item_listing(query: "DatetimeQuery", use_mobile_api=True):
    """Build the top-level JSON feed for ``query``.

    Args:
        query: The query to run.
        use_mobile_api: Use ``mobile_worker`` when True (default), otherwise
            ``web_worker``.

    Returns:
        The feed returned by ``get_top_level_feed``.
    """
    feed_items = get_pooled_results(
        query, mobile_worker if use_mobile_api else web_worker
    )

    json_feed = get_top_level_feed(query, [feed_items])

    return json_feed