From a673b6f2b15984acfba87d01f7b2302eb0775e0a Mon Sep 17 00:00:00 2001 From: xinluh Date: Fri, 2 Jun 2023 18:03:53 -0600 Subject: [PATCH 1/4] Fix Yahoo source invalid cookie / invalid crumb for v7 endpoint Using approach similar to https://github.com/karlwancl/YahooFinanceApi/commit/14b6c587d94ebe551f11f44c6b954d64285b831c --- beanprice/sources/yahoo.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/beanprice/sources/yahoo.py b/beanprice/sources/yahoo.py index 64c8b2b..0cf76d3 100644 --- a/beanprice/sources/yahoo.py +++ b/beanprice/sources/yahoo.py @@ -106,12 +106,23 @@ def get_price_series(ticker: str, return series, currency +# caching cookies for efficiency when making multiple API calls +_session = None +_crumb = None + + class Source(source.Source): "Yahoo Finance CSV API price extractor." def get_latest_price(self, ticker: str) -> Optional[source.SourcePrice]: """See contract in beanprice.source.Source.""" + if _session is None or _crumb is None: + _session = requests.Session() + _session.headers.update({'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/110.0'}) + _session.get('https://fc.yahoo.com') + _crumb = _session.get('https://query1.finance.yahoo.com/v1/test/getcrumb').text + url = "https://query1.finance.yahoo.com/v7/finance/quote" fields = ['symbol', 'regularMarketPrice', 'regularMarketTime'] payload = { @@ -120,7 +131,7 @@ def get_latest_price(self, ticker: str) -> Optional[source.SourcePrice]: 'exchange': 'NYSE', } payload.update(_DEFAULT_PARAMS) - response = requests.get(url, params=payload, headers={'User-Agent': None}) + response = _session.get(url, params=payload, headers={'User-Agent': None}) try: result = parse_response(response) except YahooError as error: From d24021570848bb3c42044b21fc85ce9ddef58bc0 Mon Sep 17 00:00:00 2001 From: xinluh Date: Fri, 2 Jun 2023 18:08:43 -0600 Subject: [PATCH 2/4] user agent should be changed as well --- beanprice/sources/yahoo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/beanprice/sources/yahoo.py b/beanprice/sources/yahoo.py index 0cf76d3..148fe46 100644 --- a/beanprice/sources/yahoo.py +++ b/beanprice/sources/yahoo.py @@ -131,7 +131,7 @@ def get_latest_price(self, ticker: str) -> Optional[source.SourcePrice]: 'exchange': 'NYSE', } payload.update(_DEFAULT_PARAMS) - response = _session.get(url, params=payload, headers={'User-Agent': None}) + response = _session.get(url, params=payload) try: result = parse_response(response) except YahooError as error: From ebf4224ca2f496370e3e7fb874ed9c7e660453a3 Mon Sep 17 00:00:00 2001 From: xinluh Date: Fri, 2 Jun 2023 18:12:26 -0600 Subject: [PATCH 3/4] forget about caching. doesn't seem to work as expected --- beanprice/sources/yahoo.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/beanprice/sources/yahoo.py b/beanprice/sources/yahoo.py index 148fe46..7128371 100644 --- a/beanprice/sources/yahoo.py +++ b/beanprice/sources/yahoo.py @@ -106,22 +106,16 @@ def get_price_series(ticker: str, return series, currency -# caching cookies for efficiency when making multiple API calls -_session = None -_crumb = None - - class Source(source.Source): "Yahoo Finance CSV API price extractor." def get_latest_price(self, ticker: str) -> Optional[source.SourcePrice]: """See contract in beanprice.source.Source.""" - if _session is None or _crumb is None: - _session = requests.Session() - _session.headers.update({'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/110.0'}) - _session.get('https://fc.yahoo.com') - _crumb = _session.get('https://query1.finance.yahoo.com/v1/test/getcrumb').text + session = requests.Session() + session.headers.update({'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/110.0'}) + session.get('https://fc.yahoo.com') + crumb = session.get('https://query1.finance.yahoo.com/v1/test/getcrumb').text url = "https://query1.finance.yahoo.com/v7/finance/quote" fields = ['symbol', 'regularMarketPrice', 'regularMarketTime'] @@ -129,9 +123,10 @@ def get_latest_price(self, ticker: str) -> Optional[source.SourcePrice]: 'symbols': ticker, 'fields': ','.join(fields), 'exchange': 'NYSE', + 'crumb': crumb, } payload.update(_DEFAULT_PARAMS) - response = _session.get(url, params=payload) + response = session.get(url, params=payload) try: result = parse_response(response) except YahooError as error: From 56e906cd9c8e25459bd65f3b1672c1f367918441 Mon Sep 17 00:00:00 2001 From: xinluh Date: Fri, 2 Jun 2023 18:14:08 -0600 Subject: [PATCH 4/4] add comments --- beanprice/sources/yahoo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/beanprice/sources/yahoo.py b/beanprice/sources/yahoo.py index 7128371..b08a2c2 100644 --- a/beanprice/sources/yahoo.py +++ b/beanprice/sources/yahoo.py @@ -114,7 +114,7 @@ def get_latest_price(self, ticker: str) -> Optional[source.SourcePrice]: session = requests.Session() session.headers.update({'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/110.0'}) - session.get('https://fc.yahoo.com') + session.get('https://fc.yahoo.com') # This populates the correct cookies in the session crumb = session.get('https://query1.finance.yahoo.com/v1/test/getcrumb').text url = "https://query1.finance.yahoo.com/v7/finance/quote"