Skip to content

Commit

Permalink
Update bot detection
Browse files (browse the repository at this point in the history)
  • Loading branch information
leonghui committed Nov 3, 2024
1 parent 3dd3717 commit 2666cbf
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 3 deletions.
6 changes: 3 additions & 3 deletions amazon_feed.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from flask import abort
from requests.exceptions import JSONDecodeError, RequestException

-from amazon_feed_data import AmazonListingQuery, AmazonItemQuery
+from amazon_feed_data import AmazonListingQuery, AmazonItemQuery, BOT_PATTERN
from json_feed_data import JsonFeedTopLevel, JsonFeedItem, JSONFEED_VERSION_URL


Expand Down Expand Up @@ -92,12 +92,12 @@ def get_response_dict(url, query):

# return HTTP error code
if not response.ok:
-if response.status_code == 503 or response.text.find("captcha") >= 0:
+if response.status_code == 503 or re.search(BOT_PATTERN, response.text):
bot_msg = f'"{query.query_str}" - API paywall triggered, resetting session'
reset_query_session(query)

logger.warning(bot_msg)
-abort(503, description=bot_msg)
+abort(429, description=bot_msg)
else:
logger.error(f'"{query.query_str}" - error from source')
logger.debug(f'"{query.query_str}" - dumping response: {response.text}')
Expand Down
1 change: 1 addition & 0 deletions amazon_feed_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@


ASIN_PATTERN = r"^(B[\dA-Z]{9}|\d{9}(X|\d))$"
+BOT_PATTERN = r"automated access|captcha"


class UnavailabilityText(str, Enum): # allow comparison with strings
Expand Down

0 comments on commit 2666cbf

Please sign in to comment.