Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

postpone timezone regex evaluation until first use - shaves off time from package import #1181

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 35 additions & 7 deletions dateparser/timezone_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ def __getinitargs__(self):


def pop_tz_offset_from_string(date_string, as_offset=True):
if _search_regex_ignorecase.search(date_string):
for name, info in _tz_offsets:
if TzRegexCache.search_regex_ignorecase().search(date_string):
for name, info in TzRegexCache.tz_offsets():
timezone_re = info["regex"]
timezone_match = timezone_re.search(date_string)
if timezone_match:
Expand All @@ -47,7 +47,7 @@ def pop_tz_offset_from_string(date_string, as_offset=True):


def word_is_tz(word):
return bool(_search_regex.match(word))
return bool(TzRegexCache.search_regex().match(word))


def convert_to_local_tz(datetime_obj, datetime_tz_offset):
Expand Down Expand Up @@ -85,8 +85,36 @@ def get_local_tz_offset():
return offset


_search_regex_parts = []
_tz_offsets = list(build_tz_offsets(_search_regex_parts))
_search_regex = re.compile("|".join(_search_regex_parts))
_search_regex_ignorecase = re.compile("|".join(_search_regex_parts), re.IGNORECASE)
class TzRegexCache:
    """Lazily built, class-level cache of the timezone offset table and regexes.

    Nothing is computed when the module is imported. The first call to any
    accessor (:meth:`tz_offsets`, :meth:`search_regex`,
    :meth:`search_regex_ignorecase`) runs :meth:`prepare`, which builds all
    cached values once and stores them on the class; later calls reuse them.
    """

    # True once every cached attribute below holds its final value.
    _ready = False
    # List handed to build_tz_offsets(); its entries are joined with "|" to
    # form the combined search pattern.
    _search_regex_parts = []
    # Materialised result of build_tz_offsets(); callers iterate it as
    # (name, info) pairs.
    _tz_offsets = []
    # Case-sensitive compiled pattern over all parts (None until prepared).
    _search_regex = None
    # Same pattern compiled with re.IGNORECASE (None until prepared).
    _search_regex_ignorecase = None

    @classmethod
    def prepare(cls):
        """Build the cached table and regexes on first call; no-op afterwards."""
        if cls._ready:
            return

        # Build everything in locals first. Going through the class attribute
        # while building would let a second caller rebind the parts list
        # mid-construction, so a pattern could be compiled from a half-filled
        # list; it would also leave partial state behind if building raised.
        parts = []
        offsets = list(build_tz_offsets(parts))
        pattern = "|".join(parts)
        search_regex = re.compile(pattern)
        search_regex_ignorecase = re.compile(pattern, re.IGNORECASE)

        cls._search_regex_parts = parts
        cls._tz_offsets = offsets
        cls._search_regex = search_regex
        cls._search_regex_ignorecase = search_regex_ignorecase
        # Flip the flag last, once every attribute above is fully populated.
        cls._ready = True

    @classmethod
    def tz_offsets(cls):
        """Return the cached list produced by build_tz_offsets()."""
        cls.prepare()
        return cls._tz_offsets

    @classmethod
    def search_regex(cls):
        """Return the case-sensitive compiled pattern covering all parts."""
        cls.prepare()
        return cls._search_regex

    @classmethod
    def search_regex_ignorecase(cls):
        """Return the compiled pattern covering all parts, ignoring case."""
        cls.prepare()
        return cls._search_regex_ignorecase


local_tz_offset = get_local_tz_offset()
6 changes: 3 additions & 3 deletions dateparser/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from pytz import UTC, UnknownTimeZoneError, timezone
from tzlocal import get_localzone

from dateparser.timezone_parser import StaticTzInfo, _tz_offsets
from dateparser.timezone_parser import StaticTzInfo, TzRegexCache


def strip_braces(date_string):
Expand Down Expand Up @@ -73,7 +73,7 @@ def get_timezone_from_tz_string(tz_string):
try:
return timezone(tz_string)
except UnknownTimeZoneError as e:
for name, info in _tz_offsets:
for name, info in TzRegexCache.tz_offsets():
if info["regex"].search(" %s" % tz_string):
return StaticTzInfo(name, info["offset"])
else:
Expand Down Expand Up @@ -104,7 +104,7 @@ def apply_tzdatabase_timezone(date_time, pytz_string):


def apply_dateparser_timezone(utc_datetime, offset_or_timezone_abb):
for name, info in _tz_offsets:
for name, info in TzRegexCache.tz_offsets():
if info["regex"].search(" %s" % offset_or_timezone_abb):
tz = StaticTzInfo(name, info["offset"])
return utc_datetime.astimezone(tz)
Expand Down