Skip to content

Commit

Permalink
Timezone fixes (#235)
Browse files Browse the repository at this point in the history
* Improved timezone settings

* Add tzlocal to requirements

* Fixed docs for timezone related settings

* removed hacky tzaware dates fix

* tests for apply_timezone and localize_timezone funcs

* Added tests for timezone conversion

* Fixed timezone conversion tests
  • Loading branch information
waqasshabbir committed Sep 26, 2016
1 parent 6031899 commit a7e96d3
Show file tree
Hide file tree
Showing 14 changed files with 133 additions and 47 deletions.
19 changes: 13 additions & 6 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -127,17 +127,22 @@ For more on date order, please look at `Settings`_.
Timezone and UTC Offset
-----------------------

Dateparser assumes all dates to be in UTC if no timezone is specified in the string. To convert the resultant `datetime` object to the required timezone, you can do the following:
`dateparser` automatically detects the timezone if it is given in the date string. If the date has no timezone name/abbreviation or offset, you can still specify it using the `TIMEZONE` setting.

>>> parse('January 12, 2012 10:00 PM', settings={'TIMEZONE': 'US/Eastern'})
datetime.datetime(2012, 1, 12, 17, 0)
datetime.datetime(2012, 1, 12, 22, 0)

Support for tzaware objects:
You can also convert from one time zone to another using the `TO_TIMEZONE` setting.

>>> parse('10:00 am', settings={'TO_TIMEZONE': 'EDT', 'TIMEZONE': 'EST'})
datetime.datetime(2016, 9, 25, 11, 0)

>>> parse('12 Feb 2015 10:56 PM EST', settings={'RETURN_AS_TIMEZONE_AWARE': True})
datetime.datetime(2015, 2, 13, 3, 56, tzinfo=<StaticTzInfo 'UTC'>)
>>> parse('10:00 am EST', settings={'TO_TIMEZONE': 'EDT'})
datetime.datetime(2016, 9, 25, 11, 0)

Support for tzaware objects:

>>> parse('12 Feb 2015 10:56 PM EST', settings={'RETURN_AS_TIMEZONE_AWARE': True, 'TIMEZONE': None})
>>> parse('12 Feb 2015 10:56 PM EST', settings={'RETURN_AS_TIMEZONE_AWARE': True})
datetime.datetime(2015, 2, 12, 22, 56, tzinfo=<StaticTzInfo 'EST'>)

For more on timezones, please look at `Settings`_.
Expand Down Expand Up @@ -179,11 +184,13 @@ Dependencies
* ruamel.yaml_ for reading language and configuration files.
* jdatetime_ to convert *Jalali* dates to *Gregorian*.
* umalqurra_ to convert *Hijri* dates to *Gregorian*.
* tzlocal_ to reliably get local timezone.

.. _dateutil: https://pypi.python.org/pypi/python-dateutil
.. _ruamel.yaml: https://pypi.python.org/pypi/ruamel.yaml
.. _jdatetime: https://pypi.python.org/pypi/jdatetime
.. _umalqurra: https://pypi.python.org/pypi/umalqurra/
.. _tzlocal: https://pypi.python.org/pypi/tzlocal


Supported languages
Expand Down
3 changes: 2 additions & 1 deletion data/settings.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ settings:
PREFER_DAY_OF_MONTH: 'current'
SKIP_TOKENS: ["t"]
SKIP_TOKENS_PARSER: ["t", "year", "hour", "minute"]
TIMEZONE: 'UTC'
TIMEZONE: 'local'
TO_TIMEZONE: False
RETURN_AS_TIMEZONE_AWARE: False
NORMALIZE: True
RELATIVE_BASE: False
Expand Down
19 changes: 13 additions & 6 deletions dateparser/date_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@

import six

from tzlocal import get_localzone

from .timezone_parser import pop_tz_offset_from_string
from .utils import strip_braces, apply_timezone
from .utils import strip_braces, apply_timezone, localize_timezone
from .conf import apply_settings
from .parser import parse

Expand All @@ -19,15 +21,20 @@ def parse(self, date_string, settings=None):
raise ValueError("Empty string")

date_string = strip_braces(date_string)
date_string, tz = pop_tz_offset_from_string(date_string)
date_string, ptz = pop_tz_offset_from_string(date_string)

date_obj, period = parse(date_string, settings=settings)

if tz is not None:
date_obj = tz.localize(date_obj)
if ptz is not None:
date_obj = ptz.localize(date_obj)
elif 'local' in settings.TIMEZONE.lower():
stz = get_localzone()
date_obj = stz.localize(date_obj)
else:
date_obj = localize_timezone(date_obj, settings.TIMEZONE)

if settings.TIMEZONE:
date_obj = apply_timezone(date_obj, settings.TIMEZONE)
if settings.TO_TIMEZONE:
date_obj = apply_timezone(date_obj, settings.TO_TIMEZONE)

if not settings.RETURN_AS_TIMEZONE_AWARE:
date_obj = date_obj.replace(tzinfo=None)
Expand Down
21 changes: 10 additions & 11 deletions dateparser/freshness_date_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,7 @@
class FreshnessDateDataParser(object):
""" Parses date string like "1 year, 2 months ago" and "3 hours, 50 minutes ago" """
def __init__(self):
self._now = None

@property
def now(self):
return self._now if self._now else datetime.utcnow()

@now.setter
def now(self, value):
self._now = value
self.now = None

def _are_all_words_units(self, date_string):
skip = [_UNITS,
Expand All @@ -52,6 +44,13 @@ def parse(self, date_string, settings):
if settings.RELATIVE_BASE:
self.now = settings.RELATIVE_BASE

elif 'local' in settings.TIMEZONE.lower():
self.now = datetime.now()

else:
utc_dt = datetime.utcnow()
self.now = apply_timezone(utc_dt, settings.TIMEZONE)

date, period = self._parse(date_string)

if date:
Expand All @@ -62,8 +61,8 @@ def parse(self, date_string, settings):
else:
# No timezone shift takes place if time is given in the string.
# e.g. `2 days ago at 1 PM`
if settings.TIMEZONE:
date = apply_timezone(date, settings.TIMEZONE)
if settings.TO_TIMEZONE:
date = apply_timezone(date, settings.TO_TIMEZONE)

if not settings.RETURN_AS_TIMEZONE_AWARE:
date = date.replace(tzinfo=None)
Expand Down
21 changes: 20 additions & 1 deletion dateparser/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import unicodedata

import regex as re
from pytz import UTC, timezone
from pytz import UTC, timezone, UnknownTimeZoneError

from dateparser.timezone_parser import _tz_offsets, StaticTzInfo

Expand Down Expand Up @@ -88,6 +88,25 @@ def find_date_separator(format):
return m.group(1)


def localize_timezone(date_time, tz_string):
    """Attach the timezone named by ``tz_string`` to a naive datetime.

    No clock shift is performed: the wall-clock fields of ``date_time`` are
    kept and only ``tzinfo`` is set, via the timezone object's ``localize``.

    :param date_time: datetime to localize. If it already carries a
        ``tzinfo`` it is returned unchanged.
    :param tz_string: either a name accepted by ``pytz.timezone`` or a
        string matched by one of the patterns in ``_tz_offsets``.
    :returns: ``date_time`` localized to the resolved timezone.
    :raises pytz.UnknownTimeZoneError: if ``tz_string`` is neither a pytz
        name nor matched by any ``_tz_offsets`` pattern.
    """
    if date_time.tzinfo is not None:
        return date_time

    try:
        tz = timezone(tz_string)
    except UnknownTimeZoneError:
        # Not a tz database name: fall back to the abbreviation/offset table.
        # A space is prepended before searching; presumably the patterns
        # expect the timezone token to be preceded by whitespace -- confirm
        # against timezone_parser.
        for name, info in _tz_offsets:
            if info['regex'].search(' %s' % tz_string):
                tz = StaticTzInfo(name, info['offset'])
                break
        else:
            # Bare ``raise`` re-raises the UnknownTimeZoneError with its
            # original traceback (``raise e`` would drop it on Python 2).
            raise

    return tz.localize(date_time)


def apply_tzdatabase_timezone(date_time, pytz_string):
usr_timezone = timezone(pytz_string)

Expand Down
9 changes: 6 additions & 3 deletions docs/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -71,12 +71,15 @@ Date Order
Timezone Related Configurations
+++++++++++++++++++++++++++++++

``TIMEZONE`` defaults to `UTC`. All dates, complete or relative, are assumed to be in `UTC`. When specified, resultant :class:`datetime <datetime.datetime>` converts according to the supplied timezone:
``TIMEZONE`` defaults to the local timezone. When specified, the resultant :class:`datetime <datetime.datetime>` is localized with the given timezone.

>>> parse('January 12, 2012 10:00 PM')
>>> parse('January 12, 2012 10:00 PM', settings={'TIMEZONE': 'US/Eastern'})
datetime.datetime(2012, 1, 12, 22, 0)

>>> parse('January 12, 2012 10:00 PM', settings={'TIMEZONE': 'US/Eastern'})
``TO_TIMEZONE`` is disabled by default (``False``). When specified, the resultant :class:`datetime <datetime.datetime>` is converted to the supplied timezone:

>>> settings = {'TIMEZONE': 'UTC', 'TO_TIMEZONE': 'US/Eastern'}
>>> parse('January 12, 2012 10:00 PM', settings=settings)
datetime.datetime(2012, 1, 12, 17, 0)

``RETURN_AS_TIMEZONE_AWARE`` is a flag to turn on timezone-aware dates if a timezone is detected or specified:
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ jdatetime
umalqurra
convertdate
pytz
tzlocal
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
'umalqurra',
'pytz',
'regex',
'tzlocal',
],
license="BSD",
zip_safe=False,
Expand Down
5 changes: 2 additions & 3 deletions tests/test_date.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,8 +397,7 @@ def test_parse_date_using_format(self, date_string, date_formats, expected_resul
param(date_string="2014/11/17 14:56 EDT", expected_result=datetime(2014, 11, 17, 18, 56)),
])
def test_parse_date_with_timezones_not_using_formats(self, date_string, expected_result):
self.given_local_tz_offset(0)
self.given_parser()
self.given_parser(settings={'TO_TIMEZONE': 'UTC'})
self.when_date_string_is_parsed(date_string)
self.then_date_was_parsed()
self.then_period_is('day')
Expand Down Expand Up @@ -475,7 +474,7 @@ def then_date_language(self):
self.assertIsNotNone(self.result['language'])

def then_date_is_n_days_ago(self, days):
today = datetime.utcnow().date()
today = datetime.now().date()
expected_date = today - timedelta(days=days)
self.assertEqual(expected_date, self.result['date_obj'].date())

Expand Down
20 changes: 8 additions & 12 deletions tests/test_date_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,12 +211,12 @@ def setUp(self):
param('[Sept] 04, 2014.', datetime(2014, 9, 4)),
param('Tuesday Jul 22, 2014', datetime(2014, 7, 22)),
param('Tues 9th Aug, 2015', datetime(2015, 8, 9)),
param('10:04am EDT', datetime(2012, 11, 13, 14, 4)),
param('10:04am EDT', datetime(2012, 11, 13, 10, 4)),
param('Friday', datetime(2012, 11, 9)),
param('November 19, 2014 at noon', datetime(2014, 11, 19, 12, 0)),
param('December 13, 2014 at midnight', datetime(2014, 12, 13, 0, 0)),
param('Nov 25 2014 10:17 pm EST', datetime(2014, 11, 26, 3, 17)),
param('Wed Aug 05 12:00:00 EDT 2015', datetime(2015, 8, 5, 16, 0)),
param('Nov 25 2014 10:17 pm EST', datetime(2014, 11, 25, 22, 17)),
param('Wed Aug 05 12:00:00 EDT 2015', datetime(2015, 8, 5, 12, 0)),
param('April 9, 2013 at 6:11 a.m.', datetime(2013, 4, 9, 6, 11)),
param('Aug. 9, 2012 at 2:57 p.m.', datetime(2012, 8, 9, 14, 57)),
param('December 10, 2014, 11:02:21 pm', datetime(2014, 12, 10, 23, 2, 21)),
Expand Down Expand Up @@ -342,7 +342,6 @@ def setUp(self):
param('2016年9月14日晚8:00', datetime(2016, 9, 14, 20, 0)),
])
def test_dates_parsing(self, date_string, expected):
self.given_local_tz_offset(0)
self.given_parser(settings={'NORMALIZE': False,
'RELATIVE_BASE': datetime(2012, 11, 13)})
self.when_date_is_parsed(date_string)
Expand All @@ -363,12 +362,12 @@ def test_stringified_datetime_should_parse_fine(self):
# English dates
param('[Sept] 04, 2014.', datetime(2014, 9, 4)),
param('Tuesday Jul 22, 2014', datetime(2014, 7, 22)),
param('10:04am EDT', datetime(2012, 11, 13, 14, 4)),
param('10:04am EDT', datetime(2012, 11, 13, 10, 4)),
param('Friday', datetime(2012, 11, 9)),
param('November 19, 2014 at noon', datetime(2014, 11, 19, 12, 0)),
param('December 13, 2014 at midnight', datetime(2014, 12, 13, 0, 0)),
param('Nov 25 2014 10:17 pm EST', datetime(2014, 11, 26, 3, 17)),
param('Wed Aug 05 12:00:00 EDT 2015', datetime(2015, 8, 5, 16, 0)),
param('Nov 25 2014 10:17 pm EST', datetime(2014, 11, 25, 22, 17)),
param('Wed Aug 05 12:00:00 EDT 2015', datetime(2015, 8, 5, 12, 0)),
param('April 9, 2013 at 6:11 a.m.', datetime(2013, 4, 9, 6, 11)),
param('Aug. 9, 2012 at 2:57 p.m.', datetime(2012, 8, 9, 14, 57)),
param('December 10, 2014, 11:02:21 pm', datetime(2014, 12, 10, 23, 2, 21)),
Expand Down Expand Up @@ -497,12 +496,10 @@ def test_dates_parsing_with_normalization(self, date_string, expected):
param('Sep 03 2014 | 4:32 pm EDT', datetime(2014, 9, 3, 20, 32)),
param('17th October, 2034 @ 01:08 am PDT', datetime(2034, 10, 17, 8, 8)),
param('15 May 2004 23:24 EDT', datetime(2004, 5, 16, 3, 24)),
param('15 May 2004', datetime(2004, 5, 15, 0, 0)),
param('08/17/14 17:00 (PDT)', datetime(2014, 8, 18, 0, 0)),
])
def test_parsing_with_time_zones(self, date_string, expected):
self.given_local_tz_offset(+1)
self.given_parser()
self.given_parser(settings={'TO_TIMEZONE': 'UTC'})
self.when_date_is_parsed(date_string)
self.then_date_was_parsed_by_date_parser()
self.then_period_is('day')
Expand All @@ -516,8 +513,7 @@ def test_parsing_with_time_zones(self, date_string, expected):
param('Fri, 09 Sep 2005 13:51:39 +0000', datetime(2005, 9, 9, 13, 51, 39)),
])
def test_parsing_with_utc_offsets(self, date_string, expected):
self.given_local_tz_offset(0)
self.given_parser()
self.given_parser(settings={'TO_TIMEZONE': 'utc'})
self.when_date_is_parsed(date_string)
self.then_date_was_parsed_by_date_parser()
self.then_period_is('day')
Expand Down
2 changes: 2 additions & 0 deletions tests/test_freshness_date_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ def setUp(self):
self.date = NotImplemented
self.time = NotImplemented

settings.TIMEZONE = 'utc'

@parameterized.expand([
# English dates
param('yesterday', ago={'days': 1}, period='day'),
Expand Down
5 changes: 2 additions & 3 deletions tests/test_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def setUp(self):
])
def test_should_return_tz_aware_dates(self, ds, dt):
self.given(ds)
self.given_configurations({'RETURN_AS_TIMEZONE_AWARE': True})
self.given_configurations({'RETURN_AS_TIMEZONE_AWARE': True, 'TO_TIMEZONE': 'UTC'})
self.when_date_is_parsed()
self.then_date_is_tz_aware()
self.then_date_is(dt)
Expand All @@ -50,14 +50,13 @@ def test_should_return_and_assert_tz(self, ds, dt, tz):
self.then_timezone_is(tz)

@parameterized.expand([
param('12 Feb 2015 4:30 PM', datetime(2015, 2, 12, 16, 30), None),
param('12 Feb 2015 4:30 PM EST', datetime(2015, 2, 12, 16, 30), 'EST'),
param('12 Feb 2015 8:30 PM PKT', datetime(2015, 2, 12, 20, 30), 'PKT'),
param('12 Feb 2015 8:30 PM ACT', datetime(2015, 2, 12, 20, 30), 'ACT'),
])
def test_only_return_explicit_timezone(self, ds, dt, tz):
self.given(ds)
self.given_configurations({'RETURN_AS_TIMEZONE_AWARE': True, 'TIMEZONE': None})
self.given_configurations({'RETURN_AS_TIMEZONE_AWARE': True})
self.when_date_is_parsed()
self.then_date_is(dt)
if tz:
Expand Down
31 changes: 31 additions & 0 deletions tests/test_timezone_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import dateparser.timezone_parser
from dateparser.timezone_parser import pop_tz_offset_from_string, get_local_tz_offset
from dateparser import parse
from tests import BaseTestCase


Expand Down Expand Up @@ -126,3 +127,33 @@ def _given_time(self, datetime_string, getter_name):
self.add_patch(
patch('dateparser.timezone_parser.datetime', new=datetime_cls)
)


# End-to-end checks that parsing with both TIMEZONE (source zone) and
# TO_TIMEZONE (target zone) settings yields the converted wall-clock time.
class TestTimeZoneConversion(BaseTestCase):
def setUp(self):
super(TestTimeZoneConversion, self).setUp()
# Settings dict is filled in by the given_* helpers and passed to parse().
self.settings = {}
self.parser = parse
# Placeholder until when_date_is_parsed() stores the real result.
self.result = NotImplemented

# Each case: date string, source timezone, target timezone, expected naive
# datetime after conversion.
@parameterized.expand([
param('2015-12-31 10:04 AM', 'Asia/Karachi', 'UTC', datetime(2015, 12, 31, 5, 4)),
param('2015-12-30 10:04 AM', 'Asia/Karachi', '+0200', datetime(2015, 12, 30, 7, 4)),
])
def test_timezone_conversion(self, datestring, from_tz, to_tz, expected):
self.given_from_timezone(from_tz)
self.given_to_timezone(to_tz)
self.when_date_is_parsed(datestring)
self.then_date_is(expected)

# Sets the TIMEZONE setting used for the parse call.
def given_from_timezone(self, timezone):
self.settings['TIMEZONE'] = timezone

# Sets the TO_TIMEZONE setting used for the parse call.
def given_to_timezone(self, timezone):
self.settings['TO_TIMEZONE'] = timezone

# Runs the parser with the accumulated settings and stores the result.
def when_date_is_parsed(self, datestring):
self.result = self.parser(datestring, settings=self.settings)

# Asserts the stored parse result equals the expected datetime.
def then_date_is(self, date):
self.assertEqual(date, self.result)
23 changes: 22 additions & 1 deletion tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
import itertools

from datetime import datetime
from tests import BaseTestCase
from nose_parameterized import parameterized, param
from dateparser.utils import find_date_separator
from dateparser.utils import (
find_date_separator, localize_timezone, apply_timezone
)


class TestUtils(BaseTestCase):
Expand Down Expand Up @@ -29,3 +33,20 @@ def test_separator_extraction(self, date_format, expected_sep):
self.given_date_format(date_format)
self.when_date_seperator_is_parsed()
self.then_date_seperator_is(expected_sep)

# localize_timezone() on a naive datetime must return a datetime whose
# tzinfo reports the requested zone name via its ``zone`` attribute.
@parameterized.expand([
param(datetime(2015, 12, 12), timezone='UTC', zone='UTC'),
param(datetime(2015, 12, 12), timezone='Asia/Karachi', zone='Asia/Karachi'),
])
def test_localize_timezone(self, date, timezone, zone):
tzaware_dt = localize_timezone(date, timezone)
self.assertEqual(tzaware_dt.tzinfo.zone, zone)

# apply_timezone() must shift the clock time into the target zone.
# NOTE(review): the expected values are +5h / -5h from the naive input,
# which suggests the input is treated as UTC -- confirm against
# apply_timezone's implementation.
@parameterized.expand([
param(datetime(2015, 12, 12, 10, 12), timezone='Asia/Karachi', expected=datetime(2015, 12, 12, 15, 12)),
param(datetime(2015, 12, 12, 10, 12), timezone='-0500', expected=datetime(2015, 12, 12, 5, 12)),
])
def test_apply_timezone(self, date, timezone, expected):
result = apply_timezone(date, timezone)
# Drop tzinfo so the result can be compared with the naive expected value.
result = result.replace(tzinfo=None)
self.assertEqual(expected, result)

0 comments on commit a7e96d3

Please sign in to comment.