diff --git a/HISTORY.rst b/HISTORY.rst index 1bf36d1fb..8cb678ffa 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -16,7 +16,7 @@ Improvements: * Fixed problem with caching :func:`datetime.now` in :class:`FreshnessDateDataParser`. * Added month names and week day names abbreviations to several languages. * More simplifications for Russian and Ukranian languages. -* Fixed problem with parsing time component of date strings having punctuation symbols look alike. +* Fixed problem with parsing time component of date strings with several kinds of apostrophes. 0.3.1 (2015-10-28) diff --git a/README.rst b/README.rst index eeacf5edd..4f1e0ed76 100644 --- a/README.rst +++ b/README.rst @@ -31,7 +31,7 @@ Features * Generic parsing of dates in English, Spanish, Dutch, Russian and several other languages and formats. * Generic parsing of relative dates like: ``'1 min ago'``, ``'2 weeks ago'``, ``'3 months, 1 week and 1 day ago'``. * Generic parsing of dates with time zones abbreviations or UTC offsets like: ``'August 14, 2015 EST'``, ``'July 4, 2013 PST'``, ``'21 July 2013 10:15 pm +0500'``. -* Support for non-Gregorian calendar systems with the first addition of :class:`JalaliParser `. See `Persian Jalali Calendar `_ for more information. +* Support for non-Gregorian calendar systems. See `Supported Calendars`_. * Extensive test coverage. @@ -99,13 +99,17 @@ Relative Dates Dependencies ============ -`dateparser` translates non-English dates to English and uses dateutil_ module ``parser`` to parse the translated date. +`dateparser` relies on the following libraries: -Also, it requires PyYAML_ for its language detection module to work. The module jdatetime_ is used for handling Jalali calendar. + * dateutil_'s module ``parser`` to parse the translated dates. + * PyYAML_ for reading language and configuration files. + * jdatetime_ to convert *Jalali* dates to *Gregorian*. + * umalqurra_ to convert *Hijri* dates to *Gregorian*. .. 
_dateutil: https://pypi.python.org/pypi/python-dateutil .. _PyYAML: https://pypi.python.org/pypi/PyYAML .. _jdatetime: https://pypi.python.org/pypi/jdatetime +.. _umalqurra: https://pypi.python.org/pypi/umalqurra/ Supported languages @@ -133,15 +137,21 @@ Supported languages * Ukrainian * Vietnamese + Supported Calendars =================== -* Gregorian calendar +* Gregorian calendar. -* Persian Jalali calendar +* Persian Jalali calendar. For more information, refer to `Persian Jalali Calendar <https://en.wikipedia.org/wiki/Iranian_calendars>`_. -Example of Use for Jalali Calendar -================================== +* Hijri/Islamic calendar. For more information, refer to `Hijri Calendar <https://en.wikipedia.org/wiki/Islamic_calendar>`_. >>> from dateparser.calendars.jalali import JalaliParser >>> JalaliParser(u'جمعه سی ام اسفند ۱۳۸۷').get_date() datetime.datetime(2009, 3, 20, 0, 0) + + >>> from dateparser.calendars.hijri import HijriCalendar + >>> HijriCalendar(u'17-01-1437 هـ 08:30 مساءً').get_date() + {'date_obj': datetime.datetime(2015, 10, 30, 20, 30), 'period': 'day'} + +.. note:: `HijriCalendar` has some limitations with Python 3. diff --git a/dateparser/__init__.py b/dateparser/__init__.py index 106bc9058..4a4a45538 100644 --- a/dateparser/__init__.py +++ b/dateparser/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -__version__ = '0.3.1' +__version__ = '0.3.2' from .date import DateDataParser from .conf import apply_settings @@ -28,7 +28,8 @@ def parse(date_string, date_formats=None, languages=None, settings=None): :type settings: dict - :return: Returns a :mod:`datetime.datetime` if successful, else returns None + :return: Returns :class:`datetime <datetime.datetime>` representing the parsed date if successful, else returns None + :rtype: :class:`datetime <datetime.datetime>`. :raises: ValueError - Unknown Language """ parser = _default_parser diff --git a/dateparser/conf.py b/dateparser/conf.py index 007ba3ffb..ed0838d97 100644 --- a/dateparser/conf.py +++ b/dateparser/conf.py @@ -13,10 +13,12 @@ class Settings(object): """Control and configure default parsing behavior of dateparser. 
Currently, supported settings are: - - `PREFER_DATES_FROM`: defaults to `current_period`. Options are `future` or `past`. - - `SUPPORT_BEFORE_COMMON_ERA`: defaults to `False`. - - `PREFER_DAY_OF_MONTH`: defaults to `current`. Could be `first` and `last` day of month. - - `SKIP_TOKENS`: defaults to `['t']`. Can be any string. + + * `PREFER_DATES_FROM`: defaults to `current_period`. Options are `future` or `past`. + * `SUPPORT_BEFORE_COMMON_ERA`: defaults to `False`. + * `PREFER_DAY_OF_MONTH`: defaults to `current`. Could be `first` or `last` day of month. + * `SKIP_TOKENS`: defaults to `['t']`. Can be any string. + * `TIMEZONE`: defaults to `UTC`. Can be a timezone abbreviation or any `tz database name as given here <https://en.wikipedia.org/wiki/List_of_tz_database_time_zones>`_. """ _default = True diff --git a/dateparser/date.py b/dateparser/date.py index 5ee6302cc..142f747ef 100644 --- a/dateparser/date.py +++ b/dateparser/date.py @@ -316,7 +316,7 @@ def get_date_data(self, date_string, date_formats=None): In the example below, since no day information is present, the day is assumed to be current day ``16`` from *current date* (which is June 16, 2015, at the moment of writing this). - Hence, the level of precision is ``month``. + Hence, the level of precision is ``month``: >>> DateDataParser().get_date_data(u'March 2015') {'date_obj': datetime.datetime(2015, 3, 16, 0, 0), 'period': u'month'} @@ -328,6 +328,7 @@ def get_date_data(self, date_string, date_formats=None): {'date_obj': datetime.datetime(2014, 6, 16, 0, 0), 'period': u'year'} Dates with time zone indications or UTC offsets are returned in UTC time. + >>> DateDataParser().get_date_data(u'23 March 2000, 1:21 PM CET') {'date_obj': datetime.datetime(2000, 3, 23, 14, 21), 'period': 'day'} diff --git a/docs/usage.rst b/docs/usage.rst index e479e74dd..c9bea937d 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -5,12 +5,12 @@ Using DateDataParser every time it is called and is not the most efficient way while parsing dates from the same source. 
-:class:`dateparser.date.DateDataParser` provides an alternate and efficient way +:class:`DateDataParser <dateparser.date.DateDataParser>` provides an alternate and efficient way to control language detection behavior. -The instance of :class:`dateparser.date.DateDataParser` reduces the number +The instance of :class:`DateDataParser <dateparser.date.DateDataParser>` reduces the number of applicable languages, until only one or no language is left. It -assumes the previously detected language for all the next dates and does not try +assumes the previously detected language for all the subsequent dates supplied and does not try to execute the language detection again after a language is discarded. This class wraps around the core :mod:`dateparser` functionality, and by default @@ -19,22 +19,13 @@ assumes that all of the dates fed to it are in the same language. .. autoclass:: dateparser.date.DateDataParser :members: get_date_data -Once initialized, :func:`dateparser.date.DateDataParser.get_date_data` parses date strings:: - - >>> from dateparser.date import DateDataParser - >>> ddp = DateDataParser() - >>> ddp.get_date_data(u'Martes 21 de Octubre de 2014') # Spanish - {'date_obj': datetime.datetime(2014, 10, 21, 0, 0), 'period': u'day'} - >>> ddp.get_date_data(u'13 Septiembre, 2014') # Spanish - {'date_obj': datetime.datetime(2014, 9, 13, 0, 0), 'period': u'day'} - .. 
warning:: It fails to parse *English* dates in the example below, because *Spanish* was detected and stored with the ``ddp`` instance: >>> ddp.get_date_data('11 August 2012') {'date_obj': None, 'period': 'day'} -:class:`dateparser.date.DateDataParser` can also be initialized with known languages:: +:class:`dateparser.date.DateDataParser` can also be initialized with known languages: >>> ddp = DateDataParser(languages=['de', 'nl']) >>> ddp.get_date_data(u'vr jan 24, 2014 12:49') @@ -42,9 +33,21 @@ Once initialized, :func:`dateparser.date.DateDataParser.get_date_data` parses da >>> ddp.get_date_data(u'18.10.14 um 22:56 Uhr') {'date_obj': datetime.datetime(2014, 10, 18, 22, 56), 'period': u'day'} -:mod:`dateparser`'s parsing behavior can be configured like below:: -*``PREFER_DAY_OF_MONTH``* defaults to ``current`` and can have ``first`` and ``last`` as values:: +Settings +======== + +:mod:`dateparser`'s parsing behavior can be configured like below: + +``TIMEZONE`` defaults to ``UTC``. All dates, complete or relative, are assumed to be in ``UTC``. When specified, the resultant :class:`datetime <datetime.datetime>` is converted according to the supplied timezone: + + >>> parse('January 12, 2012 10:00 PM') + datetime.datetime(2012, 1, 12, 22, 0) + + >>> parse('January 12, 2012 10:00 PM', settings={'TIMEZONE': 'US/Eastern'}) + datetime.datetime(2012, 1, 12, 17, 0) + +``PREFER_DAY_OF_MONTH`` defaults to ``current`` and can have ``first`` and ``last`` as values: >>> from dateparser import parse >>> parse(u'December 2015') # default behavior @@ -54,8 +57,8 @@ Once initialized, :func:`dateparser.date.DateDataParser.get_date_data` parses da >>> parse(u'December 2015', settings={'PREFER_DAY_OF_MONTH': 'first'}) datetime.datetime(2015, 12, 1, 0, 0) -*``PREFER_DATES_FROM``* defaults to ``current_period`` and can have ``past`` and ``future`` as values. -Assuming current date is June 16, 2015:: +``PREFER_DATES_FROM`` defaults to ``current_period`` and can have ``past`` and ``future`` as values. 
+Assuming current date is June 16, 2015: >>> from dateparser import parse >>> parse(u'March') @@ -63,7 +66,7 @@ Assuming current date is June 16, 2015:: >>> parse(u'March', settings={'PREFER_DATES_FROM': 'future'}) datetime.datetime(2016, 3, 16, 0, 0) -*``SKIP_TOKENS``* is a ``list`` of tokens to discard while detecting language. Defaults to ``['t']`` which skips T in iso format datetime string.e.g. ``2015-05-02T10:20:19+0000``.:: +``SKIP_TOKENS`` is a ``list`` of tokens to discard while detecting language. Defaults to ``['t']``, which skips the ``T`` in ISO-format datetime strings, e.g. ``2015-05-02T10:20:19+0000``: >>> from dateparser.date import DateDataParser >>> DateDataParser(settings={'SKIP_TOKENS': ['de']}).get_date_data(u'27 Haziran 1981 de') # Turkish (at 27 June 1981)