From 2876f94bc6f861344077dbce0bd90d440964ace8 Mon Sep 17 00:00:00 2001 From: orsinium Date: Sun, 2 Apr 2017 20:56:14 +0500 Subject: [PATCH] experimental timezone supporting --- manual_test.py | 8 +++++--- rutimeparser/__init__.py | 29 +++++++++++++++++++---------- rutimeparser/get_cat.py | 29 ++++++++++++++++------------- rutimeparser/reducers.py | 24 ++++++++++++------------ rutimeparser/utils.py | 29 +++++++++++++++++++++++++++++ tests.py | 2 +- 6 files changed, 82 insertions(+), 39 deletions(-) diff --git a/manual_test.py b/manual_test.py index 9a25b34..d587840 100644 --- a/manual_test.py +++ b/manual_test.py @@ -1,4 +1,4 @@ -from timeparser import parse_time, get_clear_text, get_last_clear_text +from rutimeparser import parse_time, get_clear_text, get_last_clear_text if __name__ == '__main__': for test in open('test_strings', 'r'): @@ -6,5 +6,7 @@ while 1: text = input('> ') - print('\x1B[32m ', get_clear_text(text), '\x1B[0m') - print('\x1B[33m ', get_last_clear_text(text), '\x1B[0m') + print('Time: \x1B[32m', parse_time(text), '\x1B[0m') + print('Moscow:\x1B[32m', parse_time(text, tz='Europe/Moscow'), '\x1B[0m') + print('Text: \x1B[32m', get_clear_text(text), '\x1B[0m') + print('Last: \x1B[33m ', get_last_clear_text(text), '\x1B[0m') diff --git a/rutimeparser/__init__.py b/rutimeparser/__init__.py index df1d0a2..8f078c0 100644 --- a/rutimeparser/__init__.py +++ b/rutimeparser/__init__.py @@ -2,7 +2,7 @@ from .get_words import get_words from .get_cat import get_cat -from .utils import ngrams, Node +from .utils import ngrams, Node, get_now, change_timezone from .reducers import templates @@ -12,22 +12,27 @@ class TimeParser: Возвращает datetime, date или None. 
''' - def __init__(self, text='', words=None): + def __init__(self, text='', words=None, tz=None, now=None): if not words: if not text: raise ValueError('Please, set text or words for TimeParser.') self.words = tuple(get_words(text)) else: self.words = words + + self.tz = tz + self.now = now if now else get_now(self.tz) + + if not self.tz and now and now.tzinfo: + self.tz = str(now.tzinfo) def make_nodes(self): ''' Генерирует список нод на основе слов исходного текста ''' - self.nodes = [] for i, word in enumerate(self.words): - cat, value = get_cat(word) + cat, value = get_cat(word, self.now) self.nodes.append(Node(i, cat, word, value)) return self.nodes @@ -95,7 +100,7 @@ def reduce(self): for f, *template in templates: nodes_samples = list(self.get_nodes_by_template(*template)) for nodes in nodes_samples: - new_node = f(nodes) + new_node = f(nodes, now=self.now) self.replace(nodes[0], nodes[-1], new_node) def __dict__(self): @@ -110,10 +115,11 @@ def get_datetime(self): ''' nodes = self.__dict__() if 'datetime' in nodes: - return nodes['datetime'] + return change_timezone(nodes['datetime'], self.tz) now = datetime.now() if 'date' in nodes and 'time' in nodes: - return datetime.combine(nodes['date'], nodes['time']) + dt = datetime.combine(nodes['date'], nodes['time']) + return change_timezone(dt, self.tz) if 'time' in nodes: return datetime.combine(now.date(), nodes['time']) if 'date' in nodes: @@ -128,16 +134,19 @@ def get_clear_text(self): def get_last_clear_text(self): result = [] - chain = list(self.get_junk_chains())[-1] + chains = list(self.get_junk_chains()) + if not chains: + return '' + chain = chains[-1] return ' '.join([node.word for node in chain]) -def parse_time(text, remove_junk=True, debug=False): +def parse_time(text, *, tz=None, now=None, remove_junk=True, debug=False): ''' Для тех, кто не любит классы. Выполняет все необходимые операции с текстом и возвращает результат. 
''' - tp = TimeParser(text) + tp = TimeParser(text, tz=tz, now=now) tp.make_nodes() if debug: from pprint import pprint diff --git a/rutimeparser/get_cat.py b/rutimeparser/get_cat.py index 457404e..f9a4f24 100644 --- a/rutimeparser/get_cat.py +++ b/rutimeparser/get_cat.py @@ -5,10 +5,11 @@ from datetime import datetime, timedelta +from .utils import change_timezone from . import rules -def date(word): +def date(word, now): for rd in rules.dates: try: result = datetime.strptime(word, rd).date() @@ -19,9 +20,9 @@ def date(word): if word in rules.from_now: delta = rules.from_now[word] - return datetime.now().date() + timedelta(delta) + return now.date() + timedelta(delta) -def time(word): +def time(word, **kwargs): for rd in rules.times: try: result = datetime.strptime(word, rd).time() @@ -32,43 +33,45 @@ def time(word): if word in rules.times_of_day: hour = rules.times_of_day[word] - return datetime(2000, 1, 1, hour).time() + dt = datetime(2000, 1, 1, hour).time() + return dt -def my_datetime(word): +def my_datetime(word, now): if word == 'сейчас': - return datetime.now() + return now my_datetime.__name__ = 'datetime' -def number(word): +def number(word, **kwargs): if word.isdigit(): return int(word) -def delta_offset(word): +def delta_offset(word, **kwargs): for offset, *words in rules.offset: if word in words: return offset -def delta_size(word): +def delta_size(word, **kwargs): for result, words in rules.delta_sizes: if word in words: return result -def weekday(word): +def weekday(word, **kwargs): for ws in rules.weekdays: if word in ws: return ws.index(word) -def month(word): +def month(word, **kwargs): for ms in rules.months: if word in ms: return ms.index(word) + 1 + fs = (date, time, number, delta_offset, delta_size, weekday, month, my_datetime) -def get_cat(word): +def get_cat(word, now): 'Принимает на вход слово и возвращает ноду' for f in fs: - result = f(word) + result = f(word, now=now) if result is not None: return f.__name__, result return 'junk', word 
diff --git a/rutimeparser/reducers.py b/rutimeparser/reducers.py index e93a5ef..b8dce76 100644 --- a/rutimeparser/reducers.py +++ b/rutimeparser/reducers.py @@ -1,18 +1,18 @@ from datetime import datetime, timedelta from .utils import my_timedelta, Node, extract_word, extract_values -def number_and_month(nodes): +def number_and_month(nodes, now): '[22] [февраля]' number, month = extract_values(nodes, 'number', 'month') if not number: number = 1 - year = datetime.now().year + year = now.year value = datetime(year, month, number).date() - if value < datetime.now().date(): + if value < now.date(): value = datetime(year + 1, month, number).date() return Node(nodes[0].i, 'date', extract_word(nodes), value) -def make_delta(nodes): +def make_delta(nodes, **kwargs): '[1] [час]' number, size = extract_values(nodes, 'number', 'delta_size') if not number: @@ -20,7 +20,7 @@ def make_delta(nodes): value = my_timedelta(**{size: number}) return Node(nodes[0].i, 'delta', extract_word(nodes), value) -def offset(nodes): +def offset(nodes, **kwargs): '[неделю] [назад]' delta, offset = extract_values(nodes, 'delta', 'delta_offset') if offset < 0: @@ -31,14 +31,14 @@ def offset(nodes): return Node(nodes[0].i, 'delta', extract_word(nodes), delta) -def sum_delta(nodes): +def sum_delta(nodes, **kwargs): '[2 часа] [17 минут]' value = my_timedelta() for node in nodes: value += node.value return Node(nodes[0].i, 'delta', extract_word(nodes), value) -def date_and_time(nodes): +def date_and_time(nodes, now): '[22.02.2017] [17:45]' date, time = extract_values(nodes, 'date', 'time') if not date: @@ -46,19 +46,19 @@ def date_and_time(nodes): if date: date = date.date() else: - date = datetime.now().date() + date = now.date() value = datetime.combine(date, time) return Node(nodes[0].i, 'datetime', extract_word(nodes), value) -def dt_and_delta(nodes): +def dt_and_delta(nodes, now): '[завтра утром] [через час]' dt, delta = extract_values(nodes, 'datetime', 'delta') if not dt: - dt = datetime.now() + 
dt = now value = delta + dt return Node(nodes[0].i, 'datetime', extract_word(nodes), value) -def date_and_delta(nodes): +def date_and_delta(nodes, **kwargs): '[завтра утром] [через час]' date, delta = extract_values(nodes, 'date', 'delta') twilight = datetime(2000, 1, 1, 0, 0).time() @@ -70,7 +70,7 @@ def date_and_delta(nodes): cat = 'datetime' return Node(nodes[0].i, cat, extract_word(nodes), value) -def weekday(nodes): +def weekday(nodes, **kwargs): '[в апреле] [в следующий] [вторник]' offset, wday, dt = extract_values(nodes, 'delta_offset', 'weekday', 'datetime') if not dt: diff --git a/rutimeparser/utils.py b/rutimeparser/utils.py index 9a5124a..f48d3c1 100644 --- a/rutimeparser/utils.py +++ b/rutimeparser/utils.py @@ -1,9 +1,37 @@ import calendar import datetime +from pytz import timezone from collections import namedtuple Node = namedtuple('Node', ['i', 'cat', 'word', 'value']) +def get_now(tz): + if tz: + now = datetime.datetime.utcnow() + else: + now = datetime.datetime.now() + + #убрать микросекунды: + t = now.time() + t = datetime.time(t.hour, t.minute, t.second) + now = datetime.datetime.combine(now.date(), t) + + if not tz: + return now + return timezone(tz).fromutc(now) + + +def change_timezone(dt, tz): + 'Изменить часовой пояс для datetime/time' + if not tz: + return dt + if dt.tzname(): + #изменить часовой пояс + return dt.astimezone(timezone(tz)) + else: + #установить часовой пояс + return timezone(tz).localize(dt, is_dst=None) + def ngrams(l, n): 'Возвращает n-грамы из списка' @@ -14,6 +42,7 @@ def extract_word(nodes): 'Извлекает и объединяет все слова из переданных нод' return ' '.join([node.word for node in nodes]) + def extract_values(nodes, *template): ''' Возвращает список значений в соответствии со списком типов нод. 
diff --git a/tests.py b/tests.py index bf2fdcc..2df5552 100644 --- a/tests.py +++ b/tests.py @@ -1,5 +1,5 @@ import unittest -from timeparser import parse_time +from rutimeparser import parse_time from datetime import datetime, timedelta, time templates = (