Skip to content
This repository has been archived by the owner on Mar 30, 2020. It is now read-only.

Commit

Permalink
experimental timezone support
Browse files Browse the repository at this point in the history
  • Loading branch information
orsinium committed Apr 2, 2017
1 parent cc42cbe commit 2876f94
Show file tree
Hide file tree
Showing 6 changed files with 82 additions and 39 deletions.
8 changes: 5 additions & 3 deletions manual_test.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from timeparser import parse_time, get_clear_text, get_last_clear_text
from rutimeparser import parse_time, get_clear_text, get_last_clear_text

if __name__ == '__main__':
for test in open('test_strings', 'r'):
print(test.strip(), '\x1B[31m', parse_time(test), '\x1B[0m')

while 1:
text = input('> ')
print('\x1B[32m ', get_clear_text(text), '\x1B[0m')
print('\x1B[33m ', get_last_clear_text(text), '\x1B[0m')
print('Time: \x1B[32m', parse_time(text), '\x1B[0m')
print('Moscow:\x1B[32m', parse_time(text, tz='Europe/Moscow'), '\x1B[0m')
print('Text: \x1B[32m', get_clear_text(text), '\x1B[0m')
print('Last: \x1B[33m ', get_last_clear_text(text), '\x1B[0m')
29 changes: 19 additions & 10 deletions rutimeparser/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from .get_words import get_words
from .get_cat import get_cat
from .utils import ngrams, Node
from .utils import ngrams, Node, get_now, change_timezone
from .reducers import templates


Expand All @@ -12,22 +12,27 @@ class TimeParser:
Возвращает datetime, date или None.
'''

def __init__(self, text='', words=None):
def __init__(self, text='', words=None, tz=None, now=None):
    '''
    Prepare the parser state from raw text or from pre-split words.

    text  -- source string; split with get_words() when `words` is empty.
    words -- already tokenized words; used as-is when non-empty.
    tz    -- optional timezone name, stored on self.tz and passed to
             get_now() when `now` is not supplied.
    now   -- optional reference datetime; defaults to get_now(tz).

    Raises ValueError when neither `text` nor `words` is given.
    '''
    if not words:
        if not text:
            raise ValueError('Please, set text or words for TimeParser.')
        self.words = tuple(get_words(text))
    else:
        self.words = words

    self.tz = tz
    self.now = now if now else get_now(self.tz)

    # No explicit tz: inherit it from an aware `now`. The previous code read
    # `dt.tzinfo`, but no `dt` exists in this scope, so it raised NameError.
    if not self.tz and now and now.tzinfo:
        self.tz = str(now.tzinfo)

def make_nodes(self):
'''
Генерирует список нод на основе слов исходного текста
'''

self.nodes = []
for i, word in enumerate(self.words):
cat, value = get_cat(word)
cat, value = get_cat(word, self.now)
self.nodes.append(Node(i, cat, word, value))
return self.nodes

Expand Down Expand Up @@ -95,7 +100,7 @@ def reduce(self):
for f, *template in templates:
nodes_samples = list(self.get_nodes_by_template(*template))
for nodes in nodes_samples:
new_node = f(nodes)
new_node = f(nodes, now=self.now)
self.replace(nodes[0], nodes[-1], new_node)

def __dict__(self):
Expand All @@ -110,10 +115,11 @@ def get_datetime(self):
'''
nodes = self.__dict__()
if 'datetime' in nodes:
return nodes['datetime']
return change_timezone(nodes['datetime'], self.tz)
now = datetime.now()
if 'date' in nodes and 'time' in nodes:
return datetime.combine(nodes['date'], nodes['time'])
dt = datetime.combine(nodes['date'], nodes['time'])
return change_timezone(dt, self.tz)
if 'time' in nodes:
return datetime.combine(now.date(), nodes['time'])
if 'date' in nodes:
Expand All @@ -128,16 +134,19 @@ def get_clear_text(self):

def get_last_clear_text(self):
result = []
chain = list(self.get_junk_chains())[-1]
chains = list(self.get_junk_chains())
if not chains:
return ''
chain = chains[-1]
return ' '.join([node.word for node in chain])


def parse_time(text, remove_junk=True, debug=False):
def parse_time(text, *, tz=None, now=None, remove_junk=True, debug=False):
'''
Для тех, кто не любит классы. Выполняет все необходимые операции
с текстом и возвращает результат.
'''
tp = TimeParser(text)
tp = TimeParser(text, tz=tz, now=now)
tp.make_nodes()
if debug:
from pprint import pprint
Expand Down
29 changes: 16 additions & 13 deletions rutimeparser/get_cat.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@

from datetime import datetime, timedelta

from .utils import change_timezone
from . import rules


def date(word):
def date(word, now):
for rd in rules.dates:
try:
result = datetime.strptime(word, rd).date()
Expand All @@ -19,9 +20,9 @@ def date(word):

if word in rules.from_now:
delta = rules.from_now[word]
return datetime.now().date() + timedelta(delta)
return now.date() + timedelta(delta)

def time(word):
def time(word, **kwargs):
for rd in rules.times:
try:
result = datetime.strptime(word, rd).time()
Expand All @@ -32,43 +33,45 @@ def time(word):

if word in rules.times_of_day:
hour = rules.times_of_day[word]
return datetime(2000, 1, 1, hour).time()
dt = datetime(2000, 1, 1, hour).time()
return dt

def my_datetime(word):
def my_datetime(word, now):
if word == 'сейчас':
return datetime.now()
return now
my_datetime.__name__ = 'datetime'

def number(word):
def number(word, **kwargs):
    '''
    Return `word` as an int when it consists only of decimal digits.

    Returns None for anything else, including the empty string.
    Extra keyword arguments are accepted and ignored.
    '''
    # isdecimal() rather than isdigit(): isdigit() also accepts characters
    # such as superscripts that int() rejects with ValueError.
    if word.isdecimal():
        return int(word)
    return None

def delta_offset(word):
def delta_offset(word, **kwargs):
    '''
    Return the offset value of the first rules.offset entry listing `word`.

    Each entry is unpacked as (offset, *words). Returns None when no entry
    matches. Extra keyword arguments are accepted and ignored.
    '''
    matches = (value for value, *aliases in rules.offset if word in aliases)
    return next(matches, None)

def delta_size(word):
def delta_size(word, **kwargs):
    '''
    Return the result paired with the first rules.delta_sizes entry
    whose words contain `word`.

    Each entry is unpacked as (result, words). Returns None when no entry
    matches. Extra keyword arguments are accepted and ignored.
    '''
    for size, aliases in rules.delta_sizes:
        if word not in aliases:
            continue
        return size
    return None

def weekday(word):
def weekday(word, **kwargs):
    '''
    Return the position of `word` inside the first rules.weekdays entry
    that contains it.

    Returns None when no entry contains the word. Extra keyword arguments
    are accepted and ignored.
    '''
    positions = (
        names.index(word)
        for names in rules.weekdays
        if word in names
    )
    return next(positions, None)

def month(word):
def month(word, **kwargs):
    '''
    Return the 1-based position of `word` inside the first rules.months
    entry that contains it.

    Returns None when no entry contains the word. Extra keyword arguments
    are accepted and ignored.
    '''
    for names in rules.months:
        if word not in names:
            continue
        position = names.index(word)
        return position + 1
    return None


fs = (date, time, number, delta_offset, delta_size, weekday, month, my_datetime)

def get_cat(word):
def get_cat(word, now):
'Принимает на вход слово и возвращает ноду'
for f in fs:
result = f(word)
result = f(word, now=now)
if result is not None:
return f.__name__, result
return 'junk', word
24 changes: 12 additions & 12 deletions rutimeparser/reducers.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,26 @@
from datetime import datetime, timedelta
from .utils import my_timedelta, Node, extract_word, extract_values

def number_and_month(nodes):
def number_and_month(nodes, now):
'[22] [февраля]'
number, month = extract_values(nodes, 'number', 'month')
if not number:
number = 1
year = datetime.now().year
year = now.year
value = datetime(year, month, number).date()
if value < datetime.now().date():
if value < now.date():
value = datetime(year + 1, month, number).date()
return Node(nodes[0].i, 'date', extract_word(nodes), value)

def make_delta(nodes):
def make_delta(nodes, **kwargs):
    '''
    Build a 'delta' node from a count and a unit, e.g. '[1] [час]' (1 hour).

    The 'number' and 'delta_size' values are taken from `nodes` with
    extract_values(); a falsy count is replaced by 1. The new node reuses
    the index of the first node and the joined words of all nodes.
    Extra keyword arguments are accepted and ignored.
    '''
    count, unit = extract_values(nodes, 'number', 'delta_size')
    delta = my_timedelta(**{unit: count or 1})
    head = nodes[0]
    return Node(head.i, 'delta', extract_word(nodes), delta)

def offset(nodes):
def offset(nodes, **kwargs):
'[неделю] [назад]'
delta, offset = extract_values(nodes, 'delta', 'delta_offset')
if offset < 0:
Expand All @@ -31,34 +31,34 @@ def offset(nodes):
return Node(nodes[0].i, 'delta', extract_word(nodes), delta)


def sum_delta(nodes):
def sum_delta(nodes, **kwargs):
    '''
    Merge adjacent delta nodes into one, e.g. '[2 часа] [17 минут]'
    (2 hours 17 minutes).

    Starts from an empty my_timedelta() and adds the value of every node.
    The new node reuses the index of the first node and the joined words
    of all nodes. Extra keyword arguments are accepted and ignored.
    '''
    total = my_timedelta()
    for item in nodes:
        total += item.value
    head = nodes[0]
    return Node(head.i, 'delta', extract_word(nodes), total)

def date_and_time(nodes):
def date_and_time(nodes, now):
'[22.02.2017] [17:45]'
date, time = extract_values(nodes, 'date', 'time')
if not date:
date = extract_values(nodes, 'datetime')[0]
if date:
date = date.date()
else:
date = datetime.now().date()
date = now.date()
value = datetime.combine(date, time)
return Node(nodes[0].i, 'datetime', extract_word(nodes), value)

def dt_and_delta(nodes):
def dt_and_delta(nodes, now):
'[завтра утром] [через час]'
dt, delta = extract_values(nodes, 'datetime', 'delta')
if not dt:
dt = datetime.now()
dt = now
value = delta + dt
return Node(nodes[0].i, 'datetime', extract_word(nodes), value)

def date_and_delta(nodes):
def date_and_delta(nodes, **kwargs):
'[завтра утром] [через час]'
date, delta = extract_values(nodes, 'date', 'delta')
twilight = datetime(2000, 1, 1, 0, 0).time()
Expand All @@ -70,7 +70,7 @@ def date_and_delta(nodes):
cat = 'datetime'
return Node(nodes[0].i, cat, extract_word(nodes), value)

def weekday(nodes):
def weekday(nodes, **kwargs):
'[в апреле] [в следующий] [вторник]'
offset, wday, dt = extract_values(nodes, 'delta_offset', 'weekday', 'datetime')
if not dt:
Expand Down
29 changes: 29 additions & 0 deletions rutimeparser/utils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,37 @@
import calendar
import datetime
from pytz import timezone

from collections import namedtuple
Node = namedtuple('Node', ['i', 'cat', 'word', 'value'])

def get_now(tz):
    '''
    Return the current moment truncated to whole seconds.

    Without `tz` the result is the naive local time. With `tz` (a timezone
    name accepted by pytz.timezone) the current UTC time is converted to
    that zone with fromutc().
    '''
    if not tz:
        # replace() drops only the microseconds and keeps every other field.
        return datetime.datetime.now().replace(microsecond=0)

    # utcnow() is deprecated since Python 3.12. Take an aware UTC reading and
    # strip tzinfo so fromutc() still receives a naive UTC value as before.
    utc_now = datetime.datetime.now(datetime.timezone.utc)
    utc_now = utc_now.replace(microsecond=0, tzinfo=None)
    return timezone(tz).fromutc(utc_now)


def change_timezone(dt, tz):
    '''
    Move `dt` to the timezone named `tz`, or attach that timezone to it.

    dt -- datetime to adjust.
    tz -- timezone name accepted by pytz.timezone; when falsy, `dt` is
          returned unchanged.

    An aware `dt` is converted with astimezone(). A naive `dt` is localized
    with is_dst=None.
    NOTE(review): per the pytz docs, is_dst=None makes localize() raise for
    ambiguous or non-existent local times -- confirm callers expect that.
    '''
    if not tz:
        return dt

    zone = timezone(tz)
    # Awareness test from the datetime docs. The truthiness of tzname() is not
    # reliable: an aware value may report an empty or missing name.
    if dt.tzinfo is not None and dt.utcoffset() is not None:
        return dt.astimezone(zone)
    return zone.localize(dt, is_dst=None)


def ngrams(l, n):
'Возвращает n-грамы из списка'
Expand All @@ -14,6 +42,7 @@ def extract_word(nodes):
'Извлекает и объединяет все слова из переданных нод'
return ' '.join([node.word for node in nodes])


def extract_values(nodes, *template):
'''
Возвращает список значений в соответствии со списком типов нод.
Expand Down
2 changes: 1 addition & 1 deletion tests.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import unittest
from timeparser import parse_time
from rutimeparser import parse_time
from datetime import datetime, timedelta, time

templates = (
Expand Down

0 comments on commit 2876f94

Please sign in to comment.