Skip to content

Commit

Permalink
[dateparser] Fix parsing very short weekday names
Browse files Browse the repository at this point in the history
- Add a `remove_multiple_occurrences` method that removes repeated day-of-the-week tokens, keeping only the first
- #1170
  • Loading branch information
adnan-awan committed Jan 30, 2024
1 parent 1bea64d commit 45bd628
Showing 1 changed file with 26 additions and 0 deletions.
26 changes: 26 additions & 0 deletions dateparser/languages/locale.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,31 @@ def clean_dictionary(dictionary, threshold=2):
del dictionary[del_key]
return dictionary

@property
def weekdays(self):
    """Return the lowercase English weekday names, Monday through Sunday.

    A new list is built on every access, so callers may mutate the
    result without affecting later reads.
    """
    return [
        "monday",
        "tuesday",
        "wednesday",
        "thursday",
        "friday",
        "saturday",
        "sunday",
    ]

def remove_multiple_occurrences(self, date_str_tokens: list):
    """Keep only the first weekday token in *date_str_tokens*.

    The first token found in ``self.weekdays`` is kept; every later
    weekday token is removed. Non-weekday tokens are never touched.

    The list is modified in place and nothing is returned.

    Args:
        date_str_tokens: Tokens of the date string being processed.
    """
    # Read the property once; it builds a new list on every access.
    weekdays = self.weekdays

    # Collect the survivors in a separate list rather than popping from
    # the list being iterated: popping during iteration shifts the
    # remaining items left and makes the loop skip the token right after
    # each removal, which left some repeated weekdays in place.
    kept_tokens = []
    weekday_seen = False
    for token in date_str_tokens:
        if token in weekdays:
            if weekday_seen:
                continue
            weekday_seen = True
        kept_tokens.append(token)

    # Slice assignment keeps the caller's list object and replaces its
    # contents, preserving the in-place contract.
    date_str_tokens[:] = kept_tokens

def translate(self, date_string, keep_formatting=False, settings=None):
"""
Translate the date string to its English equivalent.
Expand Down Expand Up @@ -145,6 +170,7 @@ def translate(self, date_string, keep_formatting=False, settings=None):
if "in" in date_string_tokens:
date_string_tokens = self._clear_future_words(date_string_tokens)

self.remove_multiple_occurrences(date_string_tokens)
return self._join(
list(filter(bool, date_string_tokens)),
separator="" if keep_formatting else " ",
Expand Down

0 comments on commit 45bd628

Please sign in to comment.