Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 25 additions & 30 deletions dateparser/calendars/jalali_parser.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import re
from collections import OrderedDict
from functools import reduce

from convertdate import persian
Expand Down Expand Up @@ -40,35 +39,31 @@ class jalali_parser(non_gregorian_parser):
"۹": 9,
}

_months = OrderedDict(
[
# pinglish : (persian literals, month index, number of days)
("Farvardin", (1, 31, ["فروردین"])),
("Ordibehesht", (2, 31, ["اردیبهشت"])),
("Khordad", (3, 31, ["خرداد"])),
("Tir", (4, 31, ["تیر"])),
("Mordad", (5, 31, ["امرداد", "مرداد"])),
("Shahrivar", (6, 31, ["شهریور", "شهريور"])),
("Mehr", (7, 30, ["مهر"])),
("Aban", (8, 30, ["آبان"])),
("Azar", (9, 30, ["آذر"])),
("Dey", (10, 30, ["دی"])),
("Bahman", (11, 30, ["بهمن", "بهن"])),
("Esfand", (12, 29, ["اسفند"])),
]
)

_weekdays = OrderedDict(
[
("Sunday", ["یکشنبه"]),
("Monday", ["دوشنبه"]),
("Tuesday", ["سهشنبه", "سه شنبه"]),
("Wednesday", ["چهارشنبه", "چهار شنبه"]),
("Thursday", ["پنجشنبه", "پنج شنبه"]),
("Friday", ["جمعه"]),
("Saturday", ["روز شنبه", "شنبه"]),
]
)
_months = {
# pinglish : (month index, number of days, persian literals)
"Farvardin": (1, 31, ["فروردین"]),
"Ordibehesht": (2, 31, ["اردیبهشت"]),
"Khordad": (3, 31, ["خرداد"]),
"Tir": (4, 31, ["تیر"]),
"Mordad": (5, 31, ["امرداد", "مرداد"]),
"Shahrivar": (6, 31, ["شهریور", "شهريور"]),
"Mehr": (7, 30, ["مهر"]),
"Aban": (8, 30, ["آبان"]),
"Azar": (9, 30, ["آذر"]),
"Dey": (10, 30, ["دی"]),
"Bahman": (11, 30, ["بهمن", "بهن"]),
"Esfand": (12, 29, ["اسفند"]),
}

_weekdays = {
"Sunday": ["یکشنبه"],
"Monday": ["دوشنبه"],
"Tuesday": ["سهشنبه", "سه شنبه"],
"Wednesday": ["چهارشنبه", "چهار شنبه"],
"Thursday": ["پنجشنبه", "پنج شنبه"],
"Friday": ["جمعه"],
"Saturday": ["روز شنبه", "شنبه"],
}

_number_letters = {
0: ["صفر"],
Expand Down
3 changes: 3 additions & 0 deletions dateparser/data/date_translation_data/en.py
Original file line number Diff line number Diff line change
Expand Up @@ -800,6 +800,9 @@
"after"
],
"simplifications": [
{
"(\\d+[.,]?\\d*)\\s*mons?\\b": "\\1 month"
},
{
"an": "1"
},
Expand Down
4 changes: 2 additions & 2 deletions dateparser/languages/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def _load_data(
use_given_order=False,
allow_conflicting_locales=False,
):
locale_dict = OrderedDict()
locale_dict = {}
if locales:
invalid_locales = []
for locale in locales:
Expand Down Expand Up @@ -198,7 +198,7 @@ def _load_data(
)

if not use_given_order:
locale_dict = OrderedDict(
locale_dict = dict(
sorted(locale_dict.items(), key=lambda x: language_order.index(x[1][0]))
)

Expand Down
3 changes: 1 addition & 2 deletions dateparser/languages/locale.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from collections import OrderedDict
from itertools import chain

import regex as re
Expand Down Expand Up @@ -226,7 +225,7 @@ def _get_relative_translations(self, settings=None):

def _generate_relative_translations(self, normalize=False):
relative_translations = self.info.get("relative-type-regex", {})
relative_dictionary = OrderedDict()
relative_dictionary = {}
for key, value in relative_translations.items():
if normalize:
value = list(map(normalize_unicode, value))
Expand Down
19 changes: 8 additions & 11 deletions dateparser/parser.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import calendar
from collections import OrderedDict
from datetime import datetime, timedelta, timezone
from io import StringIO

Expand Down Expand Up @@ -239,12 +238,10 @@ def _check_strict_parsing(missing, settings):


class _parser:
alpha_directives = OrderedDict(
[
("weekday", ["%A", "%a"]),
("month", ["%B", "%b"]),
]
)
alpha_directives = {
"weekday": ["%A", "%a"],
"month": ["%B", "%b"],
}

num_directives = {
"month": ["%m"],
Expand Down Expand Up @@ -273,10 +270,10 @@ def __init__(self, tokens, settings):
self._token_year = None
self._token_time = None

self.ordered_num_directives = OrderedDict(
(k, self.num_directives[k])
for k in (resolve_date_order(settings.DATE_ORDER, lst=True))
)
self.ordered_num_directives = {
k: self.num_directives[k]
for k in resolve_date_order(settings.DATE_ORDER, lst=True)
}

skip_index = []
skip_component = None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ relative-type-regex:
- (\d+[.,]?\d*) decades? ago

simplifications:
- (\d+[.,]?\d*)\s*mons?\b: \1 month
- an: '1'
- a: '1'
- (?:12\s+)?noon: '12:00'
Expand Down
21 changes: 10 additions & 11 deletions dateparser_scripts/get_cldr_data.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import json
import os
import shutil
from collections import OrderedDict

import regex as re

Expand Down Expand Up @@ -49,17 +48,17 @@ def _retrieve_locale_data(locale):
ca_gregorian_file = cldr_dates_full_dir + locale + "/ca-gregorian.json"
dateFields_file = cldr_dates_full_dir + locale + "/dateFields.json"
with open(ca_gregorian_file) as f:
cldr_gregorian_data = json.load(f, object_pairs_hook=OrderedDict)
cldr_gregorian_data = json.load(f)

with open(dateFields_file) as g:
cldr_datefields_data = json.load(g, object_pairs_hook=OrderedDict)
cldr_datefields_data = json.load(g)

gregorian_dict = cldr_gregorian_data["main"][locale]["dates"]["calendars"][
"gregorian"
]
date_fields_dict = cldr_datefields_data["main"][locale]["dates"]["fields"]

json_dict = OrderedDict()
json_dict = {}

field_keys_1 = ["stand-alone", "format"]
field_keys_2 = [
Expand Down Expand Up @@ -292,7 +291,7 @@ def _retrieve_locale_data(locale):

json_dict["second"] = [date_fields_dict[key]["displayName"] for key in second_keys]

json_dict["relative-type"] = OrderedDict()
json_dict["relative-type"] = {}

json_dict["relative-type"]["1 year ago"] = [
date_fields_dict[key]["relative-type--1"] for key in year_keys
Expand Down Expand Up @@ -354,7 +353,7 @@ def _retrieve_locale_data(locale):
date_fields_dict[key]["relative-type-0"] for key in second_keys
]

json_dict["relative-type-regex"] = OrderedDict()
json_dict["relative-type-regex"] = {}

json_dict["relative-type-regex"]["in \\1 year"] = list(
filter(
Expand Down Expand Up @@ -524,11 +523,11 @@ def _clean_dict(json_dict):
"""Remove duplicates and sort"""
for key, value in json_dict.items():
if isinstance(value, list):
json_dict[key] = sorted(OrderedDict.fromkeys(map(_clean_string, value)))
json_dict[key] = sorted(dict.fromkeys(map(_clean_string, value)))
elif isinstance(value, dict):
json_dict[key] = OrderedDict(sorted(value.items()))
json_dict[key] = dict(sorted(value.items()))
json_dict[key] = _clean_dict(json_dict[key])
return OrderedDict(filter(lambda x: x[1], json_dict.items()))
return dict(filter(lambda x: x[1], json_dict.items()))


def main():
Expand All @@ -544,14 +543,14 @@ def main():

for language in language_locale_dict:
json_language_dict = _clean_dict(_retrieve_locale_data(language))
locale_specific_dict = OrderedDict()
locale_specific_dict = {}
locales_list = language_locale_dict[language]
for locale in locales_list:
json_locale_dict = _clean_dict(_retrieve_locale_data(locale))
locale_specific_dict[locale] = _clean_dict(
get_dict_difference(json_language_dict, json_locale_dict)
)
json_language_dict["locale_specific"] = OrderedDict(
json_language_dict["locale_specific"] = dict(
sorted(locale_specific_dict.items())
)
filename = directory + language + ".json"
Expand Down
3 changes: 1 addition & 2 deletions dateparser_scripts/order_languages.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import json
import os
from collections import OrderedDict

import regex as re
import requests
Expand Down Expand Up @@ -186,7 +185,7 @@ def main():
language_order, separators=(",", ": "), indent=4
)

complete_language_locale_dict = OrderedDict()
complete_language_locale_dict = {}
for key in language_order:
if key in language_locale_dict.keys():
complete_language_locale_dict[key] = sorted(language_locale_dict[key])
Expand Down
5 changes: 2 additions & 3 deletions dateparser_scripts/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import os
import shutil
from collections import OrderedDict

from git import Repo

Expand Down Expand Up @@ -36,7 +35,7 @@ def get_raw_data():


def get_dict_difference(parent_dict, child_dict):
difference_dict = OrderedDict()
difference_dict = {}
for key, child_value in child_dict.items():
parent_value = parent_dict.get(key)
child_specific_value = None
Expand All @@ -54,7 +53,7 @@ def get_dict_difference(parent_dict, child_dict):


def combine_dicts(primary_dict, supplementary_dict):
combined_dict = OrderedDict()
combined_dict = {}
for key, value in primary_dict.items():
if key in supplementary_dict:
if isinstance(value, list):
Expand Down
27 changes: 23 additions & 4 deletions dateparser_scripts/write_complete_data.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import json
import os
import shutil
from collections import OrderedDict

import regex as re
from ruamel.yaml import YAML
Expand All @@ -28,8 +27,27 @@
RELATIVE_PATTERN = re.compile(r"\{0\}")


def _to_plain_types(obj):
"""Recursively convert ruamel.yaml CommentedMap/CommentedSeq to plain
OrderedDict/list so that json.dumps produces stable output across all
Python versions.

Python 3.14 changed the json C encoder to bypass the Python-level
``__iter__``/``items()`` of dict subclasses and access the underlying C
dict directly. ruamel.yaml's CommentedMap relies on its Python-level
iteration for correct key ordering, so the C shortcut produces a
different (non-deterministic) key order on 3.14. Converting to plain
types before serialisation avoids the issue entirely.
"""
if isinstance(obj, dict):
return OrderedDict((k, _to_plain_types(v)) for k, v in obj.items())
elif isinstance(obj, list):
return [_to_plain_types(v) for v in obj]
return obj


def _modify_relative_data(relative_data):
modified_relative_data = OrderedDict()
modified_relative_data = {}
for key, value in relative_data.items():
for i, string in enumerate(value):
string = RELATIVE_PATTERN.sub(r"(\\d+[.,]?\\d*)", string)
Expand All @@ -52,11 +70,11 @@ def _get_complete_date_translation_data(language):
supplementary_data = {}
if language in cldr_languages:
with open(cldr_date_directory + language + ".json") as f:
cldr_data = json.load(f, object_pairs_hook=OrderedDict)
cldr_data = json.load(f)
if language in supplementary_languages:
with open(supplementary_date_directory + language + ".yaml") as g:
yaml = YAML()
supplementary_data = OrderedDict(yaml.load(g))
supplementary_data = dict(yaml.load(g))
complete_data = combine_dicts(cldr_data, supplementary_data)
if "name" not in complete_data:
complete_data["name"] = language
Expand Down Expand Up @@ -95,6 +113,7 @@ def write_complete_data(in_memory=False):
for language in all_languages:
date_translation_data = _get_complete_date_translation_data(language)
date_translation_data = combine_dicts(date_translation_data, base_data)
date_translation_data = _to_plain_types(date_translation_data)
_modify_data(date_translation_data)
translation_data = json.dumps(
date_translation_data, indent=4, separators=(",", ": "), ensure_ascii=False
Expand Down
22 changes: 9 additions & 13 deletions tests/test_date.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import datetime as real_datetime
import os
import unittest
from collections import OrderedDict
from datetime import datetime, timedelta
from datetime import timezone as dttz
from itertools import product
Expand Down Expand Up @@ -615,18 +614,15 @@ def test_temporal_nouns_are_parsed(self, date_string, days_ago):
self.then_date_is_n_days_ago(days=days_ago)

def test_should_not_assume_language_too_early(self):
dates_to_parse = OrderedDict(
[
("07/07/2014", datetime(2014, 7, 7).date()), # any language
("07.jul.2014 | 12:52", datetime(2014, 7, 7).date()), # en, es, pt, nl
("07.ago.2014 | 12:52", datetime(2014, 8, 7).date()), # es, it, pt
(
"07.feb.2014 | 12:52",
datetime(2014, 2, 7).date(),
), # en, de, es, it, nl, ro
("07.ene.2014 | 12:52", datetime(2014, 1, 7).date()),
]
) # es
dates_to_parse = {
"07/07/2014": datetime(2014, 7, 7).date(), # any language
"07.jul.2014 | 12:52": datetime(2014, 7, 7).date(), # en, es, pt, nl
"07.ago.2014 | 12:52": datetime(2014, 8, 7).date(), # es, it, pt
"07.feb.2014 | 12:52": datetime(
2014, 2, 7
).date(), # en, de, es, it, nl, ro
"07.ene.2014 | 12:52": datetime(2014, 1, 7).date(), # es
}

self.given_parser(
restrict_to_languages=["en", "de", "fr", "it", "pt", "nl", "ro", "es", "ru"]
Expand Down
10 changes: 7 additions & 3 deletions tests/test_freshness_date_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2374,15 +2374,19 @@ def test_dates_not_supported_by_date_time(self, date_string):

@parameterized.expand(
[
param("1mon ago"), # 1116
param("1mon ago", ago={"months": 1}, period="month"), # 1123
param("2mon ago", ago={"months": 2}, period="month"), # 1123
param("3mons ago", ago={"months": 3}, period="month"), # 1123
]
)
def test_known_issues(self, date_string):
def test_known_issues(self, date_string, ago, period):
self.given_parser()
self.given_date_string(date_string)
self.when_date_is_parsed()
self.then_error_was_not_raised()
self.assertEqual(None, self.result["date_obj"])
self.then_date_was_parsed_by_freshness_parser()
self.then_date_obj_is_exactly_this_time_ago(ago)
self.then_period_is(period)

@parameterized.expand(
[
Expand Down
Loading