edsnlp.pipelines.misc.dates.patterns.atomic
time
hour_pattern = '(?<!\\d)(?P<hour>0?[1-9]|1\\d|2[0-3])(?!\\d)'
module-attribute
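lz_hour_pattern = '(?<!\\d)(?P<hour>0[1-9]|[12]\\d|3[01])(?!\\d)'  # NOTE(review): this alternation covers 01-31 (a day-of-month range), so it accepts 24-31 and rejects 00, unlike hour_pattern which stops at 23 — confirm against the module source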
module-attribute
minute_pattern = '(?<!\\d)(?P<minute>0?[1-9]|[1-5]\\d)(?!\\d)'
module-attribute
lz_minute_pattern = '(?<!\\d)(?P<minute>0[1-9]|[1-5]\\d)(?!\\d)'
module-attribute
second_pattern = '(?<!\\d)(?P<second>0?[1-9]|[1-5]\\d)(?!\\d)'
module-attribute
lz_second_pattern = '(?<!\\d)(?P<second>0[1-9]|[1-5]\\d)(?!\\d)'
module-attribute
time_pattern = '(\\s.{,3}' + '{hour_pattern}[h:]({lz_minute_pattern})?' + '((:|m|min){lz_second_pattern})?' + ')?'
module-attribute
years
year_patterns: List[str] = ['19\\d\\d'] + [str(year) for year in range(2000, date.today().year + 2)]
module-attribute
full_year_pattern = '(?<!\\d)' + full_year_pattern + '(?!\\d)'
module-attribute
year_pattern = '(?<!\\d)' + year_pattern + '(?!\\d)'
module-attribute
months
letter_months_dict: Dict[str, int] = {'(janvier|janv\\.?)': 1, '(f[ée]vrier|f[ée]v\\.?)': 2, '(mars|mar\\.?)': 3, '(avril|avr\\.?)': 4, 'mai': 5, 'juin': 6, '(juillet|juill?\\.?)': 7, 'ao[uû]t': 8, '(septembre|sept?\\.?)': 9, '(octobre|oct\\.?)': 10, '(novembre|nov\\.)': 11, '(d[ée]cembre|d[ée]c\\.?)': 12}
module-attribute
letter_months: List[str] = list(letter_months_dict.keys())
module-attribute
month_pattern = '(?P<month>{letter_month_pattern}|{numeric_month_pattern})'
module-attribute
letter_month_pattern = '(?P<month>{letter_month_pattern})'
module-attribute
numeric_month_pattern = '(?P<month>{numeric_month_pattern})'
module-attribute
lz_numeric_month_pattern = '(?P<month>{lz_numeric_month_pattern})'
module-attribute
days
letter_days_dict: Dict[str, int] = {'(premier|1\\s*er)': 1, 'deux': 2, 'trois': 3, 'quatre': 4, 'cinq': 5, 'six': 6, 'sept': 7, 'huit': 8, 'neuf': 9, 'dix': 10, 'onze': 11, 'douze': 12, 'treize': 13, 'quatorze': 14, 'quinze': 15, 'seize': 16, 'dix\\-?\\s*sept': 17, 'dix\\-?\\s*huit': 18, 'dix\\-?\\s*neuf': 19, 'vingt': 20, 'vingt\\-?\\s*et\\-?\\s*un': 21, 'vingt\\-?\\s*deux': 22, 'vingt\\-?\\s*trois': 23, 'vingt\\-?\\s*quatre': 24, 'vingt\\-?\\s*cinq': 25, 'vingt\\-?\\s*six': 26, 'vingt\\-?\\s*sept': 27, 'vingt\\-?\\s*huit': 28, 'vingt\\-?\\s*neuf': 29, 'trente': 30, 'trente\\-?\\s*et\\-?\\s*un': 31}
module-attribute
letter_days: List[str] = list(letter_days_dict.keys())
module-attribute
nlz_numeric_day_pattern = '(?<!\\d)([1-9]|[12]\\d|3[01])(?!\\d)'
module-attribute
day_pattern = '(?P<day>{letter_day_pattern}|{numeric_day_pattern})'
module-attribute
letter_day_pattern = '(?P<day>{letter_day_pattern})'
module-attribute
numeric_day_pattern = '(?P<day>{numeric_day_pattern})'
module-attribute
lz_numeric_day_pattern = '(?P<day>{lz_numeric_day_pattern})'
module-attribute
Back to top