Skip to content

edsnlp.pipelines.misc.dates.patterns

raw_delimiters = ['\\/', '\\-'] module-attribute

delimiters = raw_delimiters + ['\\.', '[^\\S\\r\\n]+'] module-attribute

raw_delimiter_pattern = make_pattern(raw_delimiters) module-attribute

raw_delimiter_with_spaces_pattern = make_pattern(raw_delimiters + ['[^\\S\\r\\n]+']) module-attribute

delimiter_pattern = make_pattern(delimiters) module-attribute

ante_num_pattern = '(?<!{raw_delimiter_pattern})' module-attribute

post_num_pattern = '(?!{raw_delimiter_pattern})' module-attribute

full_year_pattern = ante_num_pattern + fy_pattern + post_num_pattern module-attribute

absolute_date_pattern: List[str] = [ante_num_pattern + day_pattern + d + month_pattern + d + year_pattern + post_num_pattern for d in delimiters] + [ante_num_pattern + year_pattern + d + numeric_month_pattern + d + numeric_day_pattern + post_num_pattern for d in delimiters] module-attribute

full_date_pattern = [ante_num_pattern + fy_pattern + d + lz_numeric_month_pattern + d + lz_numeric_day_pattern + post_num_pattern for d in ['-', '\\.']] module-attribute

no_year_pattern = [day + raw_delimiter_with_spaces_pattern + month for day in [ante_num_pattern + numeric_day_pattern, letter_day_pattern] for month in [numeric_month_pattern + post_num_pattern, letter_month_pattern]] module-attribute

no_day_pattern = [letter_month_pattern + raw_delimiter_with_spaces_pattern + year_pattern + post_num_pattern, ante_num_pattern + lz_numeric_month_pattern + raw_delimiter_with_spaces_pattern + year_pattern + post_num_pattern] module-attribute

relative_date_pattern = relative_pattern module-attribute

since_pattern = ['(?<=depuis)' + '.{,5}' + pattern for pattern in absolute_date_pattern + no_year_pattern + full_date_pattern + [relative_pattern]] module-attribute

false_positive_pattern = make_pattern(['(\\d+' + delimiter_pattern + '){3,}\\d+', '\\d\\/\\d']) module-attribute

current

current_patterns: List[str] = ['cette\\sann[ée]e(?![-\\s]l[àa])', 'ce\\sjour', 'ces\\sjours[-\\s]ci', "aujourd'?hui", 'ce\\smois([-\\s]ci)?', 'cette\\ssemaine', 'cet?\\s([ée]t[ée]|automne|hiver|printemps)'] module-attribute

current_pattern = make_pattern(current_patterns, with_breaks=True) module-attribute

relative

ago_pattern = 'il\\s+y\\s+a\\s+.{,10}?\\s+(heures?|jours?|semaines?|mois|ann[ée]es?|ans?)' module-attribute

in_pattern = 'dans\\s+.{,10}?\\s+(heures?|jours?|semaines?|mois|ann[ée]es?|ans?)' module-attribute

last_pattern = "l['ae]\\s*(semaine|année|an|mois)\\s+derni[èe]re?" module-attribute

next_pattern = "l['ae]\\s*(semaine|année|an|mois)\\s+prochaine?" module-attribute

since_pattern = '(?<=depuis\\s)\\s*.{,10}\\s+(heures?|jours?|semaines?|mois|ann[ée]es?|ans?)(\\s+derni[èe]re?)?' module-attribute

during_pattern = '(pendant|pdt|pour)\\s+.{,10}?\\s+(heures?|jours?|mois|ann[ée]es?|ans?)' module-attribute

week_patterns = ['(avant\\-?\\s*)?hier', '(apr[èe]s\\-?\\s*)?demain'] module-attribute

week_pattern = make_pattern(week_patterns, with_breaks=True) module-attribute

relative_pattern = make_pattern(patterns=[ago_pattern, in_pattern, last_pattern, next_pattern, since_pattern, week_pattern], with_breaks=True) module-attribute

atomic

time

hour_pattern = '(?<!\\d)(?P<hour>0?[1-9]|1\\d|2[0-3])(?!\\d)' module-attribute
lz_hour_pattern = '(?<!\\d)(?P<hour>0[1-9]|[12]\\d|3[01])(?!\\d)' module-attribute
minute_pattern = '(?<!\\d)(?P<minute>0?[1-9]|[1-5]\\d)(?!\\d)' module-attribute
lz_minute_pattern = '(?<!\\d)(?P<minute>0[1-9]|[1-5]\\d)(?!\\d)' module-attribute
second_pattern = '(?<!\\d)(?P<second>0?[1-9]|[1-5]\\d)(?!\\d)' module-attribute
lz_second_pattern = '(?<!\\d)(?P<second>0[1-9]|[1-5]\\d)(?!\\d)' module-attribute
time_pattern = '(\\s.{,3}' + '{hour_pattern}[h:]({lz_minute_pattern})?' + '((:|m|min){lz_second_pattern})?' + ')?' module-attribute

years

year_patterns: List[str] = ['19\\d\\d'] + [str(year) for year in range(2000, date.today().year + 2)] module-attribute
full_year_pattern = '(?<!\\d)' + full_year_pattern + '(?!\\d)' module-attribute
year_pattern = '(?<!\\d)' + year_pattern + '(?!\\d)' module-attribute

months

letter_months_dict: Dict[str, int] = {'(janvier|janv\\.?)': 1, '(f[ée]vrier|f[ée]v\\.?)': 2, '(mars|mar\\.?)': 3, '(avril|avr\\.?)': 4, 'mai': 5, 'juin': 6, '(juillet|juill?\\.?)': 7, 'ao[uû]t': 8, '(septembre|sept?\\.?)': 9, '(octobre|oct\\.?)': 10, '(novembre|nov\\.)': 11, '(d[ée]cembre|d[ée]c\\.?)': 12} module-attribute
letter_months: List[str] = list(letter_months_dict.keys()) module-attribute
month_pattern = '(?P<month>{letter_month_pattern}|{numeric_month_pattern})' module-attribute
letter_month_pattern = '(?P<month>{letter_month_pattern})' module-attribute
numeric_month_pattern = '(?P<month>{numeric_month_pattern})' module-attribute
lz_numeric_month_pattern = '(?P<month>{lz_numeric_month_pattern})' module-attribute

days

letter_days_dict: Dict[str, int] = {'(premier|1\\s*er)': 1, 'deux': 2, 'trois': 3, 'quatre': 4, 'cinq': 5, 'six': 6, 'sept': 7, 'huit': 8, 'neuf': 9, 'dix': 10, 'onze': 11, 'douze': 12, 'treize': 13, 'quatorze': 14, 'quinze': 15, 'seize': 16, 'dix\\-?\\s*sept': 17, 'dix\\-?\\s*huit': 18, 'dix\\-?\\s*neuf': 19, 'vingt': 20, 'vingt\\-?\\s*et\\-?\\s*un': 21, 'vingt\\-?\\s*deux': 22, 'vingt\\-?\\s*trois': 23, 'vingt\\-?\\s*quatre': 24, 'vingt\\-?\\s*cinq': 25, 'vingt\\-?\\s*six': 26, 'vingt\\-?\\s*sept': 27, 'vingt\\-?\\s*huit': 28, 'vingt\\-?\\s*neuf': 29, 'trente': 30, 'trente\\-?\\s*et\\-?\\s*un': 31} module-attribute
letter_days: List[str] = list(letter_days_dict.keys()) module-attribute
nlz_numeric_day_pattern = '(?<!\\d)([1-9]|[12]\\d|3[01])(?!\\d)' module-attribute
day_pattern = '(?P<day>{letter_day_pattern}|{numeric_day_pattern})' module-attribute
letter_day_pattern = '(?P<day>{letter_day_pattern})' module-attribute
numeric_day_pattern = '(?P<day>{numeric_day_pattern})' module-attribute
lz_numeric_day_pattern = '(?P<day>{lz_numeric_day_pattern})' module-attribute
Back to top