edsnlp.pipelines.misc.dates.patterns
raw_delimiters = ['\\/', '\\-']
module-attribute
delimiters = raw_delimiters + ['\\.', '[^\\S\\r\\n]+']
module-attribute
raw_delimiter_pattern = make_pattern(raw_delimiters)
module-attribute
raw_delimiter_with_spaces_pattern = make_pattern(raw_delimiters + ['[^\\S\\r\\n]+'])
module-attribute
delimiter_pattern = make_pattern(delimiters)
module-attribute
ante_num_pattern = f'(?<!{raw_delimiter_pattern})'
module-attribute
post_num_pattern = f'(?!{raw_delimiter_pattern})'
module-attribute
full_year_pattern = ante_num_pattern + fy_pattern + post_num_pattern
module-attribute
absolute_date_pattern: List[str] = [ante_num_pattern + day_pattern + d + month_pattern + d + year_pattern + post_num_pattern for d in delimiters] + [ante_num_pattern + year_pattern + d + numeric_month_pattern + d + numeric_day_pattern + post_num_pattern for d in delimiters]
module-attribute
full_date_pattern = [ante_num_pattern + fy_pattern + d + lz_numeric_month_pattern + d + lz_numeric_day_pattern + post_num_pattern for d in ['-', '\\.']]
module-attribute
no_year_pattern = [day + raw_delimiter_with_spaces_pattern + month for day in [ante_num_pattern + numeric_day_pattern, letter_day_pattern] for month in [numeric_month_pattern + post_num_pattern, letter_month_pattern]]
module-attribute
no_day_pattern = [letter_month_pattern + raw_delimiter_with_spaces_pattern + year_pattern + post_num_pattern, ante_num_pattern + lz_numeric_month_pattern + raw_delimiter_with_spaces_pattern + year_pattern + post_num_pattern]
module-attribute
relative_date_pattern = relative_pattern
module-attribute
since_pattern = ['(?<=depuis)' + '.{,5}' + pattern for pattern in absolute_date_pattern + no_year_pattern + full_date_pattern + [relative_pattern]]
module-attribute
false_positive_pattern = make_pattern(['(\\d+' + delimiter_pattern + '){3,}\\d+', '\\d\\/\\d'])
module-attribute
current
current_patterns: List[str] = ['cette\\sann[ée]e(?![-\\s]l[àa])', 'ce\\sjour', 'ces\\sjours[-\\s]ci', "aujourd'?hui", 'ce\\smois([-\\s]ci)?', 'cette\\ssemaine', 'cet?\\s([ée]t[ée]|automne|hiver|printemps)']
module-attribute
current_pattern = make_pattern(current_patterns, with_breaks=True)
module-attribute
relative
ago_pattern = 'il\\s+y\\s+a\\s+.{,10}?\\s+(heures?|jours?|semaines?|mois|ann[ée]es?|ans?)'
module-attribute
in_pattern = 'dans\\s+.{,10}?\\s+(heures?|jours?|semaines?|mois|ann[ée]es?|ans?)'
module-attribute
last_pattern = "l['ae]\\s*(semaine|année|an|mois)\\s+derni[èe]re?"
module-attribute
next_pattern = "l['ae]\\s*(semaine|année|an|mois)\\s+prochaine?"
module-attribute
since_pattern = '(?<=depuis\\s)\\s*.{,10}\\s+(heures?|jours?|semaines?|mois|ann[ée]es?|ans?)(\\s+derni[èe]re?)?'
module-attribute
during_pattern = '(pendant|pdt|pour)\\s+.{,10}?\\s+(heures?|jours?|mois|ann[ée]es?|ans?)'
module-attribute
week_patterns = ['(avant\\-?\\s*)?hier', '(apr[èe]s\\-?\\s*)?demain']
module-attribute
week_pattern = make_pattern(week_patterns, with_breaks=True)
module-attribute
relative_pattern = make_pattern(patterns=[ago_pattern, in_pattern, last_pattern, next_pattern, since_pattern, week_pattern], with_breaks=True)
module-attribute
atomic
time
hour_pattern = '(?<!\\d)(?P<hour>0?[1-9]|1\\d|2[0-3])(?!\\d)'
module-attribute
lz_hour_pattern = '(?<!\\d)(?P<hour>0[1-9]|[12]\\d|3[01])(?!\\d)'
module-attribute
minute_pattern = '(?<!\\d)(?P<minute>0?[1-9]|[1-5]\\d)(?!\\d)'
module-attribute
lz_minute_pattern = '(?<!\\d)(?P<minute>0[1-9]|[1-5]\\d)(?!\\d)'
module-attribute
second_pattern = '(?<!\\d)(?P<second>0?[1-9]|[1-5]\\d)(?!\\d)'
module-attribute
lz_second_pattern = '(?<!\\d)(?P<second>0[1-9]|[1-5]\\d)(?!\\d)'
module-attribute
time_pattern = '(\\s.{,3}' + f'{hour_pattern}[h:]({lz_minute_pattern})?' + f'((:|m|min){lz_second_pattern})?' + ')?'
module-attribute
years
year_patterns: List[str] = ['19\\d\\d'] + [str(year) for year in range(2000, date.today().year + 2)]
module-attribute
full_year_pattern = '(?<!\\d)' + full_year_pattern + '(?!\\d)'
module-attribute
year_pattern = '(?<!\\d)' + year_pattern + '(?!\\d)'
module-attribute
months
letter_months_dict: Dict[str, int] = {'(janvier|janv\\.?)': 1, '(f[ée]vrier|f[ée]v\\.?)': 2, '(mars|mar\\.?)': 3, '(avril|avr\\.?)': 4, 'mai': 5, 'juin': 6, '(juillet|juill?\\.?)': 7, 'ao[uû]t': 8, '(septembre|sept?\\.?)': 9, '(octobre|oct\\.?)': 10, '(novembre|nov\\.)': 11, '(d[ée]cembre|d[ée]c\\.?)': 12}
module-attribute
letter_months: List[str] = list(letter_months_dict.keys())
module-attribute
month_pattern = f'(?P<month>{letter_month_pattern}|{numeric_month_pattern})'
module-attribute
letter_month_pattern = f'(?P<month>{letter_month_pattern})'
module-attribute
numeric_month_pattern = f'(?P<month>{numeric_month_pattern})'
module-attribute
lz_numeric_month_pattern = f'(?P<month>{lz_numeric_month_pattern})'
module-attribute
days
letter_days_dict: Dict[str, int] = {'(premier|1\\s*er)': 1, 'deux': 2, 'trois': 3, 'quatre': 4, 'cinq': 5, 'six': 6, 'sept': 7, 'huit': 8, 'neuf': 9, 'dix': 10, 'onze': 11, 'douze': 12, 'treize': 13, 'quatorze': 14, 'quinze': 15, 'seize': 16, 'dix\\-?\\s*sept': 17, 'dix\\-?\\s*huit': 18, 'dix\\-?\\s*neuf': 19, 'vingt': 20, 'vingt\\-?\\s*et\\-?\\s*un': 21, 'vingt\\-?\\s*deux': 22, 'vingt\\-?\\s*trois': 23, 'vingt\\-?\\s*quatre': 24, 'vingt\\-?\\s*cinq': 25, 'vingt\\-?\\s*six': 26, 'vingt\\-?\\s*sept': 27, 'vingt\\-?\\s*huit': 28, 'vingt\\-?\\s*neuf': 29, 'trente': 30, 'trente\\-?\\s*et\\-?\\s*un': 31}
module-attribute
letter_days: List[str] = list(letter_days_dict.keys())
module-attribute
nlz_numeric_day_pattern = '(?<!\\d)([1-9]|[12]\\d|3[01])(?!\\d)'
module-attribute
day_pattern = f'(?P<day>{letter_day_pattern}|{numeric_day_pattern})'
module-attribute
letter_day_pattern = f'(?P<day>{letter_day_pattern})'
module-attribute
numeric_day_pattern = f'(?P<day>{numeric_day_pattern})'
module-attribute
lz_numeric_day_pattern = f'(?P<day>{lz_numeric_day_pattern})'
module-attribute
Back to top