edsnlp.pipelines.misc.dates.patterns.atomic
delimiters
raw_delimiters = ['\\/', '\\-']
module-attribute
delimiters = raw_delimiters + ['\\.', '[^\\S\\r\\n]+']
module-attribute
raw_delimiter_pattern = make_pattern(raw_delimiters)
module-attribute
raw_delimiter_with_spaces_pattern = make_pattern(raw_delimiters + ['[^\\S\\r\\n]+'])
module-attribute
delimiter_pattern = make_pattern(delimiters)
module-attribute
ante_num_pattern = '(?<!.(?:{raw_delimiter_pattern})|[0-9][.,])'
module-attribute
post_num_pattern = '(?!{raw_delimiter_pattern})'
module-attribute
time
hour_pattern = '(?<!\\d)(?P<hour>0?[1-9]|1\\d|2[0-3])(?!\\d)'
module-attribute
lz_hour_pattern = '(?<!\\d)(?P<hour>0[1-9]|1\\d|2[0-3])(?!\\d)'
module-attribute
minute_pattern = '(?<!\\d)(?P<minute>0?[1-9]|[1-5]\\d)(?!\\d)'
module-attribute
lz_minute_pattern = '(?<!\\d)(?P<minute>0[1-9]|[1-5]\\d)(?!\\d)'
module-attribute
second_pattern = '(?<!\\d)(?P<second>0?[1-9]|[1-5]\\d)(?!\\d)'
module-attribute
lz_second_pattern = '(?<!\\d)(?P<second>0[1-9]|[1-5]\\d)(?!\\d)'
module-attribute
time_pattern = '(\\s.{,3}' + '{hour_pattern}[h:]({lz_minute_pattern})?' + '((:|m|min){lz_second_pattern})?' + ')?'
module-attribute
units
units = ['(?P<unit_year>ans?|ann[ée]es?)', '(?P<unit_semester>semestres?)', '(?P<unit_trimester>trimestres?)', '(?P<unit_month>mois)', '(?P<unit_week>semaines?)', '(?P<unit_day>jours?|journ[ée]es?)', '(?P<unit_hour>h|heures?)', '(?P<unit_minute>min|minutes?)', '(?P<unit_second>sec|secondes?|s)']
module-attribute
unit_pattern = make_pattern(units, with_breaks=True)
module-attribute
days
letter_days = ['(?P<day_01>premier|1\\s*er)', '(?P<day_02>deux)', '(?P<day_03>trois)', '(?P<day_04>quatre)', '(?P<day_05>cinq)', '(?P<day_06>six)', '(?P<day_07>sept)', '(?P<day_08>huit)', '(?P<day_09>neuf)', '(?P<day_10>dix)', '(?P<day_11>onze)', '(?P<day_12>douze)', '(?P<day_13>treize)', '(?P<day_14>quatorze)', '(?P<day_15>quinze)', '(?P<day_16>seize)', '(?P<day_17>dix\\-?\\s*sept)', '(?P<day_18>dix\\-?\\s*huit)', '(?P<day_19>dix\\-?\\s*neuf)', '(?P<day_20>vingt)', '(?P<day_21>vingt\\-?\\s*et\\-?\\s*un)', '(?P<day_22>vingt\\-?\\s*deux)', '(?P<day_23>vingt\\-?\\s*trois)', '(?P<day_24>vingt\\-?\\s*quatre)', '(?P<day_25>vingt\\-?\\s*cinq)', '(?P<day_26>vingt\\-?\\s*six)', '(?P<day_27>vingt\\-?\\s*sept)', '(?P<day_28>vingt\\-?\\s*huit)', '(?P<day_29>vingt\\-?\\s*neuf)', '(?P<day_30>trente)', '(?P<day_31>trente\\-?\\s*et\\-?\\s*un)']
module-attribute
letter_day_pattern = make_pattern(letter_days)
module-attribute
nlz_numeric_day_pattern = '(?<!\\d)([1-9]|[12]\\d|3[01])(?!\\d)'
module-attribute
numeric_day_pattern = '(?P<day>{numeric_day_pattern})'
module-attribute
lz_numeric_day_pattern = '(?P<day>{lz_numeric_day_pattern})'
module-attribute
day_pattern = '({letter_day_pattern}|{numeric_day_pattern})'
module-attribute
numbers
letter_numbers = ["(?P<number_01>l'|le|la|une?|ce|cette|cet)", '(?P<number_02>deux)', '(?P<number_03>trois)', '(?P<number_04>quatre)', '(?P<number_05>cinq)', '(?P<number_06>six)', '(?P<number_07>sept)', '(?P<number_08>huit)', '(?P<number_09>neuf)', '(?P<number_10>dix)', '(?P<number_11>onze)', '(?P<number_12>douze)', '(?P<number_13>treize)', '(?P<number_14>quatorze)', '(?P<number_15>quinze)', '(?P<number_16>seize)', '(?P<number_17>dix[-\\s]sept)', '(?P<number_18>dix[-\\s]huit)', '(?P<number_19>dix[-\\s]neuf)', '(?P<number_20>vingt)', '(?P<number_21>vingt[-\\s]et[-\\s]un)', '(?P<number_22>vingt[-\\s]deux)', '(?P<number_23>vingt[-\\s]trois)', '(?P<number_24>vingt[-\\s]quatre)', '(?P<number_25>vingt[-\\s]cinq)', '(?P<number_26>vingt[-\\s]six)', '(?P<number_27>vingt[-\\s]sept)', '(?P<number_28>vingt[-\\s]huit)', '(?P<number_29>vingt[-\\s]neuf)', '(?P<number_30>trente)']
module-attribute
numeric_numbers = [str(i) for i in range(1, 100)]
module-attribute
letter_number_pattern = make_pattern(letter_numbers, with_breaks=True)
module-attribute
numeric_number_pattern = make_pattern(numeric_numbers, name='number')
module-attribute
number_pattern = '({letter_number_pattern}|{numeric_number_pattern})'
module-attribute
directions
preceding_directions = ['(?P<direction_PAST>depuis|depuis\\s+le|il\\s+y\\s+a)', '(?P<direction_FUTURE>dans)']
module-attribute
following_directions = ['(?P<direction_FUTURE>prochaine?s?|suivante?s?|plus\\s+tard)', '(?P<direction_PAST>derni[eè]re?s?|passée?s?|pr[ée]c[ée]dente?s?|plus\\s+t[ôo]t)']
module-attribute
preceding_direction_pattern = make_pattern(preceding_directions, with_breaks=True)
module-attribute
following_direction_pattern = make_pattern(following_directions, with_breaks=True)
module-attribute
months
letter_months = ['(?P<month_01>janvier|janv\\.?)', '(?P<month_02>f[ée]vrier|f[ée]v\\.?)', '(?P<month_03>mars|mar\\.?)', '(?P<month_04>avril|avr\\.?)', '(?P<month_05>mai)', '(?P<month_06>juin)', '(?P<month_07>juillet|juill?\\.?)', '(?P<month_08>ao[uû]t)', '(?P<month_09>septembre|sept?\\.?)', '(?P<month_10>octobre|oct\\.?)', '(?P<month_11>novembre|nov\\.)', '(?P<month_12>d[ée]cembre|d[ée]c\\.?)']
module-attribute
letter_month_pattern = make_pattern(letter_months, with_breaks=True)
module-attribute
numeric_month_pattern = '(?P<month>{numeric_month_pattern})'
module-attribute
lz_numeric_month_pattern = '(?P<month>{lz_numeric_month_pattern})'
module-attribute
month_pattern = '({letter_month_pattern}|{numeric_month_pattern})'
module-attribute
years
year_patterns: List[str] = ['19\\d\\d'] + [str(year) for year in range(2000, date.today().year + 2)]
module-attribute
full_year_pattern = '(?<!\\d)' + full_year_pattern + '(?!\\d)'
module-attribute
year_pattern = '(?<!\\d)' + year_pattern + '(?!\\d)'
module-attribute
modes
modes = ['(?P<mode_FROM>depuis|depuis\\s+le|[àa]\\s+partir\\s+d[eu]|du)', "(?P<mode_UNTIL>jusqu'[àa]u?|au)"]
module-attribute
mode_pattern = make_pattern(modes, with_breaks=True)
module-attribute
Back to top