Skip to content

edsnlp.pipelines.misc.dates.patterns

false_positive

false_positive_pattern = make_pattern(['(\\d+' + delimiter_pattern + '){3,}\\d+(?!:\\d\\d)\\b', '\\d\\/\\d']) module-attribute

relative

specific = {'minus1': ('hier', dict(direction='PAST', day=1)), 'minus2': ('avant[-\\s]hier', dict(direction='PAST', day=2)), 'plus1': ('demain', dict(direction='FUTURE', day=1)), 'plus2': ('après[-\\s]demain', dict(direction='FUTURE', day=2))} module-attribute

specific_pattern = make_pattern(['(?P<specific_{k}>{p})' for (k, (p, _)) in specific.items()]) module-attribute

specific_dict = {k: v for (k, (_, v)) in specific.items()} module-attribute

relative_pattern = ['(?<=' + mode_pattern + '.{,3})?' + p for p in relative_pattern] module-attribute

make_specific_pattern(mode='forward')

Source code in edsnlp/pipelines/misc/dates/patterns/relative.py
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
def make_specific_pattern(mode: str = "forward"):
    """Assemble a relative-date regex from direction, number and unit parts.

    Every variant contains the same core: ``numbers.number_pattern``,
    optional whitespace, then ``units.unit_pattern``. The ``mode`` argument
    decides which direction cue(s) surround that core, each separated from
    it by at least one whitespace character.

    Parameters
    ----------
    mode : str
        ``"forward"``: the preceding-direction pattern comes before the core.
        ``"backward"``: the following-direction pattern comes after the core.
        Any other value: both direction patterns are used, one on each side.

    Returns
    -------
    The concatenated pattern.
    """
    # Core shared by all three variants: "<number><optional spaces><unit>".
    quantity = numbers.number_pattern + r"\s*" + units.unit_pattern

    if mode == "forward":
        return directions.preceding_direction_pattern + r"\s+" + quantity

    if mode == "backward":
        return quantity + r"\s+" + directions.following_direction_pattern

    # Fallback for every other mode: direction cue on both sides of the core.
    return (
        directions.preceding_direction_pattern
        + r"\s+"
        + quantity
        + r"\s+"
        + directions.following_direction_pattern
    )

current

current_patterns: List[str] = ['(?P<year_0>cette\\s+ann[ée]e)(?![-\\s]l[àa])', "(?P<day_0>ce\\s+jour|aujourd['\\s]?hui)", '(?P<week_0>cette\\s+semaine|ces\\sjours[-\\s]ci)', '(?P<month_0>ce\\smois([-\\s]ci)?)'] module-attribute

current_pattern = make_pattern(current_patterns, with_breaks=True) module-attribute

absolute

no_year_pattern = [day + raw_delimiter_with_spaces_pattern + month + time_pattern + post_num_pattern for day in [ante_num_pattern + numeric_day_pattern, letter_day_pattern] for month in [numeric_month_pattern + post_num_pattern, letter_month_pattern]] module-attribute

no_day_pattern = [letter_month_pattern + raw_delimiter_with_spaces_pattern + year_pattern + post_num_pattern, ante_num_pattern + lz_numeric_month_pattern + raw_delimiter_with_spaces_pattern + year_pattern + post_num_pattern] module-attribute

full_year_pattern = ante_num_pattern + fy_pattern + post_num_pattern module-attribute

absolute_pattern = ['(?<=' + mode_pattern + '.{,3})?' + p for p in absolute_pattern] module-attribute

duration

cue_pattern = '(pendant|durant|pdt)' module-attribute

duration_pattern = [cue_pattern + '.{,3}' + numbers.number_pattern + '\\s*' + units.unit_pattern] module-attribute

atomic

delimiters

raw_delimiters = ['\\/', '\\-'] module-attribute
delimiters = raw_delimiters + ['\\.', '[^\\S\\r\\n]+'] module-attribute
raw_delimiter_pattern = make_pattern(raw_delimiters) module-attribute
raw_delimiter_with_spaces_pattern = make_pattern(raw_delimiters + ['[^\\S\\r\\n]+']) module-attribute
delimiter_pattern = make_pattern(delimiters) module-attribute
ante_num_pattern = '(?<!.(?:{raw_delimiter_pattern})|[0-9][.,])' module-attribute
post_num_pattern = '(?!{raw_delimiter_pattern})' module-attribute

time

hour_pattern = '(?<!\\d)(?P<hour>0?[1-9]|1\\d|2[0-3])(?!\\d)' module-attribute
lz_hour_pattern = '(?<!\\d)(?P<hour>0[1-9]|[12]\\d|3[01])(?!\\d)' module-attribute
minute_pattern = '(?<!\\d)(?P<minute>0?[1-9]|[1-5]\\d)(?!\\d)' module-attribute
lz_minute_pattern = '(?<!\\d)(?P<minute>0[1-9]|[1-5]\\d)(?!\\d)' module-attribute
second_pattern = '(?<!\\d)(?P<second>0?[1-9]|[1-5]\\d)(?!\\d)' module-attribute
lz_second_pattern = '(?<!\\d)(?P<second>0[1-9]|[1-5]\\d)(?!\\d)' module-attribute
time_pattern = '(\\s.{,3}' + '{hour_pattern}[h:]({lz_minute_pattern})?' + '((:|m|min){lz_second_pattern})?' + ')?' module-attribute

units

units = ['(?P<unit_year>ans?|ann[ée]es?)', '(?P<unit_semester>semestres?)', '(?P<unit_trimester>trimestres?)', '(?P<unit_month>mois)', '(?P<unit_week>semaines?)', '(?P<unit_day>jours?|journ[ée]es?)', '(?P<unit_hour>h|heures?)', '(?P<unit_minute>min|minutes?)', '(?P<unit_second>sec|secondes?|s)'] module-attribute
unit_pattern = make_pattern(units, with_breaks=True) module-attribute

days

letter_days = ['(?P<day_01>premier|1\\s*er)', '(?P<day_02>deux)', '(?P<day_03>trois)', '(?P<day_04>quatre)', '(?P<day_05>cinq)', '(?P<day_06>six)', '(?P<day_07>sept)', '(?P<day_08>huit)', '(?P<day_09>neuf)', '(?P<day_10>dix)', '(?P<day_11>onze)', '(?P<day_12>douze)', '(?P<day_13>treize)', '(?P<day_14>quatorze)', '(?P<day_15>quinze)', '(?P<day_16>seize)', '(?P<day_17>dix\\-?\\s*sept)', '(?P<day_18>dix\\-?\\s*huit)', '(?P<day_19>dix\\-?\\s*neuf)', '(?P<day_20>vingt)', '(?P<day_21>vingt\\-?\\s*et\\-?\\s*un)', '(?P<day_22>vingt\\-?\\s*deux)', '(?P<day_23>vingt\\-?\\s*trois)', '(?P<day_24>vingt\\-?\\s*quatre)', '(?P<day_25>vingt\\-?\\s*cinq)', '(?P<day_26>vingt\\-?\\s*six)', '(?P<day_27>vingt\\-?\\s*sept)', '(?P<day_28>vingt\\-?\\s*huit)', '(?P<day_29>vingt\\-?\\s*neuf)', '(?P<day_30>trente)', '(?P<day_31>trente\\-?\\s*et\\-?\\s*un)'] module-attribute
letter_day_pattern = make_pattern(letter_days) module-attribute
nlz_numeric_day_pattern = '(?<!\\d)([1-9]|[12]\\d|3[01])(?!\\d)' module-attribute
numeric_day_pattern = '(?P<day>{numeric_day_pattern})' module-attribute
lz_numeric_day_pattern = '(?P<day>{lz_numeric_day_pattern})' module-attribute
day_pattern = '({letter_day_pattern}|{numeric_day_pattern})' module-attribute

numbers

letter_numbers = ["(?P<number_01>l'|le|la|une?|ce|cette|cet)", '(?P<number_02>deux)', '(?P<number_03>trois)', '(?P<number_04>quatre)', '(?P<number_05>cinq)', '(?P<number_06>six)', '(?P<number_07>sept)', '(?P<number_08>huit)', '(?P<number_09>neuf)', '(?P<number_10>dix)', '(?P<number_11>onze)', '(?P<number_12>douze)', '(?P<number_12>treize)', '(?P<number_13>quatorze)', '(?P<number_14>quinze)', '(?P<number_15>seize)', '(?P<number_16>dix[-\\s]sept)', '(?P<number_17>dix[-\\s]huit)', '(?P<number_18>dix[-\\s]neuf)', '(?P<number_20>vingt)', '(?P<number_21>vingt[-\\s]et[-\\s]un)', '(?P<number_22>vingt[-\\s]deux)', '(?P<number_23>vingt[-\\s]trois)', '(?P<number_24>vingt[-\\s]quatre)', '(?P<number_25>vingt[-\\s]cinq)', '(?P<number_26>vingt[-\\s]six)', '(?P<number_27>vingt[-\\s]sept)', '(?P<number_28>vingt[-\\s]huit)', '(?P<number_29>vingt[-\\s]neuf)', '(?P<number_30>trente)'] module-attribute
numeric_numbers = [str(i) for i in range(1, 100)] module-attribute
letter_number_pattern = make_pattern(letter_numbers, with_breaks=True) module-attribute
numeric_number_pattern = make_pattern(numeric_numbers, name='number') module-attribute
number_pattern = '({letter_number_pattern}|{numeric_number_pattern})' module-attribute

directions

preceding_directions = ['(?P<direction_PAST>depuis|depuis\\s+le|il\\s+y\\s+a)', '(?P<direction_FUTURE>dans)'] module-attribute
following_directions = ['(?P<direction_FUTURE>prochaine?s?|suivante?s?|plus\\s+tard)', '(?P<direction_PAST>derni[eè]re?s?|passée?s?|pr[ée]c[ée]dente?s?|plus\\s+t[ôo]t)'] module-attribute
preceding_direction_pattern = make_pattern(preceding_directions, with_breaks=True) module-attribute
following_direction_pattern = make_pattern(following_directions, with_breaks=True) module-attribute

months

letter_months = ['(?P<month_01>janvier|janv\\.?)', '(?P<month_02>f[ée]vrier|f[ée]v\\.?)', '(?P<month_03>mars|mar\\.?)', '(?P<month_04>avril|avr\\.?)', '(?P<month_05>mai)', '(?P<month_06>juin)', '(?P<month_07>juillet|juill?\\.?)', '(?P<month_08>ao[uû]t)', '(?P<month_09>septembre|sept?\\.?)', '(?P<month_10>octobre|oct\\.?)', '(?P<month_11>novembre|nov\\.)', '(?P<month_12>d[ée]cembre|d[ée]c\\.?)'] module-attribute
letter_month_pattern = make_pattern(letter_months, with_breaks=True) module-attribute
numeric_month_pattern = '(?P<month>{numeric_month_pattern})' module-attribute
lz_numeric_month_pattern = '(?P<month>{lz_numeric_month_pattern})' module-attribute
month_pattern = '({letter_month_pattern}|{numeric_month_pattern})' module-attribute

years

year_patterns: List[str] = ['19\\d\\d'] + [str(year) for year in range(2000, date.today().year + 2)] module-attribute
full_year_pattern = '(?<!\\d)' + full_year_pattern + '(?!\\d)' module-attribute
year_pattern = '(?<!\\d)' + year_pattern + '(?!\\d)' module-attribute

modes

modes = ['(?P<mode_FROM>depuis|depuis\\s+le|[àa]\\s+partir\\s+d[eu]|du)', "(?P<mode_UNTIL>jusqu'[àa]u?|au)"] module-attribute
mode_pattern = make_pattern(modes, with_breaks=True) module-attribute
Back to top