Skip to content

edsnlp.pipelines.misc.dates.models

Direction

Bases: Enum

Source code in edsnlp/pipelines/misc/dates/models.py
13
14
15
16
17
class Direction(Enum):
    """Direction of a relative date.

    ``PAST`` flips the sign of the computed duration, ``CURRENT`` is
    normalised as ``~0 <unit>``; any other value leaves the sign positive.
    """

    FUTURE = "FUTURE"
    PAST = "PAST"
    CURRENT = "CURRENT"

FUTURE = 'FUTURE' class-attribute

PAST = 'PAST' class-attribute

CURRENT = 'CURRENT' class-attribute

Mode

Bases: Enum

Source code in edsnlp/pipelines/misc/dates/models.py
20
21
22
23
24
class Mode(Enum):
    """Marker carried by date models through their ``mode`` field.

    The member names mirror the three optional fields of ``Period``.
    """

    FROM = "FROM"
    UNTIL = "UNTIL"
    DURATION = "DURATION"

FROM = 'FROM' class-attribute

UNTIL = 'UNTIL' class-attribute

DURATION = 'DURATION' class-attribute

Period

Bases: BaseModel

Source code in edsnlp/pipelines/misc/dates/models.py
27
28
29
30
31
32
33
class Period(BaseModel):
    """Group of up to three spans describing a period; each part is optional."""

    class Config:
        # Required so that ``Span`` (not a standard field type) is accepted
        # as a field annotation.
        arbitrary_types_allowed = True

    FROM: Optional[Span] = None
    UNTIL: Optional[Span] = None
    DURATION: Optional[Span] = None

FROM: Optional[Span] = None class-attribute

UNTIL: Optional[Span] = None class-attribute

DURATION: Optional[Span] = None class-attribute

Config

Source code in edsnlp/pipelines/misc/dates/models.py
32
33
class Config:
    # Configuration of the enclosing ``Period`` model: accept field types
    # (here ``Span``) that are not standard field types.
    arbitrary_types_allowed = True
arbitrary_types_allowed = True class-attribute

BaseDate

Bases: BaseModel

Source code in edsnlp/pipelines/misc/dates/models.py
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
class BaseDate(BaseModel):
    """Base model for dates: optional ``mode`` plus input clean-up validators."""

    mode: Optional[Mode] = None

    @validator("*", pre=True)
    def remove_space(cls, v):
        """Strip every space character from string inputs.

        Useful for coping with ill-formatted PDF extractions. Values that
        are not strings are returned unchanged.
        """
        return v.replace(" ", "") if isinstance(v, str) else v

    @root_validator(pre=True)
    def validate_strings(cls, d: Dict[str, str]) -> Dict[str, str]:
        """Expand ``<key>_<value>`` entries into ``key: value`` entries.

        Parameters
        ----------
        d : Dict[str, str]
            Original data.

        Returns
        -------
        Dict[str, str]
            A copy of ``d`` where, for every entry whose value is not None
            and whose key contains an underscore, the part of the key before
            the underscore is mapped to the part after it. The original
            entries are kept.
        """
        result = d.copy()

        for name, raw in d.items():
            if raw is None or "_" not in name:
                continue
            # NOTE: a key holding more than one underscore cannot be unpacked
            # into two parts and makes this line raise ValueError.
            key, value = name.split("_")
            result[key] = value

        return result

mode: Optional[Mode] = None class-attribute

remove_space(v)

Remove spaces. Useful for coping with ill-formatted PDF extractions.

Source code in edsnlp/pipelines/misc/dates/models.py
40
41
42
43
44
45
@validator("*", pre=True)
def remove_space(cls, v):
    """Strip every space character from string inputs.

    Useful for coping with ill-formatted PDF extractions. Values that are
    not strings are returned unchanged.
    """
    return v.replace(" ", "") if isinstance(v, str) else v

validate_strings(d)

Source code in edsnlp/pipelines/misc/dates/models.py
47
48
49
50
51
52
53
54
55
56
@root_validator(pre=True)
def validate_strings(cls, d: Dict[str, str]) -> Dict[str, str]:
    """Expand ``<key>_<value>`` entries into ``key: value`` entries.

    Parameters
    ----------
    d : Dict[str, str]
        Original data.

    Returns
    -------
    Dict[str, str]
        A copy of ``d`` where, for every entry whose value is not None and
        whose key contains an underscore, the part of the key before the
        underscore is mapped to the part after it. The original entries
        are kept.
    """
    result = d.copy()

    for name, raw in d.items():
        if raw is None or "_" not in name:
            continue
        # NOTE: a key holding more than one underscore cannot be unpacked
        # into two parts and makes this line raise ValueError.
        key, value = name.split("_")
        result[key] = value

    return result

AbsoluteDate

Bases: BaseDate

Source code in edsnlp/pipelines/misc/dates/models.py
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
class AbsoluteDate(BaseDate):
    """Date described by explicit calendar components, any of which may be missing."""

    year: Optional[int] = None
    month: Optional[int] = None
    day: Optional[int] = None
    hour: Optional[int] = None
    minute: Optional[int] = None
    second: Optional[int] = None

    def to_datetime(
        self,
        tz: Union[str, pendulum.tz.timezone] = "Europe/Paris",
        note_datetime: Optional[datetime] = None,
        infer_from_context: bool = False,
        default_day=1,
        default_month=1,
        **kwargs,
    ) -> Optional[pendulum.datetime]:
        """
        Build a timezone-aware datetime from the parsed components.

        Parameters
        ----------
        tz : Union[str, pendulum.tz.timezone]
            Timezone handed to ``pendulum.datetime``.
        note_datetime : Optional[datetime]
            Date of the note. When ``infer_from_context`` is set, its year
            fills in a missing year. Ignored when falsy or ``NaT``.
        infer_from_context : bool
            Whether missing components may be completed (from
            ``note_datetime`` and the defaults below).
        default_day : int
            Day used when the day is missing and inference is enabled.
        default_month : int
            Month used when only the year is known and inference is enabled.
        **kwargs
            Accepted and ignored.

        Returns
        -------
        Optional[pendulum.datetime]
            The datetime, or ``None`` when the date is incomplete and cannot
            be inferred, or when the components do not form a valid date.
        """
        d = self.dict(exclude_none=True)
        d.pop("mode", None)

        if not (self.year and self.month and self.day):
            if not infer_from_context:
                return None

            if not self.year:
                # The year can only come from the note: a month and a usable
                # note date are both required.
                if (
                    not self.month
                    or not note_datetime
                    or isinstance(note_datetime, NaTType)
                ):
                    return None
                d["year"] = note_datetime.year
                if not self.day:
                    d["day"] = default_day
            elif self.month:
                # Year and month are known, so the day is the missing part.
                d["day"] = default_day
            elif not self.day:
                # Year only.
                d["day"] = default_day
                d["month"] = default_month
            else:
                # Year and day without a month: nothing sensible to infer.
                return None

        # Inferred dates can be just as invalid as fully specified ones
        # (e.g. 31 February), so every path shares the same guard.
        try:
            return pendulum.datetime(**d, tz=tz)
        except ValueError:
            return None

    def norm(self) -> str:
        """
        Render the date as ``YYYY-MM-DD``, optionally followed by a time.

        Missing date components are shown as question marks. Time components
        that are set (including zero) are appended as ``HHh``, ``MMm`` and
        ``SSs`` after a single space.

        Returns
        -------
        str
            Normalised representation of the date.
        """
        year = str(self.year) if self.year else "????"
        month = f"{self.month:02}" if self.month else "??"
        day = f"{self.day:02}" if self.day else "??"

        text = "-".join([year, month, day])

        # Compare against None: 0 is a legitimate hour, minute or second.
        time = ""
        if self.hour is not None:
            time += f"{self.hour:02}h"
        if self.minute is not None:
            time += f"{self.minute:02}m"
        if self.second is not None:
            time += f"{self.second:02}s"

        if time:
            text += f" {time}"

        return text

    @validator("year")
    def validate_year(cls, v):
        """
        Expand two-digit years into four-digit years.

        Values of 100 or more are kept as they are, values below 25 are
        mapped to the 2000s and the remaining two-digit values to the 1900s.
        """
        if v is None or v >= 100:
            return v

        if v < 25:
            return 2000 + v

        # Previously this fell through and returned None, erasing the year.
        return 1900 + v

year: Optional[int] = None class-attribute

month: Optional[int] = None class-attribute

day: Optional[int] = None class-attribute

hour: Optional[int] = None class-attribute

minute: Optional[int] = None class-attribute

second: Optional[int] = None class-attribute

to_datetime(tz='Europe/Paris', note_datetime=None, infer_from_context=False, default_day=1, default_month=1, **kwargs)

Source code in edsnlp/pipelines/misc/dates/models.py
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
def to_datetime(
    self,
    tz: Union[str, pendulum.tz.timezone] = "Europe/Paris",
    note_datetime: Optional[datetime] = None,
    infer_from_context: bool = False,
    default_day=1,
    default_month=1,
    **kwargs,
) -> Optional[pendulum.datetime]:
    """
    Build a timezone-aware datetime from the parsed components.

    Parameters
    ----------
    tz : Union[str, pendulum.tz.timezone]
        Timezone handed to ``pendulum.datetime``.
    note_datetime : Optional[datetime]
        Date of the note. When ``infer_from_context`` is set, its year
        fills in a missing year. Ignored when falsy or ``NaT``.
    infer_from_context : bool
        Whether missing components may be completed (from
        ``note_datetime`` and the defaults below).
    default_day : int
        Day used when the day is missing and inference is enabled.
    default_month : int
        Month used when only the year is known and inference is enabled.
    **kwargs
        Accepted and ignored.

    Returns
    -------
    Optional[pendulum.datetime]
        The datetime, or ``None`` when the date is incomplete and cannot
        be inferred, or when the components do not form a valid date.
    """
    d = self.dict(exclude_none=True)
    d.pop("mode", None)

    if not (self.year and self.month and self.day):
        if not infer_from_context:
            return None

        if not self.year:
            # The year can only come from the note: a month and a usable
            # note date are both required.
            if (
                not self.month
                or not note_datetime
                or isinstance(note_datetime, NaTType)
            ):
                return None
            d["year"] = note_datetime.year
            if not self.day:
                d["day"] = default_day
        elif self.month:
            # Year and month are known, so the day is the missing part.
            d["day"] = default_day
        elif not self.day:
            # Year only.
            d["day"] = default_day
            d["month"] = default_month
        else:
            # Year and day without a month: nothing sensible to infer.
            return None

    # Inferred dates can be just as invalid as fully specified ones
    # (e.g. 31 February), so every path shares the same guard.
    try:
        return pendulum.datetime(**d, tz=tz)
    except ValueError:
        return None

norm()

Source code in edsnlp/pipelines/misc/dates/models.py
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
def norm(self) -> str:
    """
    Render the date as ``YYYY-MM-DD``, optionally followed by a time.

    Missing date components are shown as question marks. Time components
    that are set (including zero) are appended as ``HHh``, ``MMm`` and
    ``SSs`` after a single space.

    Returns
    -------
    str
        Normalised representation of the date.
    """
    year = str(self.year) if self.year else "????"
    month = f"{self.month:02}" if self.month else "??"
    day = f"{self.day:02}" if self.day else "??"

    text = "-".join([year, month, day])

    # Compare against None: 0 is a legitimate hour, minute or second.
    time = ""
    if self.hour is not None:
        time += f"{self.hour:02}h"
    if self.minute is not None:
        time += f"{self.minute:02}m"
    if self.second is not None:
        time += f"{self.second:02}s"

    if time:
        text += f" {time}"

    return text

validate_year(v)

Source code in edsnlp/pipelines/misc/dates/models.py
143
144
145
146
147
148
149
@validator("year")
def validate_year(cls, v):
    """
    Expand two-digit years into four-digit years.

    Values of 100 or more are kept as they are, values below 25 are mapped
    to the 2000s and the remaining two-digit values to the 1900s.
    """
    if v is None or v >= 100:
        return v

    if v < 25:
        return 2000 + v

    # Previously this fell through and returned None, erasing the year.
    return 1900 + v

Relative

Bases: BaseDate

Source code in edsnlp/pipelines/misc/dates/models.py
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
class Relative(BaseDate):
    """Amount of time expressed per unit, every unit being optional."""

    year: Optional[int] = None
    month: Optional[int] = None
    week: Optional[int] = None
    day: Optional[int] = None
    hour: Optional[int] = None
    minute: Optional[int] = None
    second: Optional[int] = None

    @root_validator(pre=True)
    def parse_unit(cls, d: Dict[str, str]) -> Dict[str, str]:
        """
        Units need to be handled separately.

        When the data holds a ``unit`` entry, the ``number`` entry is stored
        under the key named by that unit. The input mapping is updated in
        place.

        Parameters
        ----------
        d : Dict[str, str]
            Original data

        Returns
        -------
        Dict[str, str]
            Transformed data
        """
        unit = d.get("unit")
        if not unit:
            return d

        d[unit] = d.get("number")
        return d

    def to_datetime(self, **kwargs) -> pendulum.Duration:
        """
        Convert the populated units into a signed ``pendulum`` duration.

        Returns
        -------
        pendulum.Duration
            The duration, negated when a ``direction`` field is present and
            equal to ``Direction.PAST``.
        """
        fields = self.dict(exclude_none=True)

        # ``direction`` and ``mode`` are not units: take them out first.
        direction = fields.pop("direction", None)
        fields.pop("mode", None)

        sign = -1 if direction == Direction.PAST else 1

        # Field names are singular; they are passed to pendulum.duration
        # with an "s" appended.
        plural = {f"{name}s": amount for name, amount in fields.items()}

        return sign * pendulum.duration(**plural)

year: Optional[int] = None class-attribute

month: Optional[int] = None class-attribute

week: Optional[int] = None class-attribute

day: Optional[int] = None class-attribute

hour: Optional[int] = None class-attribute

minute: Optional[int] = None class-attribute

second: Optional[int] = None class-attribute

parse_unit(d)

Units need to be handled separately.

This validator stores the detected number under the key named after the detected unit.

PARAMETER DESCRIPTION
d

Original data

TYPE: Dict[str, str]

RETURNS DESCRIPTION
Dict[str, str]

Transformed data

Source code in edsnlp/pipelines/misc/dates/models.py
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
@root_validator(pre=True)
def parse_unit(cls, d: Dict[str, str]) -> Dict[str, str]:
    """
    Units need to be handled separately.

    When the data holds a ``unit`` entry, the ``number`` entry is stored
    under the key named by that unit. The input mapping is updated in place.

    Parameters
    ----------
    d : Dict[str, str]
        Original data

    Returns
    -------
    Dict[str, str]
        Transformed data
    """
    unit = d.get("unit")
    if not unit:
        return d

    d[unit] = d.get("number")
    return d

to_datetime(**kwargs)

Source code in edsnlp/pipelines/misc/dates/models.py
187
188
189
190
191
192
193
194
195
196
197
198
def to_datetime(self, **kwargs) -> pendulum.Duration:
    """
    Convert the populated units into a signed ``pendulum`` duration.

    Returns
    -------
    pendulum.Duration
        The duration, negated when a ``direction`` field is present and
        equal to ``Direction.PAST``.
    """
    fields = self.dict(exclude_none=True)

    # ``direction`` and ``mode`` are not units: take them out first.
    direction = fields.pop("direction", None)
    fields.pop("mode", None)

    sign = -1 if direction == Direction.PAST else 1

    # Field names are singular; they are passed to pendulum.duration
    # with an "s" appended.
    plural = {f"{name}s": amount for name, amount in fields.items()}

    return sign * pendulum.duration(**plural)

RelativeDate

Bases: Relative

Source code in edsnlp/pipelines/misc/dates/models.py
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
class RelativeDate(Relative):
    """Relative amount of time combined with a direction."""

    direction: Direction = Direction.CURRENT

    def to_datetime(
        self,
        note_datetime: Optional[datetime] = None,
        **kwargs,
    ) -> Union[pendulum.Duration, datetime]:
        """
        Resolve the relative date, against the note date when one is given.

        Parameters
        ----------
        note_datetime : Optional[datetime]
            Date of the note. Ignored when ``None`` or ``NaT``.
        **kwargs
            Accepted and ignored.

        Returns
        -------
        Union[pendulum.Duration, datetime]
            ``note_datetime`` shifted by the signed duration when a usable
            note date is supplied, the signed duration alone otherwise.
        """
        td = super().to_datetime()

        if note_datetime is None or isinstance(note_datetime, NaTType):
            return td

        return note_datetime + td

    def norm(self) -> str:
        """
        Render the relative date as a string.

        Returns
        -------
        str
            ``~0 <unit>`` for the ``CURRENT`` direction (``day`` when no
            unit is set); otherwise the string form of the duration,
            prefixed with ``+`` when it is strictly positive.
        """
        if self.direction != Direction.CURRENT:
            td = self.to_datetime()
            text = str(td)
            return f"+{text}" if td.in_seconds() > 0 else text

        fields = self.dict(exclude_none=True)
        fields.pop("direction", None)
        fields.pop("mode", None)

        # The first remaining populated field names the unit.
        key = next(iter(fields), "day")
        return f"~0 {key}"

    @root_validator(pre=True)
    def handle_specifics(cls, d: Dict[str, str]) -> Dict[str, str]:
        """
        Specific patterns such as `aujourd'hui`, `hier`, etc.,
        need to be handled separately.

        The ``specific`` entry, when known to ``specific_dict``, is replaced
        by the values registered for it. The input mapping is updated in
        place.

        Parameters
        ----------
        d : Dict[str, str]
            Original data.

        Returns
        -------
        Dict[str, str]
            Modified data.
        """
        overrides = specific_dict.get(d.get("specific"))

        if overrides:
            d.update(overrides)

        return d

direction: Direction = Direction.CURRENT class-attribute

to_datetime(note_datetime=None, **kwargs)

Source code in edsnlp/pipelines/misc/dates/models.py
204
205
206
207
208
209
210
211
212
213
214
def to_datetime(
    self,
    note_datetime: Optional[datetime] = None,
    **kwargs,
) -> Union[pendulum.Duration, datetime]:
    """
    Resolve the relative date, against the note date when one is given.

    Parameters
    ----------
    note_datetime : Optional[datetime]
        Date of the note. Ignored when ``None`` or ``NaT``.
    **kwargs
        Accepted and ignored.

    Returns
    -------
    Union[pendulum.Duration, datetime]
        ``note_datetime`` shifted by the signed duration when a usable
        note date is supplied, the signed duration alone otherwise.
    """
    # Explicit two-argument form kept: this listing is shown outside its
    # class body, where the zero-argument form would not resolve.
    td = super(RelativeDate, self).to_datetime()

    if note_datetime is None or isinstance(note_datetime, NaTType):
        return td

    return note_datetime + td

norm()

Source code in edsnlp/pipelines/misc/dates/models.py
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
def norm(self) -> str:
    """
    Render the relative date as a string.

    Returns
    -------
    str
        ``~0 <unit>`` for the ``CURRENT`` direction (``day`` when no unit
        is set); otherwise the string form of the duration, prefixed with
        ``+`` when it is strictly positive.
    """
    if self.direction != Direction.CURRENT:
        td = self.to_datetime()
        text = str(td)
        return f"+{text}" if td.in_seconds() > 0 else text

    fields = self.dict(exclude_none=True)
    fields.pop("direction", None)
    fields.pop("mode", None)

    # The first remaining populated field names the unit.
    key = next(iter(fields), "day")
    return f"~0 {key}"

handle_specifics(d)

Specific patterns such as aujourd'hui, hier, etc., need to be handled separately.

PARAMETER DESCRIPTION
d

Original data.

TYPE: Dict[str, str]

RETURNS DESCRIPTION
Dict[str, str]

Modified data.

Source code in edsnlp/pipelines/misc/dates/models.py
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
@root_validator(pre=True)
def handle_specifics(cls, d: Dict[str, str]) -> Dict[str, str]:
    """
    Specific patterns such as `aujourd'hui`, `hier`, etc.,
    need to be handled separately.

    The ``specific`` entry, when known to ``specific_dict``, is replaced by
    the values registered for it. The input mapping is updated in place.

    Parameters
    ----------
    d : Dict[str, str]
        Original data.

    Returns
    -------
    Dict[str, str]
        Modified data.
    """
    overrides = specific_dict.get(d.get("specific"))

    if overrides:
        d.update(overrides)

    return d

Duration

Bases: Relative

Source code in edsnlp/pipelines/misc/dates/models.py
260
261
262
263
264
265
266
class Duration(Relative):
    """Relative amount of time whose mode defaults to ``Mode.DURATION``."""

    mode: Mode = Mode.DURATION

    def norm(self) -> str:
        """Return the duration's string form prefixed with ``during``."""
        return f"during {self.to_datetime()}"

mode: Mode = Mode.DURATION class-attribute

norm()

Source code in edsnlp/pipelines/misc/dates/models.py
263
264
265
266
def norm(self) -> str:
    """Return the duration's string form prefixed with ``during``."""
    return f"during {self.to_datetime()}"