Skip to content

edsnlp.pipelines.misc.dates.models

BaseDate

Bases: BaseModel

Source code in edsnlp/pipelines/misc/dates/models.py
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
class BaseDate(BaseModel):
    """
    Common base model for the date models of this module.

    Holds the optional ``mode`` field and the pre-validation steps
    shared by its subclasses.
    """

    mode: Optional[Mode] = None

    @validator("*", pre=True)
    def remove_space(cls, v):
        """Strip every space character from string values.

        Non-string values are passed through unchanged. Helps with
        ill-formatted PDF extractions.
        """
        return v.replace(" ", "") if isinstance(v, str) else v

    @root_validator(pre=True)
    def validate_strings(cls, d: Dict[str, str]) -> Dict[str, str]:
        """
        Expand ``<field>_<value>`` keys into ``field: value`` entries.

        Parameters
        ----------
        d : Dict[str, str]
            Original data.

        Returns
        -------
        Dict[str, str]
            A copy of the data, extended with one ``field: value`` entry
            for each key that contains an underscore and whose value is
            not ``None``. The original entries are kept.
        """
        result = d.copy()

        for name, raw in d.items():
            # Only matched (non-None) compound keys carry information.
            if raw is None or "_" not in name:
                continue

            # The key must contain exactly one underscore, otherwise the
            # two-name unpacking below fails.
            key, value = name.split("_")
            result[key] = value

        return result

remove_space(v)

Remove spaces. Useful for coping with ill-formatted PDF extractions.

Source code in edsnlp/pipelines/misc/dates/models.py
40
41
42
43
44
45
@validator("*", pre=True)
def remove_space(cls, v):
    """Strip every space character from string values.

    Non-string values are passed through unchanged. Helps with
    ill-formatted PDF extractions.
    """
    return v.replace(" ", "") if isinstance(v, str) else v

Relative

Bases: BaseDate

Source code in edsnlp/pipelines/misc/dates/models.py
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
class Relative(BaseDate):
    """
    Relative amount of time, stored as optional integer counts per unit.
    """

    year: Optional[int] = None
    month: Optional[int] = None
    week: Optional[int] = None
    day: Optional[int] = None
    hour: Optional[int] = None
    minute: Optional[int] = None
    second: Optional[int] = None

    @root_validator(pre=True)
    def parse_unit(cls, d: Dict[str, str]) -> Dict[str, str]:
        """
        Store the detected number under the key named by the detected unit.

        When ``d["unit"]`` is truthy, ``d[<unit>]`` is set to
        ``d.get("number")``. The dictionary is modified in place.

        Parameters
        ----------
        d : Dict[str, str]
            Original data

        Returns
        -------
        Dict[str, str]
            Transformed data (the same dictionary object)
        """
        unit = d.get("unit")

        if not unit:
            return d

        d[unit] = d.get("number")
        return d

    def to_datetime(self, **kwargs) -> pendulum.Duration:
        """
        Convert the non-None fields into a ``pendulum.Duration``.

        Each remaining field name is pluralised (``day`` -> ``days``) to
        form the keyword arguments of ``pendulum.duration``. The result is
        negated when the model's ``direction`` entry (if any) equals
        ``Direction.PAST``. Extra keyword arguments are accepted and
        ignored.
        """
        fields = self.dict(exclude_none=True)

        # `direction` and `mode` are not duration components.
        direction = fields.pop("direction", None)
        fields.pop("mode", None)

        sign = -1 if direction == Direction.PAST else 1
        plural = {f"{unit}s": amount for unit, amount in fields.items()}

        return sign * pendulum.duration(**plural)

parse_unit(d)

Units need to be handled separately.

This validator stores the detected number under the key named by the detected unit.

PARAMETER DESCRIPTION
d

Original data

TYPE: Dict[str, str]

RETURNS DESCRIPTION
Dict[str, str]

Transformed data

Source code in edsnlp/pipelines/misc/dates/models.py
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
@root_validator(pre=True)
def parse_unit(cls, d: Dict[str, str]) -> Dict[str, str]:
    """
    Store the detected number under the key named by the detected unit.

    When ``d["unit"]`` is truthy, ``d[<unit>]`` is set to
    ``d.get("number")``. The dictionary is modified in place.

    Parameters
    ----------
    d : Dict[str, str]
        Original data

    Returns
    -------
    Dict[str, str]
        Transformed data (the same dictionary object)
    """
    unit = d.get("unit")

    if not unit:
        return d

    d[unit] = d.get("number")
    return d

RelativeDate

Bases: Relative

Source code in edsnlp/pipelines/misc/dates/models.py
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
class RelativeDate(Relative):
    """
    Relative date: a ``Relative`` amount of time plus a ``direction``
    (defaulting to ``Direction.CURRENT``).
    """

    direction: Direction = Direction.CURRENT

    def to_datetime(
        self,
        note_datetime: Optional[datetime] = None,
        **kwargs,
    ) -> pendulum.Duration:
        """
        Resolve the relative date, optionally against a note datetime.

        Parameters
        ----------
        note_datetime : Optional[datetime]
            Reference datetime. Ignored when ``None`` or a ``NaTType``.

        Returns
        -------
        pendulum.Duration
            The signed duration computed by ``Relative.to_datetime`` when
            no usable ``note_datetime`` is given; otherwise the result of
            ``note_datetime + duration``.
        """
        td = super().to_datetime()

        # Without a usable reference, only the offset can be returned.
        if note_datetime is None or isinstance(note_datetime, NaTType):
            return td

        return note_datetime + td

    def norm(self) -> str:
        """
        Build the normalised string form of the relative date.

        For ``Direction.CURRENT``, returns ``"~0 <key>"`` where ``<key>``
        is the first non-None field left once ``direction`` and ``mode``
        are removed (``"day"`` if none is left). Otherwise returns the
        string form of the duration, prefixed with ``+`` when it is
        strictly positive.
        """
        if self.direction != Direction.CURRENT:
            td = self.to_datetime()
            text = str(td)
            return f"+{text}" if td.in_seconds() > 0 else text

        fields = self.dict(exclude_none=True)
        for ignored in ("direction", "mode"):
            fields.pop(ignored, None)

        key = next(iter(fields), "day")
        return f"~0 {key}"

    @root_validator(pre=True)
    def handle_specifics(cls, d: Dict[str, str]) -> Dict[str, str]:
        """
        Specific patterns such as `aujourd'hui`, `hier`, etc.,
        need to be handled separately.

        The value of ``d["specific"]`` is looked up in ``specific_dict``;
        a truthy match is merged into ``d`` in place.

        Parameters
        ----------
        d : Dict[str, str]
            Original data.

        Returns
        -------
        Dict[str, str]
            Modified data (the same dictionary object).
        """
        overrides = specific_dict.get(d.get("specific"))

        if overrides:
            d.update(overrides)

        return d

handle_specifics(d)

Specific patterns such as "aujourd'hui", "hier", etc., need to be handled separately.

PARAMETER DESCRIPTION
d

Original data.

TYPE: Dict[str, str]

RETURNS DESCRIPTION
Dict[str, str]

Modified data.

Source code in edsnlp/pipelines/misc/dates/models.py
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
@root_validator(pre=True)
def handle_specifics(cls, d: Dict[str, str]) -> Dict[str, str]:
    """
    Specific patterns such as `aujourd'hui`, `hier`, etc.,
    need to be handled separately.

    The value of ``d["specific"]`` is looked up in ``specific_dict``;
    a truthy match is merged into ``d`` in place.

    Parameters
    ----------
    d : Dict[str, str]
        Original data.

    Returns
    -------
    Dict[str, str]
        Modified data (the same dictionary object).
    """
    overrides = specific_dict.get(d.get("specific"))

    if overrides:
        d.update(overrides)

    return d