Skip to content

edsnlp.pipelines.misc.consultation_dates

patterns

consultation_mention = ['rendez-vous pris', 'consultation', 'consultation.{1,8}examen', 'examen clinique', 'de compte rendu', "date de l'examen", 'examen realise le', 'date de la visite'] module-attribute

town_mention = ['paris', 'kremlin.bicetre', 'creteil', 'boulogne.billancourt', 'villejuif', 'clamart', 'bobigny', 'clichy', 'ivry.sur.seine', 'issy.les.moulineaux', 'draveil', 'limeil', 'champcueil', 'roche.guyon', 'bondy', 'colombes', 'hendaye', 'herck.sur.mer', 'labruyere', 'garches', 'sevran', 'hyeres'] module-attribute

document_date_mention = ['imprime le', 'signe electroniquement', 'signe le', 'saisi le', 'dicte le', 'tape le', 'date de reference', 'date\\s*:', 'dactylographie le', 'date du rapport'] module-attribute

consultation_dates

ConsultationDates

Bases: GenericMatcher

Class to extract consultation dates from "CR-CONS" documents.

The pipeline populates the doc.spans['consultation_dates'] list.

For each extraction s in this list, the corresponding date is available as s._.consultation_date.

PARAMETER DESCRIPTION
nlp

Language pipeline object

TYPE: Language

consultation_mention

List of RegEx for consultation mentions.

  • If type==list: Overrides the default list
  • If type==bool: Uses the default list if True, disables these patterns if False

TYPE: Union[List[str], bool]

town_mention : Union[List[str], bool] List of RegEx for all AP-HP hospitals' towns mentions.

- If `type==list`: Overrides the default list
- If `type==bool`: Uses the default list if True, disables these patterns if False

document_date_mention : Union[List[str], bool] List of RegEx for document date.

- If `type==list`: Overrides the default list
- If `type==bool`: Uses the default list if True, disables these patterns if False
Source code in edsnlp/pipelines/misc/consultation_dates/consultation_dates.py
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
class ConsultationDates(GenericMatcher):
    """
    Matcher that pairs consultation mentions with a nearby date in "CR-CONS"
    documents.

    Calling the component fills the `#!python doc.spans['consultation_dates']`
    span group.

    Every span `s` stored there runs from the start of the mention to the end
    of the retained date, and carries that date's value in
    `s._.consultation_date`.

    Parameters
    ----------
    nlp : Language
        Language pipeline object
    consultation_mention : Union[List[str], bool]
        List of RegEx for consultation mentions.

        - If a non-empty list: overrides the default list
        - If `True`: uses the default list
        - If `False` (or any other falsy value): no pattern is used

    town_mention : Union[List[str], bool]
        List of RegEx for all AP-HP hospitals' towns mentions.

        - If a non-empty list: overrides the default list
        - If `True`: uses the default list
        - If `False` (or any other falsy value): no pattern is used
    document_date_mention : Union[List[str], bool]
        List of RegEx for document date.

        - If a non-empty list: overrides the default list
        - If `True`: uses the default list
        - If `False` (or any other falsy value): no pattern is used
    attr : str
        Forwarded unchanged to `GenericMatcher.__init__` as `attr`.
    **kwargs
        Extra keyword arguments forwarded to `GenericMatcher.__init__`.
    """

    def __init__(
        self,
        nlp: Language,
        consultation_mention: Union[List[str], bool],
        town_mention: Union[List[str], bool],
        document_date_mention: Union[List[str], bool],
        attr: str,
        **kwargs,
    ):
        # Usage warnings are emitted every time the component is built.
        logger.warning("This pipeline is still in beta")
        logger.warning(
            "This pipeline should ONLY be used on notes "
            "where `note_class_source_value == 'CR-CONS'`"
        )
        logger.warning(
            """This pipeline requires to use the normalizer pipeline with:
        lowercase=True,
        accents=True,
        quotes=True"""
        )

        # A private date matcher is only needed when the pipeline has no
        # "dates" component whose `on_ents_only` is exactly `False`.
        pipeline_has_dates = (
            nlp.has_pipe("dates") and nlp.get_pipe("dates").on_ents_only is False
        )
        if pipeline_has_dates:
            self.date_matcher = None
        else:
            # Same base configuration, but restricted to the temporary
            # "consultation_mentions" span group set in `__call__`.
            date_config = {**DEFAULT_CONFIG, "on_ents_only": "consultation_mentions"}
            self.date_matcher = Dates(nlp, **date_config)

        requested = dict(
            consultation_mention=consultation_mention,
            town_mention=town_mention,
            document_date_mention=document_date_mention,
        )

        # Resolve each argument to a concrete list of patterns:
        # `True` -> module defaults, falsy -> nothing, anything else -> as given.
        regex = {}
        for label, patterns in requested.items():
            if patterns is True:
                patterns = getattr(consult_regex, label)
            elif not patterns:
                patterns = []
            regex[label] = patterns

        super().__init__(
            nlp,
            regex=regex,
            terms={},
            attr=attr,
            ignore_excluded=False,
            **kwargs,
        )

        self.set_extensions()

    @staticmethod
    def set_extensions() -> None:
        """
        Declare the `consultation_date` extension on `Span` (default `None`)
        unless it has already been declared.
        """
        if Span.has_extension("consultation_date"):
            return
        Span.set_extension("consultation_date", default=None)

    def __call__(self, doc: Doc) -> Doc:
        """
        Finds consultation mentions and attaches the closest following date.

        Parameters
        ----------
        doc: spaCy Doc object

        Returns
        -------
        doc: Doc
            spaCy Doc object with additional
            `doc.spans['consultation_dates']` `SpanGroup`
        """

        mentions = self.process(doc)

        # Temporary group, removed again before returning.
        doc.spans["consultation_mentions"] = mentions
        doc.spans["consultation_dates"] = []

        if self.date_matcher is not None:
            doc = self.date_matcher(doc)

        for mention in mentions:
            # Candidate dates must:
            # - be in the same sentence as the mention
            # - start after the first token of the mention
            # - start at most 10 tokens after the end of the mention
            # Among the candidates, the earliest one is retained.
            kept_date = None
            for date in doc.spans["dates"]:
                is_candidate = (
                    mention.sent == date.sent
                    and date.start > mention.start
                    and date.start - mention.end <= 10
                )
                if is_candidate and (
                    kept_date is None or date.start < kept_date.start
                ):
                    kept_date = date

            if kept_date is None:
                continue

            # The resulting span covers the mention up to the end of the date.
            span = doc[mention.start : kept_date.end]
            span.label_ = mention.label_
            span._.consultation_date = kept_date._.date

            doc.spans["consultation_dates"].append(span)

        del doc.spans["consultation_mentions"]

        return doc
date_matcher = Dates(nlp, **config) instance-attribute
__init__(nlp, consultation_mention, town_mention, document_date_mention, attr, **kwargs)
Source code in edsnlp/pipelines/misc/consultation_dates/consultation_dates.py
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
def __init__(
    self,
    nlp: Language,
    consultation_mention: Union[List[str], bool],
    town_mention: Union[List[str], bool],
    document_date_mention: Union[List[str], bool],
    attr: str,
    **kwargs,
):
    """
    Build the matcher.

    Each `*_mention` argument is resolved to a list of patterns: `True`
    selects the default list of the same name in `consult_regex`, a falsy
    value yields an empty list, and any other value is used as given.
    `attr` and `**kwargs` are forwarded to the parent initialiser.
    """
    # Usage warnings are emitted every time the component is built.
    logger.warning("This pipeline is still in beta")
    logger.warning(
        "This pipeline should ONLY be used on notes "
        "where `note_class_source_value == 'CR-CONS'`"
    )
    logger.warning(
        """This pipeline requires to use the normalizer pipeline with:
    lowercase=True,
    accents=True,
    quotes=True"""
    )

    # A private date matcher is only needed when the pipeline has no
    # "dates" component whose `on_ents_only` is exactly `False`.
    pipeline_has_dates = (
        nlp.has_pipe("dates") and nlp.get_pipe("dates").on_ents_only is False
    )
    if pipeline_has_dates:
        self.date_matcher = None
    else:
        # Same base configuration, but restricted to the temporary
        # "consultation_mentions" span group.
        date_config = {**DEFAULT_CONFIG, "on_ents_only": "consultation_mentions"}
        self.date_matcher = Dates(nlp, **date_config)

    requested = dict(
        consultation_mention=consultation_mention,
        town_mention=town_mention,
        document_date_mention=document_date_mention,
    )

    # `True` -> module defaults, falsy -> nothing, anything else -> as given.
    regex = {}
    for label, patterns in requested.items():
        if patterns is True:
            patterns = getattr(consult_regex, label)
        elif not patterns:
            patterns = []
        regex[label] = patterns

    super().__init__(
        nlp,
        regex=regex,
        terms={},
        attr=attr,
        ignore_excluded=False,
        **kwargs,
    )

    self.set_extensions()
set_extensions()
Source code in edsnlp/pipelines/misc/consultation_dates/consultation_dates.py
109
110
111
112
@staticmethod
def set_extensions() -> None:
    """
    Declare the `consultation_date` extension on `Span` (default `None`)
    unless it has already been declared.
    """
    if Span.has_extension("consultation_date"):
        return
    Span.set_extension("consultation_date", default=None)
__call__(doc)

Finds entities

PARAMETER DESCRIPTION
doc

TYPE: Doc

RETURNS DESCRIPTION
doc

spaCy Doc object with additional doc.spans['consultation_dates'] SpanGroup

Source code in edsnlp/pipelines/misc/consultation_dates/consultation_dates.py
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
def __call__(self, doc: Doc) -> Doc:
    """
    Finds consultation mentions and attaches the closest following date.

    Parameters
    ----------
    doc: spaCy Doc object

    Returns
    -------
    doc: Doc
        spaCy Doc object with additional
        `doc.spans['consultation_dates']` `SpanGroup`
    """

    mentions = self.process(doc)

    # Temporary group, removed again before returning.
    doc.spans["consultation_mentions"] = mentions
    doc.spans["consultation_dates"] = []

    if self.date_matcher is not None:
        doc = self.date_matcher(doc)

    for mention in mentions:
        # Candidate dates must:
        # - be in the same sentence as the mention
        # - start after the first token of the mention
        # - start at most 10 tokens after the end of the mention
        # Among the candidates, the earliest one is retained.
        kept_date = None
        for date in doc.spans["dates"]:
            is_candidate = (
                mention.sent == date.sent
                and date.start > mention.start
                and date.start - mention.end <= 10
            )
            if is_candidate and (
                kept_date is None or date.start < kept_date.start
            ):
                kept_date = date

        if kept_date is None:
            continue

        # The resulting span covers the mention up to the end of the date.
        span = doc[mention.start : kept_date.end]
        span.label_ = mention.label_
        span._.consultation_date = kept_date._.date

        doc.spans["consultation_dates"].append(span)

    del doc.spans["consultation_mentions"]

    return doc

factory

DEFAULT_CONFIG = dict(consultation_mention=True, town_mention=False, document_date_mention=False, attr='NORM') module-attribute

create_component(nlp, name, attr, consultation_mention, town_mention, document_date_mention)

Source code in edsnlp/pipelines/misc/consultation_dates/factory.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
@deprecated_factory(
    "consultation_dates",
    "eds.consultation_dates",
    default_config=DEFAULT_CONFIG,
)
@Language.factory("eds.consultation_dates", default_config=DEFAULT_CONFIG)
def create_component(
    nlp: Language,
    name: str,
    attr: str,
    consultation_mention: Union[List[str], bool],
    town_mention: Union[List[str], bool],
    document_date_mention: Union[List[str], bool],
):
    """
    Factory building a `ConsultationDates` component.

    Registered as `eds.consultation_dates`, with `consultation_dates` passed
    to `deprecated_factory`. `name` is accepted but not used by this
    function; every other argument is handed to `ConsultationDates`.
    """
    matcher_options = dict(
        attr=attr,
        consultation_mention=consultation_mention,
        town_mention=town_mention,
        document_date_mention=document_date_mention,
    )
    return ConsultationDates(nlp, **matcher_options)
Back to top