Skip to content

edsnlp.pipelines.misc.reason

patterns

reasons = dict(reasons=['(?i)motif de l.?hospitalisation : .+', '(?i)hospitalis[ée].?.*(pour|. cause|suite [àa]).+', '(?i)(consulte|prise en charge(?!\\set\\svous\\sassurer\\sun\\straitement\\sadapté)).*pour.+', '(?i)motif\\sd.hospitalisation\\s:.+', '(?i)au total\\s?\\:?\\s?\\n?.+', '(?i)motif\\sde\\sla\\sconsultation', '(?i)motif\\sd.admission', '(?i)conclusion\\smedicale']) module-attribute

sections_reason = ['motif', 'conclusion'] module-attribute

section_exclude = ['antécédents', 'antécédents familiaux', 'histoire de la maladie'] module-attribute

factory

DEFAULT_CONFIG = dict(reasons=None, attr='TEXT', use_sections=False, ignore_excluded=False) module-attribute

create_component(nlp, name, reasons, attr, use_sections, ignore_excluded)

Source code in edsnlp/pipelines/misc/reason/factory.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
@deprecated_factory("reason", "eds.reason", default_config=DEFAULT_CONFIG)
@Language.factory("eds.reason", default_config=DEFAULT_CONFIG)
def create_component(
    nlp: Language,
    name: str,
    reasons: Optional[Dict[str, Union[List[str], str]]],
    attr: str,
    use_sections: bool,
    ignore_excluded: bool,
):
    """Build a `Reason` component for the `eds.reason` factory.

    Parameters
    ----------
    nlp : Language
        spaCy pipeline, forwarded to `Reason`.
    name : str
        Name of the component; part of the factory signature but not
        used by this function.
    reasons : Optional[Dict[str, Union[List[str], str]]]
        The terminology of reasons, forwarded to `Reason`.
    attr : str
        spaCy attribute to match on, forwarded to `Reason`.
    use_sections : bool
        Forwarded to `Reason`.
    ignore_excluded : bool
        Forwarded to `Reason`.

    Returns
    -------
    Reason
        The configured component.
    """
    options = dict(
        reasons=reasons,
        attr=attr,
        use_sections=use_sections,
        ignore_excluded=ignore_excluded,
    )
    return Reason(nlp, **options)

reason

Reason

Bases: GenericMatcher

Pipeline to identify the reason of the hospitalisation.

It declares a Span extension called ents_reason and adds the key reasons to doc.spans.

It also declares the boolean extension is_reason. This extension is set to True for the Reason Spans but also for the entities that overlap the reason span.

PARAMETER DESCRIPTION
nlp

spaCy nlp pipeline to use for matching.

TYPE: Language

reasons

The terminology of reasons.

TYPE: Optional[Dict[str, Union[List[str], str]]]

attr

spaCy's attribute to use: a string with the value "TEXT" or "NORM", or a dict with the key 'term_attr'. We can also add a key for each regex.

TYPE: str

use_sections

Whether or not to use the sections pipeline to improve results.

TYPE: bool

ignore_excluded

Whether to skip excluded tokens.

TYPE: bool

Source code in edsnlp/pipelines/misc/reason/reason.py
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
class Reason(GenericMatcher):
    """Pipeline to identify the reason of the hospitalisation.

    It declares a Span extension called `ents_reason` and adds
    the key `reasons` to doc.spans.

    It also declares the boolean extension `is_reason`.
    When the document contains at least one entity, this extension is
    set to True for the reason spans and for the entities that
    `check_inclusion` reports as lying within a reason span.

    Parameters
    ----------
    nlp : Language
        spaCy nlp pipeline to use for matching.
    reasons : Optional[Dict[str, Union[List[str], str]]]
        The terminology of reasons. Falls back to `patterns.reasons`
        when None.
    attr : Union[Dict[str, str], str]
        spaCy's attribute to use:
        a string with the value "TEXT" or "NORM", or a dict with
        the key 'term_attr'. We can also add a key for each regex.
    use_sections : bool
        Whether or not to use the `sections` pipeline to improve results.
    ignore_excluded : bool
        Whether to skip excluded tokens.
    """

    def __init__(
        self,
        nlp: Language,
        reasons: Optional[Dict[str, Union[List[str], str]]],
        attr: Union[Dict[str, str], str],
        use_sections: bool,
        ignore_excluded: bool,
    ):

        if reasons is None:
            reasons = patterns.reasons

        super().__init__(
            nlp,
            terms=None,
            regex=reasons,
            attr=attr,
            ignore_excluded=ignore_excluded,
        )

        # Sections can only be used if a sections component is in the pipeline.
        self.use_sections = use_sections and (
            "eds.sections" in self.nlp.pipe_names or "sections" in self.nlp.pipe_names
        )
        if use_sections and not self.use_sections:
            logger.warning(
                "You have requested that the pipeline use annotations "
                "provided by the `eds.sections` pipeline, but it was not set. "
                "Skipping that step."
            )

        self.set_extensions()

    @staticmethod
    def set_extensions() -> None:
        """Declare the `ents_reason` and `is_reason` Span extensions.

        Each extension is only registered if it does not exist yet.
        """

        if not Span.has_extension("ents_reason"):
            Span.set_extension("ents_reason", default=None)

        if not Span.has_extension("is_reason"):
            Span.set_extension("is_reason", default=False)

    def _enhance_with_sections(self, sections: Iterable, reasons: Iterable) -> List:
        """Enhance the list of reasons with the section information.

        Sections whose label is in `patterns.sections_reason` are added
        as reasons. Reasons that `check_inclusion` reports as included in
        a section whose label is in `patterns.section_exclude` are removed.

        Parameters
        ----------
        sections : Iterable
            Spans of sections identified with the `sections` pipeline
        reasons : Iterable
            Reasons list identified by the regex. It is not modified.

        Returns
        -------
        List
            Updated list of spans reasons (a new list)
        """

        # Work on a copy: the input is never mutated, and any iterable works.
        updated = list(reasons)

        for section in sections:
            if section.label_ in patterns.sections_reason:
                updated.append(section)

            if section.label_ in patterns.section_exclude:
                # Rebuild the list rather than calling `remove` while
                # iterating over it, which would skip the element that
                # follows each removed one.
                updated = [
                    reason
                    for reason in updated
                    if not check_inclusion(reason, section.start, section.end)
                ]

        return updated

    def __call__(self, doc: Doc) -> Doc:
        """Find spans related to the reasons of the hospitalisation

        The spans are stored in `doc.spans["reasons"]`.

        Parameters
        ----------
        doc : Doc
            Document to process. When sections are used,
            `doc.spans["sections"]` is read.

        Returns
        -------
        Doc
            The same document, annotated.
        """
        matches = self.process(doc)
        reasons = get_spans(matches, "reasons")

        if self.use_sections:
            sections = doc.spans["sections"]
            reasons = self._enhance_with_sections(sections=sections, reasons=reasons)

        doc.spans["reasons"] = reasons

        # Entities
        # NOTE(review): when the document has no entity, reason spans keep the
        # default extension values (`is_reason` False, `ents_reason` None).
        if len(doc.ents) > 0:
            for reason in reasons:  # TODO optimize this iteration
                ent_list = []
                for ent in doc.ents:
                    if check_inclusion(ent, reason.start, reason.end):
                        ent_list.append(ent)
                        ent._.is_reason = True

                reason._.ents_reason = ent_list
                reason._.is_reason = True

        return doc
use_sections = use_sections and ('eds.sections' in self.nlp.pipe_names or 'sections' in self.nlp.pipe_names) instance-attribute
__init__(nlp, reasons, attr, use_sections, ignore_excluded)
Source code in edsnlp/pipelines/misc/reason/reason.py
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
def __init__(
    self,
    nlp: Language,
    reasons: Optional[Dict[str, Union[List[str], str]]],
    attr: Union[Dict[str, str], str],
    use_sections: bool,
    ignore_excluded: bool,
):
    """Set up the matcher and register the Span extensions.

    Parameters
    ----------
    nlp : Language
        spaCy nlp pipeline to use for matching.
    reasons : Optional[Dict[str, Union[List[str], str]]]
        The terminology of reasons. Falls back to `patterns.reasons`
        when None.
    attr : Union[Dict[str, str], str]
        spaCy's attribute to use, forwarded to the parent matcher.
    use_sections : bool
        Whether or not to use the `sections` pipeline to improve results.
        Only honoured if a sections component is in the pipeline.
    ignore_excluded : bool
        Whether to skip excluded tokens.
    """

    if reasons is None:
        reasons = patterns.reasons

    super().__init__(
        nlp,
        terms=None,
        regex=reasons,
        attr=attr,
        ignore_excluded=ignore_excluded,
    )

    # Sections can only be used if a sections component is in the pipeline.
    self.use_sections = use_sections and (
        "eds.sections" in self.nlp.pipe_names or "sections" in self.nlp.pipe_names
    )
    if use_sections and not self.use_sections:
        # The message names the pipe actually looked up above.
        logger.warning(
            "You have requested that the pipeline use annotations "
            "provided by the `eds.sections` pipeline, but it was not set. "
            "Skipping that step."
        )

    self.set_extensions()
set_extensions()
Source code in edsnlp/pipelines/misc/reason/reason.py
71
72
73
74
75
76
77
78
@staticmethod
def set_extensions() -> None:
    """Declare the `ents_reason` and `is_reason` Span extensions.

    An extension is only registered when it does not exist yet.
    """
    # (extension name, default value), in registration order.
    wanted = (("ents_reason", None), ("is_reason", False))

    for ext_name, ext_default in wanted:
        if Span.has_extension(ext_name):
            continue
        Span.set_extension(ext_name, default=ext_default)
_enhance_with_sections(sections, reasons)

Enhance the list of reasons with the section information. If a reason overlaps with an excluded section (e.g. history), it is removed from the list.

PARAMETER DESCRIPTION
sections

Spans of sections identified with the sections pipeline

TYPE: Iterable

reasons

Reasons list identified by the regex

TYPE: Iterable

RETURNS DESCRIPTION
List

Updated list of spans reasons

Source code in edsnlp/pipelines/misc/reason/reason.py
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
def _enhance_with_sections(self, sections: Iterable, reasons: Iterable) -> List:
    """Enhance the list of reasons with the section information.
    If the reason overlaps with history, so it will be removed from the list

    Parameters
    ----------
    sections : Iterable
        Spans of sections identified with the `sections` pipeline
    reasons : Iterable
        Reasons list identified by the regex

    Returns
    -------
    List
        Updated list of spans reasons
    """

    for section in sections:
        if section.label_ in patterns.sections_reason:
            reasons.append(section)

        if section.label_ in patterns.section_exclude:
            for reason in reasons:
                if check_inclusion(reason, section.start, section.end):
                    reasons.remove(reason)

    return reasons
__call__(doc)

Find spans related to the reasons of the hospitalisation

PARAMETER DESCRIPTION
doc

TYPE: Doc

RETURNS DESCRIPTION
Doc
Source code in edsnlp/pipelines/misc/reason/reason.py
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
def __call__(self, doc: Doc) -> Doc:
    """Find spans related to the reasons of the hospitalisation

    The spans are stored in `doc.spans["reasons"]`. If the document has
    at least one entity, each reason span gets `is_reason` set to True
    and `ents_reason` set to the entities that `check_inclusion` reports
    as included in it; those entities are flagged `is_reason` as well.

    Parameters
    ----------
    doc : Doc
        Document to process. When sections are used,
        `doc.spans["sections"]` is read.

    Returns
    -------
    Doc
        The same document, annotated.
    """
    reasons = get_spans(self.process(doc), "reasons")

    if self.use_sections:
        reasons = self._enhance_with_sections(
            sections=doc.spans["sections"],
            reasons=reasons,
        )

    doc.spans["reasons"] = reasons

    # Without entities there is nothing to link: leave the extensions untouched.
    if not len(doc.ents) > 0:
        return doc

    for reason in reasons:  # TODO optimize this iteration
        included = [
            ent
            for ent in doc.ents
            if check_inclusion(ent, reason.start, reason.end)
        ]
        for ent in included:
            ent._.is_reason = True

        reason._.ents_reason = included
        reason._.is_reason = True

    return doc
Back to top