Skip to content

edsnlp.pipelines.ner.disorders.base

DisorderMatcher

Bases: ContextualMatcher

Base class used to implement various disorders or behaviors extraction pipes

PARAMETER DESCRIPTION
nlp

spaCy Language object.

TYPE: Language

name

The name of the pipe

TYPE: str

patterns

The configuration dictionary

TYPE: Union[Dict[str, Any], List[Dict[str, Any]]]

include_assigned

Whether to include any assigned matches in the final entity

TYPE: bool DEFAULT: True

ignore_excluded

Whether to skip excluded tokens during matching.

TYPE: bool DEFAULT: True

ignore_space_tokens

Whether to skip space tokens during matching.

TYPE: bool DEFAULT: True

detailled_statusmapping

Mapping from integer status (0, 1 or 2) to human-readable string

TYPE: Optional[Dict[int, str]] DEFAULT: None

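alignment_mode : str — Overwrite alignment mode. regex_flags : Union[re.RegexFlag, int] — RegExp flags to use when matching, filtering and assigning (see https://docs.python.org/3/library/re.html#flags). Note: neither is accepted by the DisorderMatcher constructor shown below, which always passes alignment_mode="expand" and regex_flags=re.S to the parent class.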

Source code in edsnlp/pipelines/ner/disorders/base.py
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
class DisorderMatcher(ContextualMatcher):
    """
    Base class used to implement various disorders or behaviors extraction pipes

    Parameters
    ----------
    nlp : Language
        spaCy `Language` object.
    name : str
        The name of the pipe
    patterns: Union[Dict[str, Any], List[Dict[str, Any]]]
        The configuration dictionary
    include_assigned : bool
        Whether to include any assigned matches in the final entity
    ignore_excluded : bool
        Whether to skip excluded tokens during matching.
    ignore_space_tokens: bool
        Whether to skip space tokens during matching.
    detailled_statusmapping: Optional[Dict[int, str]]
        Mapping from integer status (0, 1 or 2) to human-readable string.
        When omitted (or empty), `{0: "ABSENT", 1: "PRESENT"}` is used. This
        default has no entry for status 2, so a pipe that sets
        `span._.status` to 2 must provide its own mapping, otherwise
        `__call__` raises a `KeyError`.

    Notes
    -----
    The following arguments of the parent `ContextualMatcher` are not
    exposed by this class; they are always forwarded with fixed values:

    - `attr="NORM"`
    - `regex_flags=re.S` (see
      [here](https://docs.python.org/3/library/re.html#flags))
    - `alignment_mode="expand"`
    - `assign_as_span=True`
    """

    def __init__(
        self,
        nlp: Language,
        name: str,
        patterns: Union[Dict[str, Any], List[Dict[str, Any]]],
        include_assigned: bool = True,
        ignore_excluded: bool = True,
        ignore_space_tokens: bool = True,
        detailled_statusmapping: Optional[Dict[int, str]] = None,
    ):
        self.nlp = nlp
        # `or` means that both `None` and an empty dict fall back to the
        # default two-state mapping.
        self.detailled_statusmapping = detailled_statusmapping or {
            0: "ABSENT",
            1: "PRESENT",
        }

        super().__init__(
            nlp=nlp,
            name=name,
            attr="NORM",
            patterns=patterns,
            ignore_excluded=ignore_excluded,
            ignore_space_tokens=ignore_space_tokens,
            regex_flags=re.S,
            alignment_mode="expand",
            assign_as_span=True,
            include_assigned=include_assigned,
        )

        self.set_extensions()

    @classmethod
    def set_extensions(cls) -> None:
        """
        Register the `status` and `detailled_status` span extensions.

        The parent class extensions are set first. Each extension is only
        declared when it does not exist yet, so calling this method several
        times is harmless.
        """
        super().set_extensions()

        if not Span.has_extension("status"):
            Span.set_extension("status", default=1)
        if not Span.has_extension("detailled_status"):
            Span.set_extension("detailled_status", default="PRESENT")

    def __call__(self, doc: Doc) -> Doc:
        """
        Tags entities.

        The spans produced by `process` are passed through `postprocess`,
        filtered with `filter_spans`, labelled with their human-readable
        status and stored in `doc.spans[self.name]`.

        Parameters
        ----------
        doc : Doc
            spaCy Doc object

        Returns
        -------
        doc : Doc
            annotated spaCy Doc object

        Raises
        ------
        KeyError
            If the `status` of a span is not a key of
            `detailled_statusmapping`.
        """
        spans = self.postprocess(doc, self.process(doc))
        spans = filter_spans(spans)

        for span in spans:
            # Translate the integer status into its readable counterpart.
            span._.detailled_status = self.detailled_statusmapping[span._.status]

        doc.spans[self.name] = spans

        return doc

    def postprocess(self, doc: Doc, spans: Iterable[Span]) -> Iterable[Span]:
        """
        Hook called on the matched spans before they are filtered.

        Can be overridden by subclasses. The default implementation yields
        every span unchanged.

        Parameters
        ----------
        doc : Doc
            spaCy Doc object (unused by the default implementation)
        spans : Iterable[Span]
            Spans produced by `process`

        Yields
        ------
        Span
            The spans to keep
        """
        yield from spans

__call__(doc)

Tags entities.

PARAMETER DESCRIPTION
doc

spaCy Doc object

TYPE: Doc

RETURNS DESCRIPTION
doc

annotated spaCy Doc object

TYPE: Doc

Source code in edsnlp/pipelines/ner/disorders/base.py
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
def __call__(self, doc: Doc) -> Doc:
    """
    Run the pipe on a document and store the extracted entities.

    The spans returned by `process` go through `postprocess`, are filtered
    with `filter_spans`, receive their human-readable status and are finally
    saved under `doc.spans[self.name]`.

    Parameters
    ----------
    doc : Doc
        spaCy Doc object

    Returns
    -------
    doc : Doc
        annotated spaCy Doc object
    """
    matches = self.process(doc)
    kept = filter_spans(self.postprocess(doc, matches))

    for ent in kept:
        # Translate the integer status into its readable counterpart.
        status_code = ent._.status
        ent._.detailled_status = self.detailled_statusmapping[status_code]

    doc.spans[self.name] = kept

    return doc

postprocess(doc, spans)

Can be overridden by subclasses to post-process the matched spans.

Source code in edsnlp/pipelines/ner/disorders/base.py
104
105
106
107
108
def postprocess(self, doc: Doc, spans: Iterable[Span]) -> Iterable[Span]:
    """
    Hook that can be overridden by subclasses to post-process the spans.

    The default implementation yields every span unchanged.

    Parameters
    ----------
    doc : Doc
        spaCy Doc object (unused by the default implementation)
    spans : Iterable[Span]
        Spans to post-process

    Yields
    ------
    Span
        The spans to keep
    """
    yield from spans