Skip to content

edsnlp.pipelines.ner.adicap.adicap

eds.adicap pipeline

Adicap

Bases: ContextualMatcher

Source code in edsnlp/pipelines/ner/adicap/adicap.py
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
class Adicap(ContextualMatcher):
    """
    Pipeline component that tags ADICAP codes and decodes them.

    The component is a ``ContextualMatcher`` configured with a single
    pattern: a prefix regex that triggers the match, and an assigned
    ``code`` regex that replaces the matched entity. Every retained span
    receives a decoded ``AdicapCode`` in ``span._.adicap`` and
    ``span._.value``.
    """

    def __init__(self, nlp, pattern, attr, prefix, window):
        """
        Build the matcher and load the decoding tables.

        Parameters
        ----------
        nlp
            Pipeline object, stored on the instance and forwarded to
            ``ContextualMatcher``.
        pattern
            Regex used to extract the code itself. Falls back to
            ``patterns.base_code`` when ``None``.
        attr
            Forwarded to ``ContextualMatcher`` both as ``attr`` and as
            the ``regex_attr`` of the pattern.
        prefix
            Regex that anchors a mention. Falls back to
            ``patterns.adicap_prefix`` when ``None``.
        window
            Forwarded as the ``window`` of the ``code`` assignment
            (presumably the search window around the prefix, see the
            ``ContextualMatcher`` documentation).
        """

        self.nlp = nlp
        if pattern is None:
            pattern = patterns.base_code

        if prefix is None:
            prefix = patterns.adicap_prefix

        adicap_pattern = dict(
            source="adicap",
            regex=prefix,
            regex_attr=attr,
            assign=[
                dict(
                    name="code",
                    regex=pattern,
                    window=window,
                    replace_entity=True,
                    reduce_mode=None,
                ),
            ],
        )

        super().__init__(
            nlp=nlp,
            name="adicap",
            attr=attr,
            patterns=adicap_pattern,
            ignore_excluded=False,
            regex_flags=0,
            alignment_mode="expand",
            include_assigned=False,
            assign_as_span=False,
        )

        self.decode_dict = get_adicap_dict()

        self.set_extensions()

    @classmethod
    def set_extensions(cls) -> None:
        """
        Register the ``adicap`` and ``value`` span extensions.

        The parent extensions are registered first. Each extension is
        only created when it does not exist yet, with ``None`` as its
        default value.
        """
        super().set_extensions()
        if not Span.has_extension("adicap"):
            Span.set_extension("adicap", default=None)
        if not Span.has_extension("value"):
            Span.set_extension("value", default=None)

    def decode(self, code: str):
        """
        Decode a raw ADICAP code into an ``AdicapCode`` object.

        Parameters
        ----------
        code : str
            Matched code. Every character that is not an ASCII letter,
            a digit or a space is removed before decoding.

        Returns
        -------
        AdicapCode
            Object holding the cleaned code and the labels looked up in
            ``self.decode_dict``. A position missing from the code, or a
            key absent from a table, yields ``None`` for that field.
        """
        code = re.sub("[^A-Za-z0-9 ]+", "", code)

        # Slices are used instead of direct indexing so that a code
        # shorter than expected is looked up as a missing key (-> None)
        # rather than raising an IndexError.
        adicap = AdicapCode(
            code=code,
            sampling_mode=self.decode_dict["D1"]["codes"].get(code[0:1]),
            technic=self.decode_dict["D2"]["codes"].get(code[1:2]),
            organ=self.decode_dict["D3"]["codes"].get(code[2:4]),
        )

        short_key = code[4:8]
        long_key = code[2:8]
        behaviour_key = code[5:6]

        # No early exit: when several tables know the code, the last
        # matching table wins.
        for d in ["D4", "D5", "D6", "D7"]:
            table = self.decode_dict[d]
            codes = table["codes"]

            adicap_short = codes.get(short_key)
            adicap_long = codes.get(long_key)

            if adicap_short is None and adicap_long is None:
                continue

            adicap.pathology = table["label"]
            adicap.behaviour_type = codes.get(behaviour_key)

            # The short form takes precedence over the long one.
            if adicap_short is not None:
                adicap.pathology_type = adicap_short
            else:
                adicap.pathology_type = adicap_long

        return adicap

    def __call__(self, doc: Doc) -> Doc:
        """
        Tags ADICAP mentions.

        Matched spans are stored in ``doc.spans["adicap"]`` and merged
        into ``doc.ents``; entities dropped by the merge are appended to
        ``doc.spans["discarded"]``.

        Parameters
        ----------
        doc : Doc
            spaCy Doc object

        Returns
        -------
        doc : Doc
            spaCy Doc object, annotated for ADICAP
        """
        spans = self.process(doc)
        spans = filter_spans(spans)

        for span in spans:
            span._.adicap = self.decode(span._.assigned["code"])
            span._.value = span._.adicap
            # The raw assignment is cleared once it has been decoded.
            span._.assigned = None

        doc.spans["adicap"] = spans

        ents, discarded = filter_spans(list(doc.ents) + spans, return_discarded=True)

        doc.ents = ents

        if "discarded" not in doc.spans:
            doc.spans["discarded"] = []
        doc.spans["discarded"].extend(discarded)

        return doc

__call__(doc)

Tags ADICAP mentions.

PARAMETER   DESCRIPTION
doc         spaCy Doc object
            TYPE: Doc

RETURNS     DESCRIPTION
doc         spaCy Doc object, annotated for ADICAP
            TYPE: Doc

Source code in edsnlp/pipelines/ner/adicap/adicap.py
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
def __call__(self, doc: Doc) -> Doc:
    """
    Annotate a document with its ADICAP mentions.

    Parameters
    ----------
    doc : Doc
        spaCy Doc object to process

    Returns
    -------
    doc : Doc
        The same Doc, with matches stored in ``doc.spans["adicap"]``,
        merged into ``doc.ents``, and with the entities dropped by the
        merge appended to ``doc.spans["discarded"]``
    """
    matches = filter_spans(self.process(doc))

    for match in matches:
        extensions = match._
        extensions.adicap = self.decode(extensions.assigned["code"])
        extensions.value = extensions.adicap
        # The raw assignment is cleared once it has been decoded.
        extensions.assigned = None

    doc.spans["adicap"] = matches

    # Merge the new matches with the entities already on the document.
    kept, rejected = filter_spans(
        [*doc.ents, *matches],
        return_discarded=True,
    )
    doc.ents = kept

    if "discarded" not in doc.spans:
        doc.spans["discarded"] = []
    doc.spans["discarded"].extend(rejected)

    return doc