14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
class Adicap(ContextualMatcher):
    """
    Contextual matcher that tags ADICAP codes and decodes them.

    The component registers a single ``ContextualMatcher`` pattern whose
    anchor is the ``prefix`` regex and whose ``code`` assignment is the
    ``pattern`` regex, searched within ``window`` and declared with
    ``replace_entity=True``. Every matched code is then decoded with the
    lookup tables returned by ``get_adicap_dict()``.
    """

    def __init__(self, nlp, pattern, attr, prefix, window):
        """
        Build the matcher.

        Parameters
        ----------
        nlp
            Pipeline object, stored on the instance and forwarded to
            ``ContextualMatcher``.
        pattern
            Regex used for the ``code`` assignment. ``None`` selects
            ``patterns.base_code``.
        attr
            Token attribute the regexes are matched on (forwarded both as
            ``regex_attr`` of the pattern and as ``attr`` of the matcher).
        prefix
            Regex used as the anchor of the pattern. ``None`` selects
            ``patterns.adicap_prefix``.
        window
            Window forwarded to the ``code`` assignment.
        """
        self.nlp = nlp

        if pattern is None:
            pattern = patterns.base_code
        if prefix is None:
            prefix = patterns.adicap_prefix

        adicap_pattern = dict(
            source="adicap",
            regex=prefix,
            regex_attr=attr,
            assign=[
                dict(
                    name="code",
                    regex=pattern,
                    window=window,
                    replace_entity=True,
                    reduce_mode=None,
                ),
            ],
        )

        super().__init__(
            nlp=nlp,
            name="adicap",
            attr=attr,
            patterns=adicap_pattern,
            ignore_excluded=False,
            regex_flags=0,
            alignment_mode="expand",
            include_assigned=False,
            assign_as_span=False,
        )

        self.decode_dict = get_adicap_dict()
        self.set_extensions()

    @classmethod
    def set_extensions(cls) -> None:
        """
        Register the ``adicap`` and ``value`` span extensions.

        The parent extensions are registered first; each extension is only
        created when it does not exist yet, so the method can be called
        repeatedly.
        """
        super().set_extensions()

        if not Span.has_extension("adicap"):
            Span.set_extension("adicap", default=None)
        if not Span.has_extension("value"):
            Span.set_extension("value", default=None)

    def decode(self, code):
        """
        Decode a raw code string into an ``AdicapCode``.

        Parameters
        ----------
        code : str
            Matched code. Every character that is not an ASCII letter, a
            digit or a space is removed before decoding.

        Returns
        -------
        AdicapCode
            Object holding the cleaned code and the labels found in the
            ``D1`` to ``D7`` tables of ``self.decode_dict``. A field whose
            characters are absent from the code, or unknown to the tables,
            is left undecoded rather than raising ``IndexError``.
        """
        code = re.sub("[^A-Za-z0-9 ]+", "", code)
        tables = self.decode_dict

        # Slices (not indexing) are used on purpose: a slice past the end of
        # the string is simply empty, so a truncated code yields ``None``
        # from ``.get`` instead of an ``IndexError``.
        adicap = AdicapCode(
            code=code,
            sampling_mode=tables["D1"]["codes"].get(code[0:1]),
            technic=tables["D2"]["codes"].get(code[1:2]),
            organ=tables["D3"]["codes"].get(code[2:4]),
        )

        # Lookup keys do not depend on the table, so compute them once.
        short_key = code[4:8]
        long_key = code[2:8]
        behaviour_key = code[5:6]

        # Tables are scanned in order; when several match, the last one wins.
        for d in ("D4", "D5", "D6", "D7"):
            codes = tables[d]["codes"]
            adicap_short = codes.get(short_key)
            adicap_long = codes.get(long_key)

            if adicap_short is None and adicap_long is None:
                continue

            adicap.pathology = tables[d]["label"]
            adicap.behaviour_type = codes.get(behaviour_key)
            # The short key takes precedence over the long one.
            if adicap_short is not None:
                adicap.pathology_type = adicap_short
            else:
                adicap.pathology_type = adicap_long

        return adicap

    def __call__(self, doc: Doc) -> Doc:
        """
        Tags ADICAP mentions.

        Matched spans are stored in ``doc.spans["adicap"]`` and merged into
        ``doc.ents``; entities dropped by the merge are appended to
        ``doc.spans["discarded"]``.

        Parameters
        ----------
        doc : Doc
            spaCy Doc object

        Returns
        -------
        doc : Doc
            spaCy Doc object, annotated for ADICAP
        """
        spans = self.process(doc)
        spans = filter_spans(spans)

        for span in spans:
            span._.adicap = self.decode(span._.assigned["code"])
            span._.value = span._.adicap
            # The raw assignment is cleared once it has been decoded.
            span._.assigned = None

        doc.spans["adicap"] = spans

        ents, discarded = filter_spans(list(doc.ents) + spans, return_discarded=True)
        doc.ents = ents

        if "discarded" not in doc.spans:
            doc.spans["discarded"] = []
        doc.spans["discarded"].extend(discarded)

        return doc
|