edsnlp.pipelines.ner.scores.tnm

patterns

modifier_pattern = '(?P<modifier>[cpyraum])' module-attribute

tumour_pattern = 't\\s?(?P<tumour>([0-4o]|is|x))x?' module-attribute

node_pattern = 'n\\s?(?P<node>[0-3o]|x)x?' module-attribute

metastasis_pattern = 'm\\s?(?P<metastasis>[01o]|x)x?' module-attribute

version_pattern = '\\(?(?P<version>uicc|accj|tnm)\\s+([ée]ditions|[ée]d\\.?)?\\s*(?P<version_year>\\d{4}|\\d{2})\\)?' module-attribute

spacer = '(.|\\n){1,5}' module-attribute

tnm_pattern = '(?<={version_pattern}{spacer})?' module-attribute
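
These fragments are combined into the full tnm_pattern in the source. As a rough illustration of how the named groups capture a staging string, the sketch below recombines them by hand; the exact concatenation, spacing and optionality used by edsnlp may differ, and the pipeline itself matches on the LOWER attribute rather than using re.IGNORECASE.

import re

# Hand-built recombination of the documented fragments (illustrative only).
modifier_pattern = "(?P<modifier>[cpyraum])"
tumour_pattern = "t\\s?(?P<tumour>([0-4o]|is|x))x?"
node_pattern = "n\\s?(?P<node>[0-3o]|x)x?"
metastasis_pattern = "m\\s?(?P<metastasis>[01o]|x)x?"

pattern = re.compile(
    f"{modifier_pattern}?\\s*{tumour_pattern}\\s*{node_pattern}\\s*{metastasis_pattern}",
    flags=re.IGNORECASE,  # stand-in for the pipeline's attr="LOWER" matching
)

print(pattern.search("Conclusion : pT2 N0 M0").groupdict())
# {'modifier': 'p', 'tumour': '2', 'node': '0', 'metastasis': '0'}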

models

TnmEnum

Bases: Enum

Source code in edsnlp/pipelines/ner/scores/tnm/models.py
class TnmEnum(Enum):
    def __str__(self) -> str:
        return self.value
__str__()
Source code in edsnlp/pipelines/ner/scores/tnm/models.py
def __str__(self) -> str:
    return self.value

Unknown

Bases: TnmEnum

Source code in edsnlp/pipelines/ner/scores/tnm/models.py
class Unknown(TnmEnum):
    unknown = "x"
unknown = 'x' class-attribute

Modifier

Bases: TnmEnum

Source code in edsnlp/pipelines/ner/scores/tnm/models.py
class Modifier(TnmEnum):
    clinical = "c"
    histopathology = "p"
    neoadjuvant_therapy = "y"
    recurrent = "r"
    autopsy = "a"
    ultrasonography = "u"
    multifocal = "m"
clinical = 'c' class-attribute
histopathology = 'p' class-attribute
neoadjuvant_therapy = 'y' class-attribute
recurrent = 'r' class-attribute
autopsy = 'a' class-attribute
ultrasonography = 'u' class-attribute
multifocal = 'm' class-attribute
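
Because TnmEnum overrides __str__ to return the member's value, formatting a member yields the single-letter code rather than the member name, which is what TNM.norm() below relies on. A quick illustration, assuming the import path shown in the source links:

from edsnlp.pipelines.ner.scores.tnm.models import Modifier

print(Modifier.histopathology)       # "p", not "Modifier.histopathology"
print(f"{Modifier.clinical}T2N0M0")  # "cT2N0M0"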

Tumour

Bases: TnmEnum

Source code in edsnlp/pipelines/ner/scores/tnm/models.py
class Tumour(TnmEnum):
    unknown = "x"
    in_situ = "is"
unknown = 'x' class-attribute
in_situ = 'is' class-attribute

TNM

Bases: BaseModel

Source code in edsnlp/pipelines/ner/scores/tnm/models.py
class TNM(BaseModel):

    modifier: Optional[Union[int, Modifier]] = None
    tumour: Optional[Union[int, Tumour]] = None
    node: Optional[Union[int, Unknown]] = None
    metastasis: Optional[Union[int, Unknown]] = None

    version: Optional[str] = None
    version_year: Optional[int] = None

    @validator("*", pre=True)
    def coerce_o(cls, v):
        if isinstance(v, str):
            v = v.replace("o", "0")
        return v

    @validator("version_year")
    def validate_year(cls, v):
        if v is None:
            return v

        if v < 40:
            v += 2000
        elif v < 100:
            v += 1900

        return v

    def norm(self) -> str:
        norm = []

        if self.modifier is not None:
            norm.append(str(self.modifier))

        if self.tumour is not None:
            norm.append(f"T{self.tumour}")

        if self.node is not None:
            norm.append(f"N{self.node}")

        if self.metastasis is not None:
            norm.append(f"M{self.metastasis}")

        if self.version is not None and self.version_year is not None:
            norm.append(f" ({self.version.upper()} {self.version_year})")

        return "".join(norm)
modifier: Optional[Union[int, Modifier]] = None class-attribute
tumour: Optional[Union[int, Tumour]] = None class-attribute
node: Optional[Union[int, Unknown]] = None class-attribute
metastasis: Optional[Union[int, Unknown]] = None class-attribute
version: Optional[str] = None class-attribute
version_year: Optional[int] = None class-attribute
coerce_o(v)
Source code in edsnlp/pipelines/ner/scores/tnm/models.py
@validator("*", pre=True)
def coerce_o(cls, v):
    if isinstance(v, str):
        v = v.replace("o", "0")
    return v
validate_year(v)
Source code in edsnlp/pipelines/ner/scores/tnm/models.py
@validator("version_year")
def validate_year(cls, v):
    if v is None:
        return v

    if v < 40:
        v += 2000
    elif v < 100:
        v += 1900

    return v
norm()
Source code in edsnlp/pipelines/ner/scores/tnm/models.py
def norm(self) -> str:
    norm = []

    if self.modifier is not None:
        norm.append(str(self.modifier))

    if self.tumour is not None:
        norm.append(f"T{self.tumour}")

    if self.node is not None:
        norm.append(f"N{self.node}")

    if self.metastasis is not None:
        norm.append(f"M{self.metastasis}")

    if self.version is not None and self.version_year is not None:
        norm.append(f" ({self.version.upper()} {self.version_year})")

    return "".join(norm)

factory

DEFAULT_CONFIG = dict(pattern=None, attr='LOWER') module-attribute

create_component(nlp, name, pattern, attr)

Source code in edsnlp/pipelines/ner/scores/tnm/factory.py
@Language.factory("eds.TNM", default_config=DEFAULT_CONFIG)
def create_component(
    nlp: Language,
    name: str,
    pattern: Optional[Union[List[str], str]],
    attr: str,
):
    return TNM(
        nlp,
        pattern=pattern,
        attr=attr,
    )
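
With edsnlp installed, this factory is registered on the spaCy registry, so the component can be added by name. A minimal sketch, assuming edsnlp's factories are exposed through its spaCy entry points; the defaults fall back to the built-in tnm_pattern and attr="LOWER":

import spacy

nlp = spacy.blank("fr")
nlp.add_pipe("eds.TNM")
# equivalent to: nlp.add_pipe("eds.TNM", config=dict(pattern=None, attr="LOWER"))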

tnm

eds.tnm pipeline.

PERIOD_PROXIMITY_THRESHOLD = 3 module-attribute

TNM

Bases: BaseComponent

Tags and normalizes TNM mentions.

Parameters
----------
nlp : spacy.language.Language
    Language pipeline object
pattern : Optional[Union[List[str], str]]
    List of regular expressions for TNM mentions.
attr : str
    spaCy attribute to use

Source code in edsnlp/pipelines/ner/scores/tnm/tnm.py
class TNM(BaseComponent):
    """
    Tags and normalizes TNM mentions.

    Parameters
    ----------
    nlp : spacy.language.Language
        Language pipeline object
    pattern : Optional[Union[List[str], str]]
        List of regular expressions for TNM mentions.
    attr : str
        spaCy attribute to use
    """

    # noinspection PyProtectedMember
    def __init__(
        self,
        nlp: Language,
        pattern: Optional[Union[List[str], str]],
        attr: str,
    ):

        self.nlp = nlp

        if pattern is None:
            pattern = patterns.tnm_pattern

        if isinstance(pattern, str):
            pattern = [pattern]

        self.regex_matcher = RegexMatcher(attr=attr, alignment_mode="strict")
        self.regex_matcher.add("tnm", pattern)

        self.set_extensions()

    @staticmethod
    def set_extensions() -> None:
        """
        Set extensions for the TNM pipeline.
        """

        if not Span.has_extension("value"):
            Span.set_extension("value", default=None)

    def process(self, doc: Doc) -> List[Span]:
        """
        Find TNM mentions in doc.

        Parameters
        ----------
        doc:
            spaCy Doc object

        Returns
        -------
        spans:
            list of tnm spans
        """

        spans = self.regex_matcher(
            doc,
            as_spans=True,
            return_groupdict=True,
        )

        spans = filter_spans(spans)

        return spans

    def parse(self, spans: List[Tuple[Span, Dict[str, str]]]) -> List[Span]:
        """
        Parse TNM mentions using the groupdict returned by the matcher.

        Parameters
        ----------
        spans : List[Tuple[Span, Dict[str, str]]]
            List of tuples containing the spans and groupdict
            returned by the matcher.

        Returns
        -------
        List[Span]
            List of processed spans, with the TNM value parsed.
        """

        for span, groupdict in spans:

            span._.value = models.TNM.parse_obj(groupdict)
            span.kb_id_ = span._.value.norm()

        return [span for span, _ in spans]

    def __call__(self, doc: Doc) -> Doc:
        """
        Tags TNM mentions.

        Parameters
        ----------
        doc : Doc
            spaCy Doc object

        Returns
        -------
        doc : Doc
            spaCy Doc object, annotated for TNM
        """
        spans = self.process(doc)
        spans = filter_spans(spans)

        spans = self.parse(spans)

        doc.spans["tnm"] = spans

        ents, discarded = filter_spans(list(doc.ents) + spans, return_discarded=True)

        doc.ents = ents

        if "discarded" not in doc.spans:
            doc.spans["discarded"] = []
        doc.spans["discarded"].extend(discarded)

        return doc
nlp = nlp instance-attribute
regex_matcher = RegexMatcher(attr=attr, alignment_mode='strict') instance-attribute
__init__(nlp, pattern, attr)
Source code in edsnlp/pipelines/ner/scores/tnm/tnm.py
def __init__(
    self,
    nlp: Language,
    pattern: Optional[Union[List[str], str]],
    attr: str,
):

    self.nlp = nlp

    if pattern is None:
        pattern = patterns.tnm_pattern

    if isinstance(pattern, str):
        pattern = [pattern]

    self.regex_matcher = RegexMatcher(attr=attr, alignment_mode="strict")
    self.regex_matcher.add("tnm", pattern)

    self.set_extensions()
set_extensions()

Set extensions for the TNM pipeline.

Source code in edsnlp/pipelines/ner/scores/tnm/tnm.py
@staticmethod
def set_extensions() -> None:
    """
    Set extensions for the TNM pipeline.
    """

    if not Span.has_extension("value"):
        Span.set_extension("value", default=None)
process(doc)

Find TNM mentions in doc.

Parameters
----------
doc : Doc
    spaCy Doc object

Returns
-------
spans : List[Span]
    List of TNM spans

Source code in edsnlp/pipelines/ner/scores/tnm/tnm.py
def process(self, doc: Doc) -> List[Span]:
    """
    Find TNM mentions in doc.

    Parameters
    ----------
    doc:
        spaCy Doc object

    Returns
    -------
    spans:
        list of tnm spans
    """

    spans = self.regex_matcher(
        doc,
        as_spans=True,
        return_groupdict=True,
    )

    spans = filter_spans(spans)

    return spans
parse(spans)

Parse TNM mentions using the groupdict returned by the matcher.

Parameters
----------
spans : List[Tuple[Span, Dict[str, str]]]
    List of tuples containing the spans and groupdict returned by the matcher.

Returns
-------
List[Span]
    List of processed spans, with the TNM value parsed.

Source code in edsnlp/pipelines/ner/scores/tnm/tnm.py
def parse(self, spans: List[Tuple[Span, Dict[str, str]]]) -> List[Span]:
    """
    Parse TNM mentions using the groupdict returned by the matcher.

    Parameters
    ----------
    spans : List[Tuple[Span, Dict[str, str]]]
        List of tuples containing the spans and groupdict
        returned by the matcher.

    Returns
    -------
    List[Span]
        List of processed spans, with the TNM value parsed.
    """

    for span, groupdict in spans:

        span._.value = models.TNM.parse_obj(groupdict)
        span.kb_id_ = span._.value.norm()

    return [span for span, _ in spans]
__call__(doc)

Tags TNM mentions.

Parameters
----------
doc : Doc
    spaCy Doc object

Returns
-------
doc : Doc
    spaCy Doc object, annotated for TNM

Source code in edsnlp/pipelines/ner/scores/tnm/tnm.py
def __call__(self, doc: Doc) -> Doc:
    """
    Tags TNM mentions.

    Parameters
    ----------
    doc : Doc
        spaCy Doc object

    Returns
    -------
    doc : Doc
        spaCy Doc object, annotated for TNM
    """
    spans = self.process(doc)
    spans = filter_spans(spans)

    spans = self.parse(spans)

    doc.spans["tnm"] = spans

    ents, discarded = filter_spans(list(doc.ents) + spans, return_discarded=True)

    doc.ents = ents

    if "discarded" not in doc.spans:
        doc.spans["discarded"] = []
    doc.spans["discarded"].extend(discarded)

    return doc
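
End to end, the component stores the matched spans in doc.spans["tnm"], merges non-overlapping ones into doc.ents (overlapping matches go to doc.spans["discarded"]), and exposes the parsed model on span._.value and its normalised form on span.kb_id_. A rough usage sketch, assuming the pipeline built in the factory example above; the exact matches depend on the default pattern:

doc = nlp("Bilan : pT2 N0 M0 (UICC 2018)")

for span in doc.spans["tnm"]:
    print(span.text, "->", span.kb_id_, span._.value)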