Skip to content

edsnlp.pipelines.ner.scores

base_score

Score

Bases: AdvancedRegex

Matcher component to extract a numeric score

PARAMETER DESCRIPTION
nlp

The spaCy object.

TYPE: Language

score_name

The name of the extracted score

TYPE: str

regex

A list of regexes to identify the score

TYPE: List[str]

attr

Whether to match on the text ('TEXT') or on the normalized text ('NORM')

TYPE: str

after_extract

Regex with capturing group to get the score value

TYPE: str

score_normalization

Function that takes the "raw" value extracted from the after_extract regex, and returns None if no score could be extracted, or the desired score value otherwise

TYPE: Callable[[Union[str,None]], Any]

window

Number of tokens to include after the score's mention to find the score's value

TYPE: int

Source code in edsnlp/pipelines/ner/scores/base_score.py
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
class Score(AdvancedRegex):
    """
    Matcher component to extract a numeric score

    Parameters
    ----------
    nlp : Language
        The spaCy object.
    score_name : str
        The name of the extracted score
    regex : List[str]
        A list of regexes to identify the score
    attr : str
        Whether to match on the text ('TEXT') or on the normalized text ('NORM')
    after_extract : str
        Regex with capturing group to get the score value
    score_normalization : Callable[[Union[str,None]], Any]
        Function that takes the "raw" value extracted from the `after_extract` regex,
        and should return
        - None if no score could be extracted
        - The desired score value else
    window : int
        Number of tokens to include after the score's mention to find the
        score's value
    """

    def __init__(
        self,
        nlp: Language,
        score_name: str,
        regex: List[str],
        attr: str,
        after_extract: str,
        score_normalization: Union[str, Callable[[Union[str, None]], Any]],
        window: int,
        verbose: int,
        ignore_excluded: bool,
    ):

        # Single-entry config expected by the AdvancedRegex parent.
        regex_config = {
            score_name: dict(regex=regex, attr=attr, after_extract=after_extract)
        }

        super().__init__(
            nlp=nlp,
            regex_config=regex_config,
            window=window,
            verbose=verbose,
            ignore_excluded=ignore_excluded,
            attr=attr,
        )

        self.score_name = score_name

        # A string is resolved through spaCy's "misc" registry;
        # a callable is used directly.
        if isinstance(score_normalization, str):
            self.score_normalization = registry.get("misc", score_normalization)
        else:
            self.score_normalization = score_normalization

        self.set_extensions()

    @staticmethod
    def set_extensions() -> None:
        """Declare the `score_name` and `score_value` Span extensions."""
        super(Score, Score).set_extensions()
        if not Span.has_extension("score_name"):
            Span.set_extension("score_name", default=None)
        if not Span.has_extension("score_value"):
            Span.set_extension("score_value", default=None)

    def __call__(self, doc: Doc) -> Doc:
        """
        Adds spans to document.

        Parameters
        ----------
        doc:
            spaCy Doc object

        Returns
        -------
        doc:
            spaCy Doc object, annotated for extracted terms.
        """

        ents = super(Score, Score).process(self, doc)
        ents = self.score_filtering(ents)

        ents, discarded = filter_spans(list(doc.ents) + ents, return_discarded=True)

        doc.ents = ents

        # Overlapping spans that lost to an existing entity are kept
        # in a dedicated span group for inspection.
        if "discarded" not in doc.spans:
            doc.spans["discarded"] = []
        doc.spans["discarded"].extend(discarded)

        return doc

    def score_filtering(self, ents: List[Span]) -> List[Span]:
        """
        Extracts, if available, the value of the score.
        Normalizes the score via the provided `self.score_normalization` method.

        Parameters
        ----------
        ents: List[Span]
            List of spaCy's spans extracted by the score matcher

        Returns
        -------
        ents: List[Span]
            List of spaCy's spans, with, if found, an added `score_value` extension
        """
        to_keep_ents = []
        for ent in ents:
            value = ent._.after_extract[0]
            normalized_value = self.score_normalization(value)
            if normalized_value is not None:
                ent._.score_name = self.score_name
                # BUG FIX: store the normalized value. Previously the
                # normalization result was computed and then discarded in
                # favour of `int(value)`, ignoring any transformation done
                # by `score_normalization` (cf. Sofa.score_filtering, which
                # stores the normalized value).
                ent._.score_value = normalized_value
                to_keep_ents.append(ent)

        return to_keep_ents
score_name = score_name instance-attribute
score_normalization = registry.get('misc', score_normalization) instance-attribute
__init__(nlp, score_name, regex, attr, after_extract, score_normalization, window, verbose, ignore_excluded)
Source code in edsnlp/pipelines/ner/scores/base_score.py
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
def __init__(
    self,
    nlp: Language,
    score_name: str,
    regex: List[str],
    attr: str,
    after_extract: str,
    score_normalization: Union[str, Callable[[Union[str, None]], Any]],
    window: int,
    verbose: int,
    ignore_excluded: bool,
):
    # Wrap the score's patterns into the single-key configuration
    # expected by the AdvancedRegex parent.
    config = {score_name: dict(regex=regex, attr=attr, after_extract=after_extract)}

    super().__init__(
        nlp=nlp,
        regex_config=config,
        attr=attr,
        window=window,
        verbose=verbose,
        ignore_excluded=ignore_excluded,
    )

    self.score_name = score_name

    # Strings are looked up in spaCy's "misc" registry, callables kept as-is.
    self.score_normalization = (
        registry.get("misc", score_normalization)
        if isinstance(score_normalization, str)
        else score_normalization
    )

    self.set_extensions()
set_extensions()
Source code in edsnlp/pipelines/ner/scores/base_score.py
72
73
74
75
76
77
78
@staticmethod
def set_extensions() -> None:
    # Register the parent extensions first, then the score-specific ones.
    super(Score, Score).set_extensions()
    for extension in ("score_name", "score_value"):
        if not Span.has_extension(extension):
            Span.set_extension(extension, default=None)
__call__(doc)

Adds spans to document.

PARAMETER DESCRIPTION
doc

spaCy Doc object

TYPE: Doc

RETURNS DESCRIPTION
doc

spaCy Doc object, annotated for extracted terms.

Source code in edsnlp/pipelines/ner/scores/base_score.py
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
def __call__(self, doc: Doc) -> Doc:
    """
    Adds spans to document.

    Parameters
    ----------
    doc:
        spaCy Doc object

    Returns
    -------
    doc:
        spaCy Doc object, annotated for extracted terms.
    """

    # Run the regex matcher, then keep only mentions with a usable value.
    extracted = self.score_filtering(super(Score, Score).process(self, doc))

    # Merge with pre-existing entities; overlaps are resolved by
    # filter_spans and losers are kept aside.
    kept, discarded = filter_spans(
        list(doc.ents) + extracted, return_discarded=True
    )

    doc.ents = kept

    if "discarded" not in doc.spans:
        doc.spans["discarded"] = []
    doc.spans["discarded"].extend(discarded)

    return doc
score_filtering(ents)

Extracts, if available, the value of the score. Normalizes the score via the provided self.score_normalization method.

PARAMETER DESCRIPTION
ents

List of spaCy's spans extracted by the score matcher

TYPE: List[Span]

RETURNS DESCRIPTION
ents

List of spaCy's spans, with, if found, an added score_value extension

Source code in edsnlp/pipelines/ner/scores/base_score.py
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
def score_filtering(self, ents: List[Span]) -> List[Span]:
    """
    Extracts, if available, the value of the score.
    Normalizes the score via the provided `self.score_normalization` method.

    Parameters
    ----------
    ents: List[Span]
        List of spaCy's spans extracted by the score matcher

    Returns
    -------
    ents: List[Span]
        List of spaCy's spans, with, if found, an added `score_value` extension
    """
    to_keep_ents = []
    for ent in ents:
        value = ent._.after_extract[0]
        normalized_value = self.score_normalization(value)
        if normalized_value is not None:
            ent._.score_name = self.score_name
            # BUG FIX: store the normalized value. Previously the
            # normalization result was computed and then discarded in
            # favour of `int(value)` (cf. Sofa.score_filtering, which
            # stores the normalized value).
            ent._.score_value = normalized_value
            to_keep_ents.append(ent)

    return to_keep_ents

factory

DEFAULT_CONFIG = dict(attr='NORM', window=7, verbose=0, ignore_excluded=False) module-attribute

create_component(nlp, name, score_name, regex, after_extract, score_normalization, attr, window, verbose, ignore_excluded)

Source code in edsnlp/pipelines/ner/scores/factory.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
@deprecated_factory("score", "eds.score", default_config=DEFAULT_CONFIG)
@Language.factory("eds.score", default_config=DEFAULT_CONFIG)
def create_component(
    nlp: Language,
    name: str,
    score_name: str,
    regex: List[str],
    after_extract: str,
    score_normalization: Union[str, Callable[[Union[str, None]], Any]],
    attr: str,
    window: int,
    verbose: int,
    ignore_excluded: bool,
):
    # Forward every factory argument to the generic Score component.
    kwargs = dict(
        score_name=score_name,
        regex=regex,
        after_extract=after_extract,
        score_normalization=score_normalization,
        attr=attr,
        window=window,
        verbose=verbose,
        ignore_excluded=ignore_excluded,
    )
    return Score(nlp, **kwargs)

sofa

patterns

regex = ['\\bsofa\\b'] module-attribute
method_regex = 'sofa.*?((?P<max>max\\w*)|(?P<vqheures>24h\\w*)|(?P<admission>admission\\w*))(?P<after_value>(.|\\n)*)' module-attribute
value_regex = '.*?.[\\n\\W]*?(\\d+)[^h\\d]' module-attribute
score_normalization_str = 'score_normalization.sofa' module-attribute
score_normalization(extracted_score)

Sofa score normalization. If available, returns the integer value of the SOFA score.

Source code in edsnlp/pipelines/ner/scores/sofa/patterns.py
17
18
19
20
21
22
23
24
25
@spacy.registry.misc(score_normalization_str)
def score_normalization(extracted_score: Union[str, None]):
    """
    SOFA score normalization.
    Returns the integer value of the SOFA score when it lies in the
    accepted range, and None otherwise.
    """
    if extracted_score is None:
        return None
    value = int(extracted_score)
    if 0 <= value <= 29:
        return value

sofa

Sofa

Bases: Score

Matcher component to extract the SOFA score

PARAMETER DESCRIPTION
nlp

The spaCy object.

TYPE: Language

score_name

The name of the extracted score

TYPE: str

regex

A list of regexes to identify the SOFA score

TYPE: List[str]

attr

Whether to match on the text ('TEXT') or on the normalized text ('CUSTOM_NORM')

TYPE: str

method_regex

Regex with capturing group to get the score extraction method (e.g. "à l'admission", "à 24H", "Maximum")

TYPE: str

value_regex

Regex to extract the score value

TYPE: str

score_normalization

Function that takes the "raw" value extracted from the after_extract regex, and returns None if no score could be extracted, or the desired score value otherwise

TYPE: Callable[[Union[str,None]], Any]

window

Number of tokens to include after the score's mention to find the score's value

TYPE: int

Source code in edsnlp/pipelines/ner/scores/sofa/sofa.py
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
class Sofa(Score):
    """
    Matcher component to extract the SOFA score

    Parameters
    ----------
    nlp : Language
        The spaCy object.
    score_name : str
        The name of the extracted score
    regex : List[str]
        A list of regexes to identify the SOFA score
    attr : str
        Whether to match on the text ('TEXT') or on the normalized text
        ('CUSTOM_NORM')
    method_regex : str
        Regex with capturing group to get the score extraction method
        (e.g. "à l'admission", "à 24H", "Maximum")
    value_regex : str
        Regex to extract the score value
    score_normalization : Callable[[Union[str,None]], Any]
        Function that takes the "raw" value extracted from the `after_extract` regex,
        and should return
        - None if no score could be extracted
        - The desired score value else
    window : int
        Number of tokens to include after the score's mention to find the
        score's value
    """

    def __init__(
        self,
        nlp: Language,
        score_name: str,
        regex: List[str],
        attr: str,
        method_regex: str,
        value_regex: str,
        score_normalization: Union[str, Callable[[Union[str, None]], Any]],
        window: int,
        verbose: int,
        ignore_excluded: bool,
    ):

        # The value is located via method_regex/value_regex in
        # score_filtering, so the generic after_extract mechanism is unused.
        super().__init__(
            nlp,
            score_name=score_name,
            regex=regex,
            after_extract=[],
            score_normalization=score_normalization,
            attr=attr,
            window=window,
            verbose=verbose,
            ignore_excluded=ignore_excluded,
        )

        self.method_regex = method_regex
        self.value_regex = value_regex

        self.set_extensions()

    @staticmethod
    def set_extensions() -> None:
        """Declare the `score_method` Span extension on top of Score's."""
        super(Sofa, Sofa).set_extensions()
        if not Span.has_extension("score_method"):
            Span.set_extension("score_method", default=None)

    def score_filtering(self, ents: List[Span]) -> List[Span]:
        """
        Extracts, if available, the value of the score.
        Normalizes the score via the provided `self.score_normalization` method.

        Parameters
        ----------
        ents: List[Span]
            List of spaCy's spans extracted by the score matcher

        Returns
        -------
        ents: List[Span]
            List of spaCy's spans, with, if found, an added `score_value` extension
        """

        to_keep_ents = []

        for ent in ents:
            after_snippet = get_text(
                ent._.after_snippet,
                attr=self.attr,
                ignore_excluded=self.ignore_excluded,
            )
            matches = re.search(self.method_regex, after_snippet)

            if matches is None:
                method = "Non précisée"
                value = after_snippet

            else:
                groups = matches.groupdict()
                value = groups["after_value"]
                if groups["max"] is not None:
                    method = "Maximum"
                elif groups["vqheures"] is not None:
                    method = "24H"
                elif groups["admission"] is not None:
                    method = "A l'admission"
                else:
                    # BUG FIX (defensive): the regex alternatives should
                    # guarantee one of the three named groups matched, but
                    # without this fallback `method` would be unbound if
                    # the pattern ever changes.
                    method = "Non précisée"

            digit_value = re.match(
                self.value_regex, value
            )  # Use match instead of search to only look at the beginning
            digit_value = None if digit_value is None else digit_value.groups()[0]

            normalized_value = self.score_normalization(digit_value)
            if normalized_value is not None:
                ent._.score_name = self.score_name
                ent._.score_value = int(normalized_value)
                ent._.score_method = method
                to_keep_ents.append(ent)

        return to_keep_ents
method_regex = method_regex instance-attribute
value_regex = value_regex instance-attribute
__init__(nlp, score_name, regex, attr, method_regex, value_regex, score_normalization, window, verbose, ignore_excluded)
Source code in edsnlp/pipelines/ner/scores/sofa/sofa.py
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
def __init__(
    self,
    nlp: Language,
    score_name: str,
    regex: List[str],
    attr: str,
    method_regex: str,
    value_regex: str,
    score_normalization: Union[str, Callable[[Union[str, None]], Any]],
    window: int,
    verbose: int,
    ignore_excluded: bool,
):
    # The SOFA value is located via method_regex/value_regex in
    # score_filtering, hence the empty after_extract list.
    super().__init__(
        nlp,
        score_name=score_name,
        regex=regex,
        attr=attr,
        after_extract=[],
        score_normalization=score_normalization,
        window=window,
        verbose=verbose,
        ignore_excluded=ignore_excluded,
    )

    self.method_regex, self.value_regex = method_regex, value_regex

    self.set_extensions()
set_extensions()
Source code in edsnlp/pipelines/ner/scores/sofa/sofa.py
71
72
73
74
75
@staticmethod
def set_extensions() -> None:
    # Inherit Score's extensions before adding the method extension.
    super(Sofa, Sofa).set_extensions()
    if Span.has_extension("score_method"):
        return
    Span.set_extension("score_method", default=None)
score_filtering(ents)

Extracts, if available, the value of the score. Normalizes the score via the provided self.score_normalization method.

PARAMETER DESCRIPTION
ents

List of spaCy's spans extracted by the score matcher

TYPE: List[Span]

RETURNS DESCRIPTION
ents

List of spaCy's spans, with, if found, an added score_value extension

Source code in edsnlp/pipelines/ner/scores/sofa/sofa.py
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
def score_filtering(self, ents: List[Span]) -> List[Span]:
    """
    Extracts, if available, the value of the score.
    Normalizes the score via the provided `self.score_normalization` method.

    Parameters
    ----------
    ents: List[Span]
        List of spaCy's spans extracted by the score matcher

    Returns
    -------
    ents: List[Span]
        List of spaCy's spans, with, if found, an added `score_value` extension
    """

    kept = []

    for ent in ents:
        snippet = get_text(
            ent._.after_snippet,
            attr=self.attr,
            ignore_excluded=self.ignore_excluded,
        )
        method_match = re.search(self.method_regex, snippet)

        if method_match is None:
            method = "Non précisée"
            candidate = snippet
        else:
            groups = method_match.groupdict()
            candidate = groups["after_value"]
            if groups["max"] is not None:
                method = "Maximum"
            elif groups["vqheures"] is not None:
                method = "24H"
            elif groups["admission"] is not None:
                method = "A l'admission"

        # `re.match` anchors the search at the start of the candidate text.
        value_match = re.match(self.value_regex, candidate)
        digit_value = None if value_match is None else value_match.groups()[0]

        normalized_value = self.score_normalization(digit_value)
        if normalized_value is not None:
            ent._.score_name = self.score_name
            ent._.score_value = int(normalized_value)
            ent._.score_method = method
            kept.append(ent)

    return kept

factory

DEFAULT_CONFIG = dict(regex=patterns.regex, method_regex=patterns.method_regex, value_regex=patterns.value_regex, score_normalization=patterns.score_normalization_str, attr='NORM', window=20, verbose=0, ignore_excluded=False) module-attribute
create_component(nlp, name, regex, method_regex, value_regex, score_normalization, attr, window, verbose, ignore_excluded)
Source code in edsnlp/pipelines/ner/scores/sofa/factory.py
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
@deprecated_factory("SOFA", "eds.SOFA", default_config=DEFAULT_CONFIG)
@Language.factory("eds.SOFA", default_config=DEFAULT_CONFIG)
def create_component(
    nlp: Language,
    name: str,
    regex: List[str],
    method_regex: str,
    value_regex: str,
    score_normalization: Union[str, Callable[[Union[str, None]], Any]],
    attr: str,
    window: int,
    verbose: int,
    ignore_excluded: bool,
):
    # The factory name doubles as the score name.
    kwargs = dict(
        score_name=name,
        regex=regex,
        method_regex=method_regex,
        value_regex=value_regex,
        score_normalization=score_normalization,
        attr=attr,
        window=window,
        verbose=verbose,
        ignore_excluded=ignore_excluded,
    )
    return Sofa(nlp, **kwargs)

charlson

patterns

regex = ['charlson'] module-attribute
after_extract = 'charlson.*?[\\n\\W]*?(\\d+)' module-attribute
score_normalization_str = 'score_normalization.charlson' module-attribute
score_normalization(extracted_score)

Charlson score normalization. If available, returns the integer value of the Charlson score.

Source code in edsnlp/pipelines/ner/scores/charlson/patterns.py
12
13
14
15
16
17
18
19
20
@spacy.registry.misc(score_normalization_str)
def score_normalization(extracted_score: Union[str, None]):
    """
    Charlson score normalization.
    Returns the integer value of the Charlson score when it lies in the
    accepted range, and None otherwise.
    """
    if extracted_score is None:
        return None
    value = int(extracted_score)
    if 0 <= value <= 29:
        return value

factory

DEFAULT_CONFIG = dict(regex=patterns.regex, after_extract=patterns.after_extract, score_normalization=patterns.score_normalization_str, attr='NORM', window=7, verbose=0, ignore_excluded=False) module-attribute
create_component(nlp, name, regex, after_extract, score_normalization, attr, window, verbose, ignore_excluded)
Source code in edsnlp/pipelines/ner/scores/charlson/factory.py
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
@deprecated_factory("charlson", "eds.charlson", default_config=DEFAULT_CONFIG)
@Language.factory("eds.charlson", default_config=DEFAULT_CONFIG)
def create_component(
    nlp: Language,
    name: str,
    regex: List[str],
    after_extract: str,
    score_normalization: Union[str, Callable[[Union[str, None]], Any]],
    attr: str,
    window: int,
    verbose: int,
    ignore_excluded: bool,
):
    # The factory name doubles as the score name.
    kwargs = dict(
        score_name=name,
        regex=regex,
        after_extract=after_extract,
        score_normalization=score_normalization,
        attr=attr,
        window=window,
        verbose=verbose,
        ignore_excluded=ignore_excluded,
    )
    return Score(nlp, **kwargs)

emergency

gemsa

patterns
regex = ['\\bgemsa\\b'] module-attribute
after_extract = 'gemsa.*?[\\n\\W]*?(\\d+)' module-attribute
score_normalization_str = 'score_normalization.gemsa' module-attribute
score_normalization(extracted_score)

GEMSA score normalization. If available, returns the integer value of the GEMSA score.

Source code in edsnlp/pipelines/ner/scores/emergency/gemsa/patterns.py
12
13
14
15
16
17
18
19
20
@spacy.registry.misc(score_normalization_str)
def score_normalization(extracted_score: Union[str, None]):
    """
    GEMSA score normalization.
    Returns the integer value of the GEMSA score when it lies in the
    accepted range (1-6), and None otherwise.
    """
    if extracted_score is None:
        return None
    value = int(extracted_score)
    if 1 <= value <= 6:
        return value
factory
DEFAULT_CONFIG = dict(regex=patterns.regex, after_extract=patterns.after_extract, score_normalization=patterns.score_normalization_str, attr='NORM', window=20, verbose=0, ignore_excluded=False) module-attribute
create_component(nlp, name, regex, after_extract, score_normalization, attr, window, verbose, ignore_excluded)
Source code in edsnlp/pipelines/ner/scores/emergency/gemsa/factory.py
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
@deprecated_factory(
    "emergency.gemsa", "eds.emergency.gemsa", default_config=DEFAULT_CONFIG
)
@Language.factory("eds.emergency.gemsa", default_config=DEFAULT_CONFIG)
def create_component(
    nlp: Language,
    name: str,
    regex: List[str],
    after_extract: str,
    score_normalization: Union[str, Callable[[Union[str, None]], Any]],
    attr: str,
    window: int,
    verbose: int,
    ignore_excluded: bool,
):
    # The factory name doubles as the score name.
    kwargs = dict(
        score_name=name,
        regex=regex,
        after_extract=after_extract,
        score_normalization=score_normalization,
        attr=attr,
        window=window,
        verbose=verbose,
        ignore_excluded=ignore_excluded,
    )
    return Score(nlp, **kwargs)

priority

patterns
regex = ['\\bpriorite\\b'] module-attribute
after_extract = 'priorite.*?[\\n\\W]*?(\\d+)' module-attribute
score_normalization_str = 'score_normalization.priority' module-attribute
score_normalization(extracted_score)

Priority score normalization. If available, returns the integer value of the priority score.

Source code in edsnlp/pipelines/ner/scores/emergency/priority/patterns.py
12
13
14
15
16
17
18
19
20
@spacy.registry.misc(score_normalization_str)
def score_normalization(extracted_score: Union[str, None]):
    """
    Priority score normalization.
    Returns the integer value of the priority score when it lies in the
    accepted range (0-5), and None otherwise.
    """
    if extracted_score is None:
        return None
    value = int(extracted_score)
    if 0 <= value <= 5:
        return value
factory
DEFAULT_CONFIG = dict(regex=patterns.regex, after_extract=patterns.after_extract, score_normalization=patterns.score_normalization_str, attr='NORM', window=7, verbose=0, ignore_excluded=False) module-attribute
create_component(nlp, name, regex, after_extract, score_normalization, attr, window, verbose, ignore_excluded)
Source code in edsnlp/pipelines/ner/scores/emergency/priority/factory.py
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
@deprecated_factory(
    "emergency.priority", "eds.emergency.priority", default_config=DEFAULT_CONFIG
)
@Language.factory("eds.emergency.priority", default_config=DEFAULT_CONFIG)
def create_component(
    nlp: Language,
    name: str,
    regex: List[str],
    after_extract: str,
    score_normalization: Union[str, Callable[[Union[str, None]], Any]],
    attr: str,
    window: int,
    verbose: int,
    ignore_excluded: bool,
):
    # The factory name doubles as the score name.
    kwargs = dict(
        score_name=name,
        regex=regex,
        after_extract=after_extract,
        score_normalization=score_normalization,
        attr=attr,
        window=window,
        verbose=verbose,
        ignore_excluded=ignore_excluded,
    )
    return Score(nlp, **kwargs)

ccmu

patterns
regex = ['\\bccmu\\b'] module-attribute
after_extract = 'ccmu.*?[\\n\\W]*?(\\d+)' module-attribute
score_normalization_str = 'score_normalization.ccmu' module-attribute
score_normalization(extracted_score)

CCMU score normalization. If available, returns the integer value of the CCMU score.

Source code in edsnlp/pipelines/ner/scores/emergency/ccmu/patterns.py
12
13
14
15
16
17
18
19
20
@spacy.registry.misc(score_normalization_str)
def score_normalization(extracted_score: Union[str, None]):
    """
    CCMU score normalization.
    Returns the integer value of the CCMU score when it lies in the
    accepted range (1-5), and None otherwise.
    """
    if extracted_score is None:
        return None
    value = int(extracted_score)
    if 1 <= value <= 5:
        return value
factory
DEFAULT_CONFIG = dict(regex=patterns.regex, after_extract=patterns.after_extract, score_normalization=patterns.score_normalization_str, attr='NORM', window=20, verbose=0, ignore_excluded=False) module-attribute
create_component(nlp, name, regex, after_extract, score_normalization, attr, window, verbose, ignore_excluded)
Source code in edsnlp/pipelines/ner/scores/emergency/ccmu/factory.py
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
@deprecated_factory(
    "emergency.ccmu", "eds.emergency.ccmu", default_config=DEFAULT_CONFIG
)
@Language.factory("eds.emergency.ccmu", default_config=DEFAULT_CONFIG)
def create_component(
    nlp: Language,
    name: str,
    regex: List[str],
    after_extract: str,
    score_normalization: Union[str, Callable[[Union[str, None]], Any]],
    attr: str,
    window: int,
    verbose: int,
    ignore_excluded: bool,
):
    # The factory name doubles as the score name.
    kwargs = dict(
        score_name=name,
        regex=regex,
        after_extract=after_extract,
        score_normalization=score_normalization,
        attr=attr,
        window=window,
        verbose=verbose,
        ignore_excluded=ignore_excluded,
    )
    return Score(nlp, **kwargs)

tnm

patterns

modifier_pattern = '(?P<modifier>[cpyraum])' module-attribute
tumour_pattern = 't\\s?(?P<tumour>([0-4o]|is|x))x?' module-attribute
node_pattern = 'n\\s?(?P<node>[0-3o]|x)x?' module-attribute
metastasis_pattern = 'm\\s?(?P<metastasis>[01o]|x)x?' module-attribute
version_pattern = '\\(?(?P<version>uicc|accj|tnm)\\s+([ée]ditions|[ée]d\\.?)?\\s*(?P<version_year>\\d{4}|\\d{2})\\)?' module-attribute
spacer = '(.|\\n){1,5}' module-attribute
tnm_pattern = '(?<={version_pattern}{spacer})?' module-attribute

models

TnmEnum

Bases: Enum

Source code in edsnlp/pipelines/ner/scores/tnm/models.py
7
8
9
class TnmEnum(Enum):
    # Base enum for TNM values: str() yields the stored raw value
    # (e.g. "x") instead of the default "ClassName.member" form.
    def __str__(self) -> str:
        return self.value
__str__()
Source code in edsnlp/pipelines/ner/scores/tnm/models.py
8
9
def __str__(self) -> str:
    # Render the member as its stored value (e.g. "x") rather than the
    # default "ClassName.member" form.
    return self.value
Unknown

Bases: TnmEnum

Source code in edsnlp/pipelines/ner/scores/tnm/models.py
12
13
class Unknown(TnmEnum):
    # Sentinel for an unassessable ("x") TNM component.
    unknown = "x"
unknown = 'x' class-attribute
Modifier

Bases: TnmEnum

Source code in edsnlp/pipelines/ner/scores/tnm/models.py
16
17
18
19
20
21
22
23
class Modifier(TnmEnum):
    # One-letter TNM prefix modifiers (e.g. the "p" of "pT2").
    clinical = "c"
    histopathology = "p"
    neoadjuvant_therapy = "y"
    recurrent = "r"
    autopsy = "a"
    ultrasonography = "u"
    multifocal = "m"
clinical = 'c' class-attribute
histopathology = 'p' class-attribute
neoadjuvant_therapy = 'y' class-attribute
recurrent = 'r' class-attribute
autopsy = 'a' class-attribute
ultrasonography = 'u' class-attribute
multifocal = 'm' class-attribute
Tumour

Bases: TnmEnum

Source code in edsnlp/pipelines/ner/scores/tnm/models.py
26
27
28
class Tumour(TnmEnum):
    # Non-numeric tumour (T) values.
    unknown = "x"
    in_situ = "is"
unknown = 'x' class-attribute
in_situ = 'is' class-attribute
TNM

Bases: BaseModel

Source code in edsnlp/pipelines/ner/scores/tnm/models.py
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
class TNM(BaseModel):
    """
    Pydantic model holding one parsed TNM mention.

    Fields mirror the named groups of the TNM regexes: an optional
    modifier prefix, tumour/node/metastasis values, and an optional
    classification version with its year.
    """

    modifier: Optional[Union[int, Modifier]] = None
    tumour: Optional[Union[int, Tumour]] = None
    node: Optional[Union[int, Unknown]] = None
    metastasis: Optional[Union[int, Unknown]] = None

    version: Optional[str] = None
    version_year: Optional[int] = None

    @validator("*", pre=True)
    def coerce_o(cls, v):
        # Raw text sometimes uses the letter "o" in place of the digit 0
        # (the regexes accept it, e.g. [0-4o]); rewrite before validation.
        if isinstance(v, str):
            v = v.replace("o", "0")
        return v

    @validator("version_year")
    def validate_year(cls, v):
        # Expand two-digit years: values below 40 map to the 2000s,
        # values 40-99 to the 1900s; 4-digit years pass through.
        if v is None:
            return v

        if v < 40:
            v += 2000
        elif v < 100:
            v += 1900

        return v

    def norm(self) -> str:
        # Build the canonical TNM string, e.g. "pT2N1M0 (TNM 2010)".
        norm = []

        if self.modifier is not None:
            norm.append(str(self.modifier))

        if self.tumour is not None:
            norm.append(f"T{self.tumour}")

        if self.node is not None:
            norm.append(f"N{self.node}")

        if self.metastasis is not None:
            norm.append(f"M{self.metastasis}")

        if self.version is not None and self.version_year is not None:
            norm.append(f" ({self.version.upper()} {self.version_year})")

        return "".join(norm)
modifier: Optional[Union[int, Modifier]] = None class-attribute
tumour: Optional[Union[int, Tumour]] = None class-attribute
node: Optional[Union[int, Unknown]] = None class-attribute
metastasis: Optional[Union[int, Unknown]] = None class-attribute
version: Optional[str] = None class-attribute
version_year: Optional[int] = None class-attribute
coerce_o(v)
Source code in edsnlp/pipelines/ner/scores/tnm/models.py
41
42
43
44
45
@validator("*", pre=True)
def coerce_o(cls, v):
    # The letter "o" is a common stand-in for the digit 0 in raw TNM
    # strings; replace it before pydantic validates the field.
    if isinstance(v, str):
        v = v.replace("o", "0")
    return v
validate_year(v)
Source code in edsnlp/pipelines/ner/scores/tnm/models.py
47
48
49
50
51
52
53
54
55
56
57
@validator("version_year")
def validate_year(cls, v):
    # Expand two-digit years: 0-39 -> 2000s, 40-99 -> 1900s.
    # Four-digit (>= 100) years pass through unchanged.
    if v is None:
        return v

    if v < 40:
        v += 2000
    elif v < 100:
        v += 1900

    return v
norm()
Source code in edsnlp/pipelines/ner/scores/tnm/models.py
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
def norm(self) -> str:
    """Build the normalised TNM string (e.g. ``T1N0M0``) from the
    non-empty components, appending the version in parentheses when
    both ``version`` and ``version_year`` are set."""
    norm = []

    if self.modifier is not None:
        norm.append(str(self.modifier))

    if self.tumour is not None:
        norm.append(f"T{self.tumour}")

    if self.node is not None:
        norm.append(f"N{self.node}")

    if self.metastasis is not None:
        norm.append(f"M{self.metastasis}")

    if self.version is not None and self.version_year is not None:
        norm.append(f" ({self.version.upper()} {self.version_year})")

    return "".join(norm)

factory

DEFAULT_CONFIG = dict(pattern=None, attr='LOWER') module-attribute
create_component(nlp, name, pattern, attr)
Source code in edsnlp/pipelines/ner/scores/tnm/factory.py
13
14
15
16
17
18
19
20
21
22
23
24
@Language.factory("eds.TNM", default_config=DEFAULT_CONFIG)
def create_component(
    nlp: Language,
    name: str,
    pattern: Optional[Union[List[str], str]],
    attr: str,
):
    """Instantiate the ``eds.TNM`` pipeline component."""
    # ``name`` is required by the spaCy factory protocol but unused here.
    return TNM(
        nlp,
        pattern=pattern,
        attr=attr,
    )

tnm

eds.tnm pipeline.

PERIOD_PROXIMITY_THRESHOLD = 3 module-attribute
TNM

Bases: BaseComponent

Tags and normalizes TNM mentions.

PARAMETER DESCRIPTION
nlp

Language pipeline object

TYPE: spacy.language.Language

pattern

List of regular expressions for TNM mentions.

TYPE: Optional[Union[List[str], str]]

attr

spaCy attribute to use

TYPE: str

Source code in edsnlp/pipelines/ner/scores/tnm/tnm.py
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
class TNM(BaseComponent):
    """
    Tags and normalizes TNM mentions.

    Parameters
    ----------
    nlp : spacy.language.Language
        Language pipeline object
    pattern : Optional[Union[List[str], str]]
        List of regular expressions for TNM mentions.
    attr : str
        spaCy attribute to use
    """

    # noinspection PyProtectedMember
    def __init__(
        self,
        nlp: Language,
        pattern: Optional[Union[List[str], str]],
        attr: str,
    ):

        self.nlp = nlp

        # Fall back to the package's default TNM pattern.
        if pattern is None:
            pattern = patterns.tnm_pattern

        if isinstance(pattern, str):
            pattern = [pattern]

        self.regex_matcher = RegexMatcher(attr=attr, alignment_mode="strict")
        self.regex_matcher.add("tnm", pattern)

        self.set_extensions()

    @staticmethod
    def set_extensions() -> None:
        """
        Declare the ``value`` span extension used to store the parsed TNM.
        """

        if not Span.has_extension("value"):
            Span.set_extension("value", default=None)

    def process(self, doc: Doc) -> List[Span]:
        """
        Find TNM mentions in doc.

        Parameters
        ----------
        doc:
            spaCy Doc object

        Returns
        -------
        spans:
            List of ``(span, groupdict)`` tuples, with overlapping
            matches removed by ``filter_spans``.
        """

        spans = self.regex_matcher(
            doc,
            as_spans=True,
            return_groupdict=True,
        )

        spans = filter_spans(spans)

        return spans

    def parse(self, spans: List[Tuple[Span, Dict[str, str]]]) -> List[Span]:
        """
        Parse TNM mentions using the groupdict returned by the matcher.

        Parameters
        ----------
        spans : List[Tuple[Span, Dict[str, str]]]
            List of tuples containing the spans and groupdict
            returned by the matcher.

        Returns
        -------
        List[Span]
            List of processed spans, with the TNM value parsed.
        """

        for span, groupdict in spans:

            # Store the structured TNM on the span and expose its
            # normalised form through ``kb_id_``.
            span._.value = models.TNM.parse_obj(groupdict)
            span.kb_id_ = span._.value.norm()

        return [span for span, _ in spans]

    def __call__(self, doc: Doc) -> Doc:
        """
        Tags TNM mentions.

        Parameters
        ----------
        doc : Doc
            spaCy Doc object

        Returns
        -------
        doc : Doc
            spaCy Doc object, annotated for TNM
        """
        # ``process`` already deduplicates via ``filter_spans``; no need
        # to filter a second time here (previous redundant call removed).
        spans = self.process(doc)

        spans = self.parse(spans)

        doc.spans["tnm"] = spans

        # Merge with existing entities; overlapping TNM spans that lose
        # to an existing entity are kept aside in ``doc.spans["discarded"]``.
        ents, discarded = filter_spans(list(doc.ents) + spans, return_discarded=True)

        doc.ents = ents

        if "discarded" not in doc.spans:
            doc.spans["discarded"] = []
        doc.spans["discarded"].extend(discarded)

        return doc
nlp = nlp instance-attribute
regex_matcher = RegexMatcher(attr=attr, alignment_mode='strict') instance-attribute
__init__(nlp, pattern, attr)
Source code in edsnlp/pipelines/ner/scores/tnm/tnm.py
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
def __init__(
    self,
    nlp: Language,
    pattern: Optional[Union[List[str], str]],
    attr: str,
):
    """Build the regex matcher and register span extensions."""

    self.nlp = nlp

    # Fall back to the package's default TNM pattern.
    if pattern is None:
        pattern = patterns.tnm_pattern

    if isinstance(pattern, str):
        pattern = [pattern]

    self.regex_matcher = RegexMatcher(attr=attr, alignment_mode="strict")
    self.regex_matcher.add("tnm", pattern)

    self.set_extensions()
set_extensions()

Set extensions for the TNM pipeline.

Source code in edsnlp/pipelines/ner/scores/tnm/tnm.py
51
52
53
54
55
56
57
58
@staticmethod
def set_extensions() -> None:
    """
    Declare the ``value`` span extension used to store the parsed TNM.
    """

    if not Span.has_extension("value"):
        Span.set_extension("value", default=None)
process(doc)

Find TNM mentions in doc.

PARAMETER DESCRIPTION
doc

spaCy Doc object

TYPE: Doc

RETURNS DESCRIPTION
spans

list of tnm spans

Source code in edsnlp/pipelines/ner/scores/tnm/tnm.py
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
def process(self, doc: Doc) -> List[Span]:
    """
    Find TNM mentions in doc.

    Parameters
    ----------
    doc:
        spaCy Doc object

    Returns
    -------
    spans:
        List of ``(span, groupdict)`` tuples for the TNM matches,
        with overlapping matches removed.
    """

    # ``return_groupdict=True`` yields the named capture groups needed
    # later by ``parse`` to build the structured TNM value.
    spans = self.regex_matcher(
        doc,
        as_spans=True,
        return_groupdict=True,
    )

    spans = filter_spans(spans)

    return spans
parse(spans)

Parse TNM mentions using the groupdict returned by the matcher.

PARAMETER DESCRIPTION
spans

List of tuples containing the spans and groupdict returned by the matcher.

TYPE: List[Tuple[Span, Dict[str, str]]]

RETURNS DESCRIPTION
List[Span]

List of processed spans, with the TNM value parsed.

Source code in edsnlp/pipelines/ner/scores/tnm/tnm.py
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
def parse(self, spans: List[Tuple[Span, Dict[str, str]]]) -> List[Span]:
    """
    Parse TNM mentions using the groupdict returned by the matcher.

    Parameters
    ----------
    spans : List[Tuple[Span, Dict[str, str]]]
        List of tuples containing the spans and groupdict
        returned by the matcher.

    Returns
    -------
    List[Span]
        List of processed spans, with the TNM value parsed.
    """

    for span, groupdict in spans:

        # Store the structured TNM on the span and expose its
        # normalised string through ``kb_id_``.
        span._.value = models.TNM.parse_obj(groupdict)
        span.kb_id_ = span._.value.norm()

    return [span for span, _ in spans]
__call__(doc)

Tags TNM mentions.

PARAMETER DESCRIPTION
doc

spaCy Doc object

TYPE: Doc

RETURNS DESCRIPTION
doc

spaCy Doc object, annotated for TNM

TYPE: Doc

Source code in edsnlp/pipelines/ner/scores/tnm/tnm.py
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
def __call__(self, doc: Doc) -> Doc:
    """
    Tags TNM mentions.

    Parameters
    ----------
    doc : Doc
        spaCy Doc object

    Returns
    -------
    doc : Doc
        spaCy Doc object, annotated for TNM
    """
    spans = self.process(doc)
    # NOTE(review): ``process`` already applies ``filter_spans``; this
    # second call appears redundant (though harmless) — confirm.
    spans = filter_spans(spans)

    spans = self.parse(spans)

    doc.spans["tnm"] = spans

    # Merge with existing entities; TNM spans overlapping an existing
    # entity are kept aside in ``doc.spans["discarded"]``.
    ents, discarded = filter_spans(list(doc.ents) + spans, return_discarded=True)

    doc.ents = ents

    if "discarded" not in doc.spans:
        doc.spans["discarded"] = []
    doc.spans["discarded"].extend(discarded)

    return doc
Back to top