Skip to content

edsnlp.pipelines.ner.scores.sofa

sofa

Sofa

Bases: Score

Matcher component to extract the SOFA score

PARAMETER DESCRIPTION
nlp

The spaCy object.

TYPE: Language

score_name

The name of the extracted score

TYPE: str

regex

A list of regexes to identify the SOFA score

TYPE: List[str]

attr

Whether to match on the text ('TEXT') or on the normalized text ('CUSTOM_NORM')

TYPE: str

method_regex

Regex with capturing group to get the score extraction method (e.g. "à l'admission", "à 24H", "Maximum")

TYPE: str

value_regex

Regex to extract the score value

TYPE: str

score_normalization

Function that takes the "raw" value captured by the value_regex regex (or None when nothing was captured) and should return None if no score could be extracted, or the desired score value otherwise.

TYPE: Callable[[Union[str,None]], Any]

window

Number of tokens to include after the score's mention to find the score's value

TYPE: int

Source code in edsnlp/pipelines/ner/scores/sofa/sofa.py
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
class Sofa(Score):
    """
    Matcher component to extract the SOFA score

    Parameters
    ----------
    nlp : Language
        The spaCy object.
    score_name : str
        The name of the extracted score
    regex : List[str]
        A list of regexes to identify the SOFA score
    attr : str
        Whether to match on the text ('TEXT') or on the normalized text
        ('CUSTOM_NORM')
    method_regex : str
        Regex with capturing group to get the score extraction method
        (e.g. "à l'admission", "à 24H", "Maximum")
    value_regex : str
        Regex to extract the score value
    score_normalization : Union[str, Callable[[Union[str,None]], Any]]
        Function that takes the "raw" value captured by `value_regex`
        (or None when nothing was captured), and should return
        - None if no score could be extracted
        - The desired score value else
    window : int
        Number of tokens to include after the score's mention to find the
        score's value
    verbose : int
        Forwarded unchanged to the `Score` base class.
    ignore_excluded : bool
        Forwarded to the `Score` base class; also handed to `get_text` when
        reading the snippet that follows each match.
    """

    # Named groups of `method_regex` and the method label each one stands for,
    # listed in priority order (the first group that matched wins).
    _METHOD_GROUPS = (
        ("max", "Maximum"),
        ("vqheures", "24H"),
        ("admission", "A l'admission"),
    )

    def __init__(
        self,
        nlp: Language,
        score_name: str,
        regex: List[str],
        attr: str,
        method_regex: str,
        value_regex: str,
        score_normalization: Union[str, Callable[[Union[str, None]], Any]],
        window: int,
        verbose: int,
        ignore_excluded: bool,
    ):

        # `after_extract` is left empty: the value is located by
        # `score_filtering` through `method_regex` / `value_regex` instead.
        super().__init__(
            nlp,
            score_name=score_name,
            regex=regex,
            after_extract=[],
            score_normalization=score_normalization,
            attr=attr,
            window=window,
            verbose=verbose,
            ignore_excluded=ignore_excluded,
        )

        self.method_regex = method_regex
        self.value_regex = value_regex

        self.set_extensions()

    @staticmethod
    def set_extensions() -> None:
        """
        Run the parent class's `set_extensions`, then declare the
        `score_method` extension on `Span` if it does not exist yet.
        """
        super(Sofa, Sofa).set_extensions()
        if not Span.has_extension("score_method"):
            Span.set_extension("score_method", default=None)

    def score_filtering(self, ents: List[Span]) -> List[Span]:
        """
        Extracts, if available, the value of the score.
        Normalizes the score via the provided `self.score_normalization` method.

        Parameters
        ----------
        ents: List[Span]
            List of spaCy's spans extracted by the score matcher

        Returns
        -------
        ents: List[Span]
            The spans for which a score value was found, with the
            `score_name`, `score_value` and `score_method` extensions set.
            Spans without a usable value are dropped.
        """

        to_keep_ents = []

        for ent in ents:
            after_snippet = get_text(
                ent._.after_snippet,
                attr=self.attr,
                ignore_excluded=self.ignore_excluded,
            )
            matches = re.search(self.method_regex, after_snippet)

            # Start from the "no method found" defaults on every iteration, so
            # a label can never leak over from the previous entity nor be left
            # unbound when the regex matches without any method group.
            method = "Non précisée"
            value = after_snippet

            if matches is not None:
                groups = matches.groupdict()
                value = groups["after_value"]
                for group_name, label in self._METHOD_GROUPS:
                    if groups.get(group_name) is not None:
                        method = label
                        break

            # Use match instead of search to only look at the beginning
            digit_match = re.match(self.value_regex, value)
            digit_value = None if digit_match is None else digit_match.group(1)

            normalized_value = self.score_normalization(digit_value)
            if normalized_value is not None:
                ent._.score_name = self.score_name
                ent._.score_value = int(normalized_value)
                ent._.score_method = method
                to_keep_ents.append(ent)

        return to_keep_ents
method_regex = method_regex instance-attribute
value_regex = value_regex instance-attribute
__init__(nlp, score_name, regex, attr, method_regex, value_regex, score_normalization, window, verbose, ignore_excluded)
Source code in edsnlp/pipelines/ner/scores/sofa/sofa.py
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
def __init__(
    self,
    nlp: Language,
    score_name: str,
    regex: List[str],
    attr: str,
    method_regex: str,
    value_regex: str,
    score_normalization: Union[str, Callable[[Union[str, None]], Any]],
    window: int,
    verbose: int,
    ignore_excluded: bool,
):
    """
    Initialise the parent matcher, then store the SOFA-specific regexes.

    `method_regex` and `value_regex` are kept on the instance; every other
    argument is handed to the parent initialiser, together with an empty
    `after_extract` list.
    """
    # Options forwarded as keywords to the parent initialiser.
    parent_options = dict(
        score_name=score_name,
        regex=regex,
        after_extract=[],
        score_normalization=score_normalization,
        attr=attr,
        window=window,
        verbose=verbose,
        ignore_excluded=ignore_excluded,
    )
    super().__init__(nlp, **parent_options)

    self.method_regex, self.value_regex = method_regex, value_regex

    self.set_extensions()
set_extensions()
Source code in edsnlp/pipelines/ner/scores/sofa/sofa.py
71
72
73
74
75
@staticmethod
def set_extensions() -> None:
    """
    Run the parent class's `set_extensions`, then declare the `score_method`
    extension on `Span` (default None) unless it is already declared.
    """
    super(Sofa, Sofa).set_extensions()
    if Span.has_extension("score_method"):
        # Already declared: nothing left to do.
        return
    Span.set_extension("score_method", default=None)
score_filtering(ents)

Extracts, if available, the value of the score. Normalizes the score via the provided self.score_normalization method.

PARAMETER DESCRIPTION
ents

List of spaCy's spans extracted by the score matcher

TYPE: List[Span]

RETURNS DESCRIPTION
ents

List of spaCy's spans, with, if found, an added score_value extension

Source code in edsnlp/pipelines/ner/scores/sofa/sofa.py
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
def score_filtering(self, ents: List[Span]) -> List[Span]:
    """
    Extracts, if available, the value of the score.
    Normalizes the score via the provided `self.score_normalization` method.

    Parameters
    ----------
    ents: List[Span]
        List of spaCy's spans extracted by the score matcher

    Returns
    -------
    ents: List[Span]
        The spans for which a score value was found, with the `score_name`,
        `score_value` and `score_method` extensions set. Spans without a
        usable value are dropped.
    """

    # Named groups of `self.method_regex` and the label each one stands for,
    # in priority order (the first group that matched wins).
    method_groups = (
        ("max", "Maximum"),
        ("vqheures", "24H"),
        ("admission", "A l'admission"),
    )

    to_keep_ents = []

    for ent in ents:
        after_snippet = get_text(
            ent._.after_snippet,
            attr=self.attr,
            ignore_excluded=self.ignore_excluded,
        )
        matches = re.search(self.method_regex, after_snippet)

        # Start from the "no method found" defaults on every iteration, so a
        # label can never leak over from the previous entity nor be left
        # unbound when the regex matches without any method group.
        method = "Non précisée"
        value = after_snippet

        if matches is not None:
            groups = matches.groupdict()
            value = groups["after_value"]
            for group_name, label in method_groups:
                if groups.get(group_name) is not None:
                    method = label
                    break

        # Use match instead of search to only look at the beginning
        digit_match = re.match(self.value_regex, value)
        digit_value = None if digit_match is None else digit_match.group(1)

        normalized_value = self.score_normalization(digit_value)
        if normalized_value is not None:
            ent._.score_name = self.score_name
            ent._.score_value = int(normalized_value)
            ent._.score_method = method
            to_keep_ents.append(ent)

    return to_keep_ents

patterns

regex = ['\\bsofa\\b'] module-attribute

method_regex = 'sofa.*?((?P<max>max\\w*)|(?P<vqheures>24h\\w*)|(?P<admission>admission\\w*))(?P<after_value>(.|\\n)*)' module-attribute

value_regex = '.*?.[\\n\\W]*?(\\d+)[^h\\d]' module-attribute

score_normalization_str = 'score_normalization.sofa' module-attribute

score_normalization(extracted_score)

Sofa score normalization. If available, returns the integer value of the SOFA score.

Source code in edsnlp/pipelines/ner/scores/sofa/patterns.py
17
18
19
20
21
22
23
24
25
@spacy.registry.misc(score_normalization_str)
def score_normalization(extracted_score: Union[str, None]):
    """
    Sofa score normalization.
    If available, returns the integer value of the SOFA score.

    Parameters
    ----------
    extracted_score : Union[str, None]
        Raw score text, or None when nothing was extracted.

    Returns
    -------
    The score as an `int` when it lies between 0 and 29 inclusive; None when
    the input is None, is not a valid integer literal, or is out of range.
    """
    if extracted_score is None:
        return None

    try:
        score = int(extracted_score)
    except ValueError:
        # A non-numeric capture means "no score", not a pipeline crash.
        return None

    # NOTE(review): the SOFA scale is usually described as 0-24; the wider
    # historical bound (0-29) is kept unchanged here — confirm it is intended.
    return score if 0 <= score < 30 else None

factory

DEFAULT_CONFIG = dict(regex=patterns.regex, method_regex=patterns.method_regex, value_regex=patterns.value_regex, score_normalization=patterns.score_normalization_str, attr='NORM', window=20, verbose=0, ignore_excluded=False) module-attribute

create_component(nlp, name, regex, method_regex, value_regex, score_normalization, attr, window, verbose, ignore_excluded)

Source code in edsnlp/pipelines/ner/scores/sofa/factory.py
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
@deprecated_factory("SOFA", "eds.SOFA", default_config=DEFAULT_CONFIG)
@Language.factory("eds.SOFA", default_config=DEFAULT_CONFIG)
def create_component(
    nlp: Language,
    name: str,
    regex: List[str],
    method_regex: str,
    value_regex: str,
    score_normalization: Union[str, Callable[[Union[str, None]], Any]],
    attr: str,
    window: int,
    verbose: int,
    ignore_excluded: bool,
):
    """
    Factory registered as "eds.SOFA" (and under the deprecated name "SOFA").

    Builds a `Sofa` component from the given configuration; the component
    `name` is used as the `score_name` of the resulting matcher.
    """
    component_options = dict(
        score_name=name,
        regex=regex,
        method_regex=method_regex,
        value_regex=value_regex,
        score_normalization=score_normalization,
        attr=attr,
        window=window,
        verbose=verbose,
        ignore_excluded=ignore_excluded,
    )
    return Sofa(nlp, **component_options)
Back to top