Skip to content

edsnlp.pipelines.ner.scores.factory

create_component(nlp, name='eds.score', score_name=None, regex=None, value_extract=None, score_normalization=None, attr='NORM', window=7, flags=0, ignore_excluded=False, ignore_space_tokens=False)

PARAMETER DESCRIPTION
nlp

The spaCy object.

TYPE: Language

name

The name of the component.

TYPE: str DEFAULT: 'eds.score'

score_name

The name of the extracted score

TYPE: str DEFAULT: None

regex

A list of regexes to identify the score

TYPE: List[str] DEFAULT: None

attr

Whether to match on the text ('TEXT') or on the normalized text ('NORM')

TYPE: str DEFAULT: 'NORM'

value_extract

Regex with capturing group to get the score value

TYPE: str DEFAULT: None

score_normalization

Function that takes the "raw" value extracted from the value_extract regex, and should return:

  • None if no score could be extracted
  • The desired score value otherwise

TYPE: Union[str, Callable[[Union[str, None]], Any]] DEFAULT: None

window

Number of tokens to include after the score's mention to find the score's value

TYPE: int DEFAULT: 7

ignore_excluded

Whether to ignore excluded spans when matching

TYPE: bool DEFAULT: False

ignore_space_tokens

Whether to ignore space tokens when matching

TYPE: bool DEFAULT: False

flags

Regex flags to use when matching

TYPE: Union[re.RegexFlag, int] DEFAULT: 0

Source code in edsnlp/pipelines/ner/scores/factory.py
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
@deprecated_factory(
    "score",
    "eds.score",
    default_config=DEFAULT_CONFIG,
    assigns=["doc.ents", "doc.spans"],
)
@Language.factory(
    "eds.score",
    default_config=DEFAULT_CONFIG,
    assigns=["doc.ents", "doc.spans"],
)
def create_component(
    nlp: Language,
    name: str = "eds.score",
    score_name: str = None,
    regex: List[str] = None,
    value_extract: str = None,
    score_normalization: Union[str, Callable[[Union[str, None]], Any]] = None,
    attr: str = "NORM",
    window: int = 7,
    flags: Union[re.RegexFlag, int] = 0,
    ignore_excluded: bool = False,
    ignore_space_tokens: bool = False,
):
    """
    Build the `Score` component registered under the `eds.score` factory.

    Every argument except `name` is handed over to `Score` unchanged.

    Parameters
    ----------
    nlp : Language
        The spaCy object.
    name : str
        The name of the component.
    score_name : str
        The name of the extracted score.
    regex : List[str]
        A list of regexes used to identify the score.
    attr : str
        Whether to match on the text ('TEXT') or on the normalized
        text ('NORM').
    value_extract : str
        Regex with a capturing group used to get the score value.
    score_normalization : Union[str, Callable[[Union[str,None]], Any]]
        Function that receives the "raw" value extracted by the
        `value_extract` regex, and should return:

        - None if no score could be extracted
        - The desired score value otherwise
    window : int
        Number of tokens to include after the score's mention to find the
        score's value.
    ignore_excluded : bool
        Whether to ignore excluded spans when matching.
    ignore_space_tokens : bool
        Whether to ignore space tokens when matching.
    flags : Union[re.RegexFlag, int]
        Regex flags to use when matching.

    Returns
    -------
    Score
        The component built from the arguments above.
    """
    # `name` is part of the factory signature but is not forwarded to `Score`.
    # NOTE(review): the annotation also lets `score_normalization` be a str;
    # how `Score` resolves a string is not visible here -- confirm.
    score_settings = dict(
        score_name=score_name,
        regex=regex,
        value_extract=value_extract,
        score_normalization=score_normalization,
        attr=attr,
        flags=flags,
        window=window,
        ignore_excluded=ignore_excluded,
        ignore_space_tokens=ignore_space_tokens,
    )
    return Score(nlp, **score_settings)