Skip to content

edsnlp.pipelines.core.contextual_matcher.models

Flags = Union[re.RegexFlag, int] module-attribute

Window = Union[Tuple[int, int], List[int], int] module-attribute

AssignDict

Bases: dict

Custom dictionary that overrides the `__setitem__` method depending on the `reduce_mode`

Source code in edsnlp/pipelines/core/contextual_matcher/models.py
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
class AssignDict(dict):
    """
    Custom dictionary that overrides the __setitem__ method
    depending on the reduce_mode

    Parameters
    ----------
    reduce_mode : dict
        Maps every key that will be assigned to its reduction strategy:
        ``None`` (accumulate all assigned values in lists),
        ``"keep_first"`` or ``"keep_last"``.
    """

    def __init__(self, reduce_mode: dict):
        super().__init__()
        self.reduce_mode = reduce_mode
        # Dispatch table built once: reduce mode -> setter closure.
        self._setitem_ = self.__setitem_options__()

    def __missing__(self, key):
        # Placeholder returned (not stored) when `key` has no value yet:
        # empty lists for the accumulating mode, an empty dict otherwise.
        return (
            {
                "span": [],
                "value_span": [],
                "value_text": [],
            }
            if self.reduce_mode[key] is None
            else {}
        )

    def __setitem__(self, key, value):
        # Raises KeyError when `key` is absent from `reduce_mode` or when its
        # mode is not one of the handlers built by `__setitem_options__`.
        self._setitem_[self.reduce_mode[key]](key, value)

    def __setitem_options__(self):
        """Build the mapping from reduce mode to the setter implementing it."""

        def keep_list(key, value):
            # Accumulate: append the new entries to the lists already stored.
            old_values = self[key]
            # Work on a copy so the dict handed in by the caller is left
            # untouched; any extra keys it carries are preserved.
            merged = dict(value)
            for field in ("span", "value_span", "value_text"):
                merged[field] = old_values[field] + [value[field]]

            dict.__setitem__(self, key, merged)

        def keep_first(key, value):
            # Keep the value whose span starts earliest; a tie on `start`
            # replaces the stored value.
            old_values = self[key]
            if (
                old_values.get("span") is None
                or value["span"].start <= old_values["span"].start
            ):
                dict.__setitem__(self, key, value)

        def keep_last(key, value):
            # Keep the value whose span starts latest; a tie on `start`
            # replaces the stored value.
            old_values = self[key]
            if (
                old_values.get("span") is None
                or value["span"].start >= old_values["span"].start
            ):
                dict.__setitem__(self, key, value)

        return {
            None: keep_list,
            "keep_first": keep_first,
            "keep_last": keep_last,
        }

reduce_mode = reduce_mode instance-attribute

__init__(reduce_mode)

Source code in edsnlp/pipelines/core/contextual_matcher/models.py
38
39
40
41
def __init__(self, reduce_mode: dict):
    """Start as an empty dict and remember the per-key reduce modes."""
    super().__init__()
    self.reduce_mode = reduce_mode
    # The dispatch table is built once here and reused by every assignment.
    handlers = self.__setitem_options__()
    self._setitem_ = handlers

__missing__(key)

Source code in edsnlp/pipelines/core/contextual_matcher/models.py
43
44
45
46
47
48
49
50
51
52
def __missing__(self, key):
    """
    Return the placeholder used when `key` has no stored value yet.

    The placeholder is returned but not stored. Keys whose reduce mode is
    ``None`` get empty lists for each field; any other mode gets an empty dict.
    """
    if self.reduce_mode[key] is not None:
        return {}
    return {
        "span": [],
        "value_span": [],
        "value_text": [],
    }

__setitem__(key, value)

Source code in edsnlp/pipelines/core/contextual_matcher/models.py
54
55
def __setitem__(self, key, value):
    """Delegate the assignment to the setter matching the reduce mode of `key`."""
    handlers = self._setitem_
    mode = self.reduce_mode[key]
    handlers[mode](key, value)

__setitem_options__()

Source code in edsnlp/pipelines/core/contextual_matcher/models.py
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
def __setitem_options__(self):
    """
    Build the mapping from reduce mode to the setter implementing it.

    Returns
    -------
    dict
        ``None`` -> accumulate values in lists, ``"keep_first"`` -> keep the
        value whose span starts earliest, ``"keep_last"`` -> keep the value
        whose span starts latest.
    """

    def keep_list(key, value):
        # Accumulate: append the new entries to the lists already stored.
        old_values = self[key]
        # Work on a copy so the dict handed in by the caller is left
        # untouched; any extra keys it carries are preserved.
        merged = dict(value)
        for field in ("span", "value_span", "value_text"):
            merged[field] = old_values[field] + [value[field]]

        dict.__setitem__(self, key, merged)

    def keep_first(key, value):
        # A tie on `start` replaces the stored value.
        old_values = self[key]
        if (
            old_values.get("span") is None
            or value["span"].start <= old_values["span"].start
        ):
            dict.__setitem__(self, key, value)

    def keep_last(key, value):
        # A tie on `start` replaces the stored value.
        old_values = self[key]
        if (
            old_values.get("span") is None
            or value["span"].start >= old_values["span"].start
        ):
            dict.__setitem__(self, key, value)

    return {
        None: keep_list,
        "keep_first": keep_first,
        "keep_last": keep_last,
    }

SingleExcludeModel

Bases: BaseModel

Source code in edsnlp/pipelines/core/contextual_matcher/models.py
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
class SingleExcludeModel(BaseModel):
    """
    One exclusion rule: pattern(s), a window and optional regex flags.
    """

    # One pattern or a list of patterns; a lone string is wrapped into a list.
    regex: ListOrStr = []
    # Normalised to a 2-tuple by `normalize_window`.
    window: Window
    regex_flags: Optional[Flags] = None

    @validator("regex")
    def exclude_regex_validation(cls, v):
        """Wrap a single pattern string into a one-element list."""
        # isinstance (rather than an exact type comparison) also accepts
        # subclasses of str.
        if isinstance(v, str):
            v = [v]
        return v

    _normalize_window = validator("window", allow_reuse=True)(normalize_window)

regex: ListOrStr = [] class-attribute

window: Window = None class-attribute

regex_flags: Optional[Flags] = None class-attribute

exclude_regex_validation(v)

Source code in edsnlp/pipelines/core/contextual_matcher/models.py
94
95
96
97
98
@validator("regex")
def exclude_regex_validation(cls, v):
    """Wrap a single pattern string into a one-element list."""
    # isinstance (rather than an exact type comparison) also accepts
    # subclasses of str.
    if isinstance(v, str):
        v = [v]
    return v

ExcludeModel

Bases: BaseModel

Source code in edsnlp/pipelines/core/contextual_matcher/models.py
103
104
105
106
107
108
109
110
111
112
113
114
class ExcludeModel(BaseModel, extra=Extra.forbid):
    """
    Root model holding one or several `SingleExcludeModel` entries.
    """

    __root__: Union[
        List[SingleExcludeModel],
        SingleExcludeModel,
    ]

    @validator("__root__", pre=True)
    def item_to_list(cls, v):
        """Wrap a lone entry in a list; lists are passed through unchanged."""
        return v if isinstance(v, list) else [v]

__root__: Union[List[SingleExcludeModel], SingleExcludeModel] = None class-attribute

item_to_list(v)

Source code in edsnlp/pipelines/core/contextual_matcher/models.py
110
111
112
113
114
@validator("__root__", pre=True)
def item_to_list(cls, v):
    """Wrap a lone entry in a list; lists are passed through unchanged."""
    return v if isinstance(v, list) else [v]

SingleAssignModel

Bases: BaseModel

Source code in edsnlp/pipelines/core/contextual_matcher/models.py
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
class SingleAssignModel(BaseModel):
    """
    One assignment rule: a named regex with exactly one capturing group,
    a window, and options controlling how matches are used.
    """

    name: str
    # Must contain exactly one capturing group (see the validator below).
    regex: str
    # Normalised to a 2-tuple by `normalize_window`.
    window: Window
    regex_flags: Optional[Flags] = None
    replace_entity: bool = False
    # NOTE(review): presumably one of None / "keep_first" / "keep_last",
    # the modes handled by `AssignDict` - not enforced here, confirm.
    reduce_mode: Optional[str] = None

    @validator("regex")
    def check_single_regex_group(cls, pat):
        """
        Check that the pattern defines exactly one capturing group.

        Raises
        ------
        ValueError
            If the compiled pattern has zero or several capturing groups.
        """
        n_groups = re.compile(pat).groups
        # Explicit raise instead of `assert`: assertions are stripped when
        # Python runs with -O, which would silently disable this check.
        if n_groups != 1:
            raise ValueError(
                (
                    "The pattern {pat} should have only one capturing group, "
                    "not {n_groups}"
                ).format(
                    pat=pat,
                    n_groups=n_groups,
                )
            )

        return pat

    _normalize_window = validator("window", allow_reuse=True)(normalize_window)

name: str = None class-attribute

regex: str = None class-attribute

window: Window = None class-attribute

regex_flags: Optional[Flags] = None class-attribute

replace_entity: bool = False class-attribute

reduce_mode: Optional[str] = None class-attribute

check_single_regex_group(pat)

Source code in edsnlp/pipelines/core/contextual_matcher/models.py
125
126
127
128
129
130
131
132
133
134
135
136
@validator("regex")
def check_single_regex_group(cls, pat):
    """
    Check that the pattern defines exactly one capturing group.

    Raises
    ------
    ValueError
        If the compiled pattern has zero or several capturing groups.
    """
    n_groups = re.compile(pat).groups
    # Explicit raise instead of `assert`: assertions are stripped when
    # Python runs with -O, which would silently disable this check.
    if n_groups != 1:
        raise ValueError(
            (
                "The pattern {pat} should have only one capturing group, "
                "not {n_groups}"
            ).format(
                pat=pat,
                n_groups=n_groups,
            )
        )

    return pat

AssignModel

Bases: BaseModel

Source code in edsnlp/pipelines/core/contextual_matcher/models.py
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
class AssignModel(BaseModel, extra=Extra.forbid):
    """
    Root model holding one or several `SingleAssignModel` entries.

    Names must be unique, and at most one entry may set
    `replace_entity=True`.
    """

    __root__: Union[
        List[SingleAssignModel],
        SingleAssignModel,
    ]

    @validator("__root__", pre=True)
    def item_to_list(cls, v):
        """Wrap a lone entry in a list; lists are passed through unchanged."""
        if not isinstance(v, list):
            return [v]
        return v

    @validator("__root__")
    def name_uniqueness(cls, v):
        """Reject configurations where two entries share the same `name`."""
        names = [item.name for item in v]
        # Explicit raise instead of `assert`, which is stripped under -O.
        if len(names) != len(set(names)):
            raise ValueError("Each `name` field should be unique")
        return v

    @validator("__root__")
    def replace_uniqueness(cls, v):
        """Reject configurations with more than one `replace_entity=True`."""
        replace = [item for item in v if item.replace_entity]
        # Explicit raise instead of `assert`, which is stripped under -O.
        if len(replace) > 1:
            raise ValueError(
                "Only 1 assign element can be set with `replace_entity=True`"
            )
        return v

__root__: Union[List[SingleAssignModel], SingleAssignModel] = None class-attribute

item_to_list(v)

Source code in edsnlp/pipelines/core/contextual_matcher/models.py
148
149
150
151
152
@validator("__root__", pre=True)
def item_to_list(cls, v):
    """Wrap a lone entry in a list; lists are passed through unchanged."""
    return v if isinstance(v, list) else [v]

name_uniqueness(v)

Source code in edsnlp/pipelines/core/contextual_matcher/models.py
154
155
156
157
158
@validator("__root__")
def name_uniqueness(cls, v):
    """Reject configurations where two entries share the same `name`."""
    names = [item.name for item in v]
    # Explicit raise instead of `assert`, which is stripped under -O.
    if len(names) != len(set(names)):
        raise ValueError("Each `name` field should be unique")
    return v

replace_uniqueness(v)

Source code in edsnlp/pipelines/core/contextual_matcher/models.py
160
161
162
163
164
165
166
@validator("__root__")
def replace_uniqueness(cls, v):
    """Reject configurations with more than one `replace_entity=True`."""
    replace = [item for item in v if item.replace_entity]
    # Explicit raise instead of `assert`, which is stripped under -O.
    if len(replace) > 1:
        raise ValueError(
            "Only 1 assign element can be set with `replace_entity=True`"
        )
    return v

SingleConfig

Bases: BaseModel

Source code in edsnlp/pipelines/core/contextual_matcher/models.py
169
170
171
172
173
174
175
176
177
class SingleConfig(BaseModel, extra=Extra.forbid):
    """
    Configuration of a single pattern; unknown fields are rejected.
    """

    # Required; `FullConfig` checks that it is unique across entries.
    source: str
    terms: ListOrStr = []
    regex: ListOrStr = []
    regex_attr: Optional[str] = None
    # Declared Optional to match the `None` default and the sibling models.
    regex_flags: Optional[Flags] = None
    exclude: Optional[ExcludeModel] = []
    assign: Optional[AssignModel] = []

source: str = None class-attribute

terms: ListOrStr = [] class-attribute

regex: ListOrStr = [] class-attribute

regex_attr: Optional[str] = None class-attribute

regex_flags: Union[re.RegexFlag, int] = None class-attribute

exclude: Optional[ExcludeModel] = [] class-attribute

assign: Optional[AssignModel] = [] class-attribute

FullConfig

Bases: BaseModel

Source code in edsnlp/pipelines/core/contextual_matcher/models.py
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
class FullConfig(BaseModel, extra=Extra.forbid):
    """
    Root model holding one or several `SingleConfig` entries whose
    `source` values must be unique.
    """

    __root__: Union[
        List[SingleConfig],
        SingleConfig,
    ]

    @validator("__root__", pre=True)
    def pattern_to_list(cls, v):
        """Wrap a lone entry in a list; lists are passed through unchanged."""
        if not isinstance(v, list):
            return [v]
        return v

    @validator("__root__", pre=True)
    def source_uniqueness(cls, v):
        """
        Reject configurations where two entries share the same `source`.

        Runs on the raw input (`pre=True`), so entries may be plain dicts or
        already-built objects exposing a `source` attribute.
        """
        sources = []
        for item in v:
            if isinstance(item, dict):
                # A missing `source` is skipped here rather than raising a
                # bare KeyError; `source` is a required field of
                # `SingleConfig`, whose validation reports it.
                if "source" in item:
                    sources.append(item["source"])
            elif hasattr(item, "source"):
                sources.append(item.source)
        # Explicit raise instead of `assert`, which is stripped under -O.
        if len(sources) != len(set(sources)):
            raise ValueError("Each `source` field should be unique")
        return v

__root__: Union[List[SingleConfig], SingleConfig] = None class-attribute

pattern_to_list(v)

Source code in edsnlp/pipelines/core/contextual_matcher/models.py
187
188
189
190
191
@validator("__root__", pre=True)
def pattern_to_list(cls, v):
    """Wrap a lone entry in a list; lists are passed through unchanged."""
    return v if isinstance(v, list) else [v]

source_uniqueness(v)

Source code in edsnlp/pipelines/core/contextual_matcher/models.py
193
194
195
196
197
@validator("__root__", pre=True)
def source_uniqueness(cls, v):
    """
    Reject configurations where two entries share the same `source`.

    Runs on the raw input (`pre=True`), so entries may be plain dicts or
    already-built objects exposing a `source` attribute.
    NOTE(review): assumes `v` was already wrapped into a list by
    `pattern_to_list` - confirm validator ordering.
    """
    sources = []
    for item in v:
        if isinstance(item, dict):
            # A missing `source` is skipped here rather than raising a bare
            # KeyError; `source` is a required field of `SingleConfig`,
            # whose validation reports it.
            if "source" in item:
                sources.append(item["source"])
        elif hasattr(item, "source"):
            sources.append(item.source)
    # Explicit raise instead of `assert`, which is stripped under -O.
    if len(sources) != len(set(sources)):
        raise ValueError("Each `source` field should be unique")
    return v

normalize_window(cls, v)

Source code in edsnlp/pipelines/core/contextual_matcher/models.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
def normalize_window(cls, v):
    """
    Normalise a `window` value to a ``(start, end)`` pair.

    Parameters
    ----------
    cls
        Unused; present so the function can be reused as a validator.
    v : int, list or tuple
        A non-zero integer, or a pair whose first element is strictly
        smaller than the second.

    Returns
    -------
    tuple
        ``(v, 0)`` for a negative integer, ``(0, v)`` for a positive one,
        otherwise the pair itself (lists are converted to tuples).

    Raises
    ------
    ValueError
        If a list does not have exactly two elements, if the integer is 0,
        or if the pair is not strictly increasing.
    """
    # Explicit raises instead of `assert`: assertions are stripped under -O,
    # in which case a window of 0 would fall through to `v[0]` and fail with
    # an unrelated TypeError.
    if isinstance(v, list):
        if len(v) != 2:
            raise ValueError(
                "`window` should be a tuple/list of two integer, or a single integer"
            )
        v = tuple(v)
    if isinstance(v, int):
        if v == 0:
            raise ValueError("The provided `window` should not be 0")
        return (v, 0) if v < 0 else (0, v)
    if not v[0] < v[1]:
        raise ValueError("The provided `window` should contain at least 1 token")
    return v