Skip to content

edsnlp.pipelines.trainable.span_qualifier.utils

make_candidate_getter

Source code in edsnlp/pipelines/trainable/span_qualifier/utils.py
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
class make_candidate_getter:
    """
    Callable object that extracts, from a document, the spans to qualify and
    the list of qualifiers allowed for each of these spans.
    """

    def __init__(
        self,
        on_ents: Optional[Union[bool, Sequence[str]]] = None,
        on_span_groups: Union[
            bool, Sequence[str], Mapping[str, Union[bool, Sequence[str]]]
        ] = False,
        qualifiers: Optional[Sequence[str]] = None,
        label_constraints: Optional[Dict[str, List[str]]] = None,
    ):
        """
        Make a span qualifier candidate getter function.

        Parameters
        ----------
        on_ents: Optional[Union[bool, Sequence[str]]]
            Whether to look into `doc.ents` for spans to classify. If a list of strings
            is provided, only the spans with the given labels will be considered. If
            None and `on_span_groups` is falsy, the labels mentioned in
            `label_constraints` will be used (or True if there are no constraints).
        on_span_groups: Union[bool, Sequence[str], Mapping[str, Union[bool, Sequence[str]]]]
            Whether to look into `doc.spans` for spans to classify:

            - If True, all span groups will be considered
            - If False, no span group will be considered
            - If a list of str is provided, only these span groups will be kept
            - If a mapping is provided, the keys are the span group names and the values
              are either a list of allowed labels in the group or True to keep them all
        qualifiers: Optional[Sequence[str]]
            The qualifiers to predict or train on. If None, keys from the
            `label_constraints` will be used
        label_constraints: Optional[Dict[str, List[str]]]
            Constraints to select qualifiers for each span depending on their labels.
            Keys of the dict are the qualifiers and values are the labels for which
            the qualifier is allowed. If None, all qualifiers will be used for all spans

        Raises
        ------
        ValueError
            If both `qualifiers` and `label_constraints` are None.
        """

        if qualifiers is None and label_constraints is None:
            raise ValueError(
                "Either `qualifiers` or `label_constraints` must be given to "
                "provide the qualifiers to predict / train on."
            )
        elif qualifiers is None:
            qualifiers = list(label_constraints.keys())

        # No explicit span source was requested: fall back on `doc.ents`,
        # restricted to the constrained labels when constraints are given.
        if not on_span_groups and on_ents is None:
            if label_constraints is None:
                on_ents = True
            else:
                on_ents = sorted(
                    set(
                        label
                        for qualifier in label_constraints
                        for label in label_constraints[qualifier]
                    )
                )

        self.span_getter = make_span_getter(on_ents, on_span_groups)
        self.label_constraints = label_constraints
        self.qualifiers = qualifiers

    def __call__(
        self,
        doc: Doc,
    ) -> Tuple[Spans, Optional[Spans], SpanGroups, List[List[str]]]:
        """
        Collect the candidate spans of `doc` and their allowed qualifiers.

        Parameters
        ----------
        doc: Doc
            The document passed to the span getter.

        Returns
        -------
        Tuple[Spans, Optional[Spans], SpanGroups, List[List[str]]]
            The three values returned by the span getter (called with
            `return_origin=True`), followed by one list of qualifier names
            per flattened span. Each of these lists is a distinct object.
        """
        flattened_spans, ents, span_groups = self.span_getter(
            doc,
            return_origin=True,
        )

        if self.label_constraints:
            # A qualifier without any constraint applies to every span; a
            # constrained one only applies to spans bearing an allowed label.
            span_qualifiers = [
                [
                    qualifier
                    for qualifier in self.qualifiers
                    if qualifier not in self.label_constraints
                    or span.label_ in self.label_constraints[qualifier]
                ]
                for span in flattened_spans
            ]
        else:
            # Build a fresh list per span: `[self.qualifiers] * n` would make
            # every entry alias the same object (and `self.qualifiers` itself),
            # so mutating one entry would silently alter all the others.
            span_qualifiers = [list(self.qualifiers) for _ in flattened_spans]
        return flattened_spans, ents, span_groups, span_qualifiers

__init__(on_ents=None, on_span_groups=False, qualifiers=None, label_constraints=None)

Make a span qualifier candidate getter function.

PARAMETER DESCRIPTION
on_ents

Whether to look into doc.ents for spans to classify. If a list of strings is provided, only the spans with the given labels will be considered. If None and on_span_groups is False, the labels mentioned in label_constraints will be used.

TYPE: Optional[Union[bool, Sequence[str]]] DEFAULT: None

on_span_groups

Whether to look into doc.spans for spans to classify:

  • If True, all span groups will be considered
  • If False, no span group will be considered
  • If a list of str is provided, only these span groups will be kept
  • If a mapping is provided, the keys are the span group names and the values are either a list of allowed labels in the group or True to keep them all

TYPE: Union[bool, Sequence[str], Mapping[str, Union[bool, Sequence[str]]]] DEFAULT: False

qualifiers

The qualifiers to predict or train on. If None, keys from the label_constraints will be used

TYPE: Optional[Sequence[str]] DEFAULT: None

label_constraints

Constraints to select qualifiers for each span depending on their labels. Keys of the dict are the qualifiers and values are the labels for which the qualifier is allowed. If None, all qualifiers will be used for all spans

TYPE: Optional[Dict[str, List[str]]] DEFAULT: None

RETURNS DESCRIPTION
Callable[[Doc], Tuple[Spans, Optional[Spans], SpanGroups, List[List[str]]]]
Source code in edsnlp/pipelines/trainable/span_qualifier/utils.py
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
def __init__(
    self,
    on_ents: Optional[Union[bool, Sequence[str]]] = None,
    on_span_groups: Union[
        bool, Sequence[str], Mapping[str, Union[bool, Sequence[str]]]
    ] = False,
    qualifiers: Optional[Sequence[str]] = None,
    label_constraints: Optional[Dict[str, List[str]]] = None,
):
    """
    Configure the candidate getter of a span qualifier.

    Parameters
    ----------
    on_ents: Optional[Union[bool, Sequence[str]]]
        Whether `doc.ents` should be searched for spans to classify. A list of
        strings restricts the search to spans bearing one of these labels. When
        None and `on_span_groups` is falsy, the labels listed in
        `label_constraints` are used (or True if there are no constraints).
    on_span_groups: Union[bool, Sequence[str], Mapping[str, Union[bool, Sequence[str]]]]
        Whether `doc.spans` should be searched for spans to classify:

        - True: every span group is considered
        - False: no span group is considered
        - a list of str: only the named span groups are kept
        - a mapping: keys are span group names, values are either the list of
          labels allowed in that group or True to keep them all
    qualifiers: Optional[Sequence[str]]
        The qualifiers to predict or train on. Defaults to the keys of
        `label_constraints` when None.
    label_constraints: Optional[Dict[str, List[str]]]
        Maps each qualifier to the labels for which it is allowed. When None,
        every qualifier is used for every span.

    Raises
    ------
    ValueError
        If both `qualifiers` and `label_constraints` are None.
    """
    if qualifiers is None:
        if label_constraints is None:
            raise ValueError(
                "Either `qualifiers` or `label_constraints` must be given to "
                "provide the qualifiers to predict / train on."
            )
        # Fall back on the constrained qualifiers, in declaration order
        qualifiers = list(label_constraints)

    # Neither span source was explicitly requested: default to `doc.ents`
    if not on_span_groups and on_ents is None:
        if label_constraints is not None:
            constrained_labels = {
                label
                for allowed_labels in label_constraints.values()
                for label in allowed_labels
            }
            on_ents = sorted(constrained_labels)
        else:
            on_ents = True

    self.qualifiers = qualifiers
    self.label_constraints = label_constraints
    self.span_getter = make_span_getter(on_ents, on_span_groups)

make_binding_getter(qualifier)

Make a qualifier getter

PARAMETER DESCRIPTION
qualifier

One of the following: (1) a path to a nested attribute of the span, such as "qualifier_" or "_.negated"; (2) a (key, value) tuple expressing an equality test, such as ("_.date.mode", "PASSED").

TYPE: Union[str, Binding]

RETURNS DESCRIPTION
Callable[[Span], bool]

The qualifier getter

Source code in edsnlp/pipelines/trainable/span_qualifier/utils.py
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
def make_binding_getter(qualifier: Union[str, Binding]):
    """
    Build a function that reads a qualifier from a span.

    Parameters
    ----------
    qualifier: Union[str, Binding]
        One of:
        - a dotted attribute path relative to the span, such as "qualifier_"
          or "_.negated"
        - a `(path, value)` tuple, such as `("_.date.mode", "PASSED")`

    Returns
    -------
    Callable[[Span], Any]
        For a plain path, a function returning the attribute found at that
        path. For a `(path, value)` tuple, a function returning the result of
        comparing that attribute with `value` using `==`.
    """
    # NOTE(review): the getter is compiled from source text with `eval`.
    # `_check_path` (defined elsewhere) is called on the path beforehand;
    # presumably it rejects anything that is not a plain attribute path —
    # confirm before accepting paths from untrusted input.
    if not isinstance(qualifier, tuple):
        _check_path(qualifier)
        return eval(f"lambda span: span.{qualifier}")

    path, value = qualifier
    _check_path(path)
    # `value` is made visible to the lambda through its globals mapping
    getter_globals = {"value": value}
    return eval(f"lambda span: span.{path} == value", getter_globals, {})

make_binding_setter(binding)

Make a qualifier setter

PARAMETER DESCRIPTION
binding

A pair made of: (1) a path to a nested attribute of the span, such as qualifier_ or _.negated; (2) the value to assign to that attribute.

TYPE: Binding

RETURNS DESCRIPTION
Callable[[Span]]

The qualifier setter

Source code in edsnlp/pipelines/trainable/span_qualifier/utils.py
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
def make_binding_setter(binding: Binding):
    """
    Build a function that writes a qualifier value on a span.

    Parameters
    ----------
    binding: Binding
        A `(path, value)` pair made of
        - a dotted attribute path relative to the span, such as `qualifier_`
          or `_.negated`
        - the value to assign at that path

    Returns
    -------
    Callable[[Span]]
        A function that assigns `value` to the attribute at `path` on the
        span it receives.
    """
    attr_path, assigned = binding
    # NOTE(review): the setter is compiled from source text with `exec`.
    # `_check_path` (defined elsewhere) is called on the path beforehand;
    # presumably it rejects anything that is not a plain attribute path —
    # confirm before accepting paths from untrusted input.
    _check_path(attr_path)

    # The namespace serves as the globals of the generated function, which is
    # how `value` gets resolved when the setter is later called.
    namespace = {"value": assigned}
    source = f"""def fn(span): span.{attr_path} = value"""
    exec(source, namespace, namespace)
    return namespace["fn"]