Skip to content

edsnlp.patch_spacy_dot_components

factory(cls, name, *, default_config=SimpleFrozenDict(), assigns=SimpleFrozenList(), requires=SimpleFrozenList(), retokenizes=False, default_score_weights=SimpleFrozenDict(), func=None) classmethod

Patched from spaCy to allow back dots in factory names (https://github.com/aphp/edsnlp/pull/152)

Register a new pipeline component factory. Can be used as a decorator on a function or classmethod, or called as a function with the factory provided as the func keyword argument. To create a component and add it to the pipeline, you can use nlp.add_pipe(name).

name (str): The name of the component factory. default_config (Dict[str, Any]): Default configuration, describing the default values of the factory arguments. assigns (Iterable[str]): Doc/Token attributes assigned by this component, e.g. "token.ent_id". Used for pipeline analysis. requires (Iterable[str]): Doc/Token attributes required by this component, e.g. "token.ent_id". Used for pipeline analysis. retokenizes (bool): Whether the component changes the tokenization. Used for pipeline analysis. default_score_weights (Dict[str, Optional[float]]): The scores to report during training, and their default weight towards the final score used to select the best model. Weights should sum to 1.0 per component and will be combined and normalized for the whole pipeline. If None, the score won't be shown in the logs or be weighted. func (Optional[Callable]): Factory function if not used as a decorator.

DOCS: https://spacy.io/api/language#factory

Source code in edsnlp/patch_spacy_dot_components.py
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
@classmethod
def factory(
    cls,
    name: str,
    *,
    default_config: Dict[str, Any] = SimpleFrozenDict(),
    assigns: Iterable[str] = SimpleFrozenList(),
    requires: Iterable[str] = SimpleFrozenList(),
    retokenizes: bool = False,
    default_score_weights: Dict[str, Optional[float]] = SimpleFrozenDict(),
    func: Optional[Callable] = None,
) -> Callable:
    """
    Patched from spaCy to allow back dots in factory
    names (https://github.com/aphp/edsnlp/pull/152)

    Register a new pipeline component factory. Can be used as a decorator
    on a function or classmethod, or called as a function with the factory
    provided as the func keyword argument. To create a component and add
    it to the pipeline, you can use nlp.add_pipe(name).

    name (str): The name of the component factory.
    default_config (Dict[str, Any]): Default configuration, describing the
        default values of the factory arguments.
    assigns (Iterable[str]): Doc/Token attributes assigned by this component,
        e.g. "token.ent_id". Used for pipeline analysis.
    requires (Iterable[str]): Doc/Token attributes required by this component,
        e.g. "token.ent_id". Used for pipeline analysis.
    retokenizes (bool): Whether the component changes the tokenization.
        Used for pipeline analysis.
    default_score_weights (Dict[str, Optional[float]]): The scores to report during
        training, and their default weight towards the final score used to
        select the best model. Weights should sum to 1.0 per component and
        will be combined and normalized for the whole pipeline. If None,
        the score won't be shown in the logs or be weighted.
    func (Optional[Callable]): Factory function if not used as a decorator.

    DOCS: https://spacy.io/api/language#factory
    """
    if not isinstance(name, str):
        raise ValueError(Errors.E963.format(decorator="factory"))
    if not isinstance(default_config, dict):
        err = Errors.E962.format(
            style="default config", name=name, cfg_type=type(default_config)
        )
        raise ValueError(err)

    def add_factory(factory_func: Callable) -> Callable:
        internal_name = cls.get_factory_name(name)
        if internal_name in registry.factories:
            # We only check for the internal name here – it's okay if it's a
            # subclass and the base class has a factory of the same name. We
            # also only raise if the function is different to prevent raising
            # if module is reloaded.
            existing_func = registry.factories.get(internal_name)
            if not util.is_same_func(factory_func, existing_func):
                err = Errors.E004.format(
                    name=name, func=existing_func, new_func=factory_func
                )
                raise ValueError(err)

        arg_names = util.get_arg_names(factory_func)
        if "nlp" not in arg_names or "name" not in arg_names:
            raise ValueError(Errors.E964.format(name=name))
        # Officially register the factory so we can later call
        # registry.resolve and refer to it in the config as
        # @factories = "spacy.Language.xyz". We use the class name here so
        # different classes can have different factories.
        registry.factories.register(internal_name, func=factory_func)
        factory_meta = FactoryMeta(
            factory=name,
            default_config=default_config,
            assigns=validate_attrs(assigns),
            requires=validate_attrs(requires),
            scores=list(default_score_weights.keys()),
            default_score_weights=default_score_weights,
            retokenizes=retokenizes,
        )
        cls.set_factory_meta(name, factory_meta)
        # We're overwriting the class attr with a frozen dict to handle
        # backwards-compat (writing to Language.factories directly). This
        # wouldn't work with an instance property and just produce a
        # confusing error – here we can show a custom error
        cls.factories = SimpleFrozenDict(
            registry.factories.get_all(), error=Errors.E957
        )
        return factory_func

    if func is not None:  # Support non-decorator use cases
        return add_factory(func)
    return add_factory

component(cls, name, *, assigns=SimpleFrozenList(), requires=SimpleFrozenList(), retokenizes=False, func=None) classmethod

Patched from spaCy to allow back dots in factory names (https://github.com/aphp/edsnlp/pull/152)

Register a new pipeline component. Can be used for stateless function components that don't require a separate factory. Can be used as a decorator on a function or classmethod, or called as a function with the factory provided as the func keyword argument. To create a component and add it to the pipeline, you can use nlp.add_pipe(name).

name (str): The name of the component factory. assigns (Iterable[str]): Doc/Token attributes assigned by this component, e.g. "token.ent_id". Used for pipeline analysis. requires (Iterable[str]): Doc/Token attributes required by this component, e.g. "token.ent_id". Used for pipeline analysis. retokenizes (bool): Whether the component changes the tokenization. Used for pipeline analysis. func (Optional[Callable]): Factory function if not used as a decorator.

DOCS: https://spacy.io/api/language#component

Source code in edsnlp/patch_spacy_dot_components.py
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
@classmethod
def component(
    cls,
    name: str,
    *,
    assigns: Iterable[str] = SimpleFrozenList(),
    requires: Iterable[str] = SimpleFrozenList(),
    retokenizes: bool = False,
    func: Optional["Pipe"] = None,
) -> Callable[..., Any]:
    """
    Patched from spaCy to allow back dots in factory
    names (https://github.com/aphp/edsnlp/pull/152)

    Register a new pipeline component. Can be used for stateless function
    components that don't require a separate factory. Can be used as a
    decorator on a function or classmethod, or called as a function with the
    factory provided as the func keyword argument. To create a component and
    add it to the pipeline, you can use nlp.add_pipe(name).

    name (str): The name of the component factory.
    assigns (Iterable[str]): Doc/Token attributes assigned by this component,
        e.g. "token.ent_id". Used for pipeline analysis.
    requires (Iterable[str]): Doc/Token attributes required by this component,
        e.g. "token.ent_id". Used for pipeline analysis.
    retokenizes (bool): Whether the component changes the tokenization.
        Used for pipeline analysis.
    func (Optional[Callable]): Factory function if not used as a decorator.

    DOCS: https://spacy.io/api/language#component
    """
    if name is not None:
        if not isinstance(name, str):
            raise ValueError(Errors.E963.format(decorator="component"))
    component_name = name if name is not None else util.get_object_name(func)

    def add_component(component_func: "Pipe") -> Callable:
        if isinstance(func, type):  # function is a class
            raise ValueError(Errors.E965.format(name=component_name))

        def factory_func(nlp, name: str) -> "Pipe":
            return component_func

        internal_name = cls.get_factory_name(name)
        if internal_name in registry.factories:
            # We only check for the internal name here – it's okay if it's a
            # subclass and the base class has a factory of the same name. We
            # also only raise if the function is different to prevent raising
            # if module is reloaded. It's hacky, but we need to check the
            # existing functure for a closure and whether that's identical
            # to the component function (because factory_func created above
            # will always be different, even for the same function)
            existing_func = registry.factories.get(internal_name)
            closure = existing_func.__closure__
            wrapped = [c.cell_contents for c in closure][0] if closure else None
            if util.is_same_func(wrapped, component_func):
                factory_func = existing_func  # noqa: F811

        cls.factory(
            component_name,
            assigns=assigns,
            requires=requires,
            retokenizes=retokenizes,
            func=factory_func,
        )
        return component_func

    if func is not None:  # Support non-decorator use cases
        return add_component(func)
    return add_component