Skip to content

edsnlp.pipelines.qualifiers.negation

negation

Negation

Bases: Qualifier

Implements the NegEx algorithm.

The component looks for five kinds of expressions in the text:

  • preceding negations, i.e. cues that precede a negated expression

  • following negations, i.e. cues that follow a negated expression

  • pseudo negations: expressions that contain a negation cue but are not negations (e.g. "pas de doute"/"no doubt")

  • negation verbs, i.e. verbs that indicate a negation

  • terminations, i.e. words that delimit propositions. The negation spans from the preceding cue to the termination.

PARAMETER DESCRIPTION
nlp

spaCy nlp pipeline to use for matching.

TYPE: Language

attr

spaCy attribute to use (`NORM` in the default configuration).

TYPE: str

pseudo

List of pseudo negation terms.

TYPE: Optional[List[str]]

preceding

List of preceding negation terms.

TYPE: Optional[List[str]]

following

List of following negation terms.

TYPE: Optional[List[str]]

termination

List of termination terms.

TYPE: Optional[List[str]]

verbs

List of negation verbs.

TYPE: Optional[List[str]]

on_ents_only

Whether to look for matches around detected entities only. Useful for faster inference in downstream tasks.

TYPE: bool

within_ents

Whether to consider cues within entities.

TYPE: bool

explain

Whether to keep track of cues for each entity.

TYPE: bool

Source code in edsnlp/pipelines/qualifiers/negation/negation.py
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
class Negation(Qualifier):
    """
    Negation qualifier implementing the NegEx algorithm.

    Five kinds of expressions are searched for in the text:

    - preceding negations, i.e. cues that come before a negated expression

    - following negations, i.e. cues that come after a negated expression

    - pseudo negations, which contain a negation cue but are not negations
      (e.g. "pas de doute"/"no doubt")

    - negation verbs, i.e. verbs that indicate a negation

    - terminations, i.e. words that delimit propositions.
      The negation spans from the preceding cue to the termination.

    Parameters
    ----------
    nlp : Language
        spaCy nlp pipeline to use for matching.
    attr : str
        spaCy's attribute to use.
    pseudo : Optional[List[str]]
        List of pseudo negation terms.
    preceding : Optional[List[str]]
        List of preceding negation terms.
    following : Optional[List[str]]
        List of following negation terms.
    termination : Optional[List[str]]
        List of termination terms.
    verbs : Optional[List[str]]
        List of negation verbs.
    on_ents_only : bool
        Whether to look for matches around detected entities only.
        Useful for faster inference in downstream tasks.
    within_ents : bool
        Whether to consider cues within entities.
    explain : bool
        Whether to keep track of cues for each entity.
    """

    # Module-level term lists, handed to `get_defaults` by `__init__`.
    # NOTE(review): presumably used as fallbacks when an argument is None;
    # confirm in `Qualifier.get_defaults`.
    defaults = {
        "following": following,
        "preceding": preceding,
        "pseudo": pseudo,
        "verbs": verbs,
        "termination": termination,
    }

    def __init__(
        self,
        nlp: Language,
        attr: str,
        pseudo: Optional[List[str]],
        preceding: Optional[List[str]],
        following: Optional[List[str]],
        termination: Optional[List[str]],
        verbs: Optional[List[str]],
        on_ents_only: bool,
        within_ents: bool,
        explain: bool,
    ):
        # Resolve the five term lists, then swap the verb list for its
        # conjugated forms before handing everything to the parent class.
        resolved = self.get_defaults(
            verbs=verbs,
            termination=termination,
            following=following,
            preceding=preceding,
            pseudo=pseudo,
        )
        resolved["verbs"] = self.load_verbs(resolved["verbs"])

        super().__init__(
            nlp=nlp,
            attr=attr,
            on_ents_only=on_ents_only,
            explain=explain,
            **resolved,
        )

        self.within_ents = within_ents
        self.set_extensions()

    @staticmethod
    def set_extensions() -> None:
        """
        Register the negation-related extensions on `Token`, `Span` and `Doc`.

        An extension is only declared when it is not registered yet, so the
        method can be called several times.
        """

        def _label(obj) -> str:
            # String view of the boolean `negation` extension.
            return "NEG" if obj._.negation else "AFF"

        # `Token` and `Span` share the same four extensions.
        for holder in (Token, Span):
            if not holder.has_extension("negation"):
                holder.set_extension("negation", default=False)

            if not holder.has_extension("negated"):
                holder.set_extension(
                    "negated",
                    getter=deprecated_getter_factory("negated", "negation"),
                )

            if not holder.has_extension("negation_"):
                holder.set_extension("negation_", getter=_label)

            if not holder.has_extension("polarity_"):
                holder.set_extension(
                    "polarity_",
                    getter=deprecated_getter_factory("polarity_", "negation_"),
                )

        # Span-only: cues that triggered the negation of the span.
        if not Span.has_extension("negation_cues"):
            Span.set_extension("negation_cues", default=[])

        if not Doc.has_extension("negations"):
            Doc.set_extension("negations", default=[])

    def load_verbs(self, verbs: List[str]) -> List[str]:
        """
        Conjugate negating verbs and keep a subset of tenses.

        Only the present indicative, the present participle and the
        past participle are retained.

        Parameters
        ----------
        verbs : List[str]
            List of negating verbs to conjugate.

        Returns
        -------
        List[str]
            Unique conjugated terms for the retained tenses.
        """

        # NOTE(review): `get_verbs` presumably returns a pandas DataFrame
        # with "mode", "tense" and "term" columns; confirm at its definition.
        conjugated = get_verbs(verbs)

        tense = conjugated["tense"]
        present_indicative = (conjugated["mode"] == "Indicatif") & (
            tense == "Présent"
        )
        present_participle = tense == "Participe Présent"
        past_participle = tense == "Participe Passé"

        kept = conjugated.loc[
            present_indicative | present_participle | past_participle
        ]

        return list(kept["term"].unique())

    def annotate_entity(
        self,
        ent: Span,
        sub_preceding: List[Span],
        sub_following: List[Span],
    ) -> None:
        """
        Qualify one entity from the preceding and following negation cues.

        Parameters
        ----------
        ent : Span
            Entity to annotate.
        sub_preceding : List[Span]
            List of preceding negation cues.
        sub_following : List[Span]
            List of following negation cues.
        """
        if self.within_ents:
            # Cues overlapping the entity are accepted as well.
            preceding_limit, following_limit = ent.end, ent.start
        else:
            # Cues must lie entirely outside the entity.
            preceding_limit, following_limit = ent.start, ent.end

        cues = [cue for cue in sub_preceding if cue.end <= preceding_limit]
        cues.extend(cue for cue in sub_following if cue.start >= following_limit)

        # A previously set negation is never reverted.
        negation = ent._.negation or bool(cues)
        ent._.negation = negation

        if not negation:
            return

        if self.explain:
            ent._.negation_cues += cues

        if not self.on_ents_only:
            for token in ent:
                token._.negation = True

    def process(self, doc: Doc) -> Doc:
        """
        Qualify the tokens and entities of a document for negation.

        Parameters
        ----------
        doc : Doc
            spaCy `Doc` object.

        Returns
        -------
        Doc
            The same spaCy `Doc` object, annotated for negation.
        """

        matches = self.get_matches(doc)

        # Boundaries are derived from the raw matches, before filtering.
        boundaries = self._boundaries(doc, get_spans(matches, "termination"))

        entities = [*doc.ents, *doc.spans.get("discarded", [])]
        ents = None

        # Removes duplicate matches and pseudo-expressions in one statement
        matches = filter_spans(matches, label_to_remove="pseudo")

        for start, end in boundaries:

            def _ent_in_boundary(span):
                return check_inclusion(span, start, end)

            def _cue_in_boundary(span):
                return start <= span.start < end

            # The previous iteration's entities are passed back through
            # `second_chance` (semantics defined in `consume_spans`).
            ents, entities = consume_spans(
                entities,
                filter=_ent_in_boundary,
                second_chance=ents,
            )

            sub_matches, matches = consume_spans(matches, _cue_in_boundary)

            if self.on_ents_only and not ents:
                continue

            sub_preceding = get_spans(sub_matches, "preceding")
            sub_following = get_spans(sub_matches, "following")
            # Verbs precede negated content
            sub_preceding += get_spans(sub_matches, "verbs")

            if not (sub_preceding or sub_following):
                continue

            if not self.on_ents_only:
                for token in doc[start:end]:
                    position = token.i
                    after_cue = any(cue.end <= position for cue in sub_preceding)
                    token._.negation = after_cue or any(
                        cue.start > position for cue in sub_following
                    )

            for ent in ents:
                self.annotate_entity(
                    ent=ent,
                    sub_preceding=sub_preceding,
                    sub_following=sub_following,
                )

        return doc

    def __call__(self, doc: Doc) -> Doc:
        """Apply the component to `doc`; delegates to `process`."""
        annotated = self.process(doc)
        return annotated
defaults = dict(following=following, preceding=preceding, pseudo=pseudo, verbs=verbs, termination=termination) class-attribute
within_ents = within_ents instance-attribute
__init__(nlp, attr, pseudo, preceding, following, termination, verbs, on_ents_only, within_ents, explain)
Source code in edsnlp/pipelines/qualifiers/negation/negation.py
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
def __init__(
    self,
    nlp: Language,
    attr: str,
    pseudo: Optional[List[str]],
    preceding: Optional[List[str]],
    following: Optional[List[str]],
    termination: Optional[List[str]],
    verbs: Optional[List[str]],
    on_ents_only: bool,
    within_ents: bool,
    explain: bool,
):
    """
    Set up the negation qualifier.

    The term lists go through `get_defaults`, the verb list is replaced by
    its conjugated forms (`load_verbs`), and the result is forwarded to the
    parent constructor. The extensions are registered last.
    """
    resolved = self.get_defaults(
        verbs=verbs,
        termination=termination,
        following=following,
        preceding=preceding,
        pseudo=pseudo,
    )
    # Match conjugated forms rather than the verbs as given.
    resolved["verbs"] = self.load_verbs(resolved["verbs"])

    super().__init__(
        nlp=nlp,
        attr=attr,
        on_ents_only=on_ents_only,
        explain=explain,
        **resolved,
    )

    self.within_ents = within_ents
    self.set_extensions()
set_extensions()
Source code in edsnlp/pipelines/qualifiers/negation/negation.py
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
@staticmethod
def set_extensions() -> None:
    """
    Register the negation-related extensions on `Token`, `Span` and `Doc`.

    An extension is only declared when it is not registered yet, so the
    function can be called several times.
    """

    def _label(obj) -> str:
        # String view of the boolean `negation` extension.
        return "NEG" if obj._.negation else "AFF"

    # `Token` and `Span` share the same four extensions.
    for holder in (Token, Span):
        if not holder.has_extension("negation"):
            holder.set_extension("negation", default=False)

        if not holder.has_extension("negated"):
            holder.set_extension(
                "negated",
                getter=deprecated_getter_factory("negated", "negation"),
            )

        if not holder.has_extension("negation_"):
            holder.set_extension("negation_", getter=_label)

        if not holder.has_extension("polarity_"):
            holder.set_extension(
                "polarity_",
                getter=deprecated_getter_factory("polarity_", "negation_"),
            )

    # Span-only: cues that triggered the negation of the span.
    if not Span.has_extension("negation_cues"):
        Span.set_extension("negation_cues", default=[])

    if not Doc.has_extension("negations"):
        Doc.set_extension("negations", default=[])
load_verbs(verbs)

Conjugate negating verbs to specific tenses.

PARAMETER DESCRIPTION
verbs

List of negating verbs to conjugate.

TYPE: List[str]

RETURNS DESCRIPTION
List[str]

List of negating verbs conjugated to specific tenses.
Source code in edsnlp/pipelines/qualifiers/negation/negation.py
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
def load_verbs(self, verbs: List[str]) -> List[str]:
    """
    Conjugate negating verbs and keep a subset of tenses.

    Only the present indicative, the present participle and the
    past participle are retained.

    Parameters
    ----------
    verbs : List[str]
        List of negating verbs to conjugate.

    Returns
    -------
    List[str]
        Unique conjugated terms for the retained tenses.
    """

    # NOTE(review): `get_verbs` presumably returns a pandas DataFrame
    # with "mode", "tense" and "term" columns; confirm at its definition.
    conjugated = get_verbs(verbs)

    tense = conjugated["tense"]
    present_indicative = (conjugated["mode"] == "Indicatif") & (tense == "Présent")
    present_participle = tense == "Participe Présent"
    past_participle = tense == "Participe Passé"

    kept = conjugated.loc[present_indicative | present_participle | past_participle]

    return list(kept["term"].unique())
annotate_entity(ent, sub_preceding, sub_following)

Annotate entities using preceding and following negations.

PARAMETER DESCRIPTION
ent

Entity to annotate

TYPE: Span

sub_preceding

List of preceding negations cues

TYPE: List[Span]

sub_following

List of following negations cues

TYPE: List[Span]

Source code in edsnlp/pipelines/qualifiers/negation/negation.py
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
def annotate_entity(
    self,
    ent: Span,
    sub_preceding: List[Span],
    sub_following: List[Span],
) -> None:
    """
    Qualify one entity from the preceding and following negation cues.

    Parameters
    ----------
    ent : Span
        Entity to annotate.
    sub_preceding : List[Span]
        List of preceding negation cues.
    sub_following : List[Span]
        List of following negation cues.
    """
    if self.within_ents:
        # Cues overlapping the entity are accepted as well.
        preceding_limit, following_limit = ent.end, ent.start
    else:
        # Cues must lie entirely outside the entity.
        preceding_limit, following_limit = ent.start, ent.end

    cues = [cue for cue in sub_preceding if cue.end <= preceding_limit]
    cues.extend(cue for cue in sub_following if cue.start >= following_limit)

    # A previously set negation is never reverted.
    negation = ent._.negation or bool(cues)
    ent._.negation = negation

    if not negation:
        return

    if self.explain:
        ent._.negation_cues += cues

    if not self.on_ents_only:
        for token in ent:
            token._.negation = True
process(doc)

Finds entities related to negation.

PARAMETER DESCRIPTION
doc

spaCy `Doc` object.

TYPE: Doc

RETURNS DESCRIPTION
Doc

spaCy `Doc` object, annotated for negation.
Source code in edsnlp/pipelines/qualifiers/negation/negation.py
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
def process(self, doc: Doc) -> Doc:
    """
    Qualify the tokens and entities of a document for negation.

    Parameters
    ----------
    doc : Doc
        spaCy `Doc` object.

    Returns
    -------
    Doc
        The same spaCy `Doc` object, annotated for negation.
    """

    matches = self.get_matches(doc)

    # Boundaries are derived from the raw matches, before filtering.
    boundaries = self._boundaries(doc, get_spans(matches, "termination"))

    entities = [*doc.ents, *doc.spans.get("discarded", [])]
    ents = None

    # Removes duplicate matches and pseudo-expressions in one statement
    matches = filter_spans(matches, label_to_remove="pseudo")

    for start, end in boundaries:

        def _ent_in_boundary(span):
            return check_inclusion(span, start, end)

        def _cue_in_boundary(span):
            return start <= span.start < end

        # The previous iteration's entities are passed back through
        # `second_chance` (semantics defined in `consume_spans`).
        ents, entities = consume_spans(
            entities,
            filter=_ent_in_boundary,
            second_chance=ents,
        )

        sub_matches, matches = consume_spans(matches, _cue_in_boundary)

        if self.on_ents_only and not ents:
            continue

        sub_preceding = get_spans(sub_matches, "preceding")
        sub_following = get_spans(sub_matches, "following")
        # Verbs precede negated content
        sub_preceding += get_spans(sub_matches, "verbs")

        if not (sub_preceding or sub_following):
            continue

        if not self.on_ents_only:
            for token in doc[start:end]:
                position = token.i
                after_cue = any(cue.end <= position for cue in sub_preceding)
                token._.negation = after_cue or any(
                    cue.start > position for cue in sub_following
                )

        for ent in ents:
            self.annotate_entity(
                ent=ent,
                sub_preceding=sub_preceding,
                sub_following=sub_following,
            )

    return doc
__call__(doc)
Source code in edsnlp/pipelines/qualifiers/negation/negation.py
273
274
def __call__(self, doc: Doc) -> Doc:
    """Apply the component to `doc`; delegates to `process`."""
    annotated = self.process(doc)
    return annotated

factory

DEFAULT_CONFIG = dict(pseudo=None, preceding=None, following=None, termination=None, verbs=None, attr='NORM', on_ents_only=True, within_ents=False, explain=False) module-attribute

create_component(nlp, name, attr, pseudo, preceding, following, termination, verbs, on_ents_only, within_ents, explain)

Source code in edsnlp/pipelines/qualifiers/negation/factory.py
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
@deprecated_factory("negation", "eds.negation", default_config=DEFAULT_CONFIG)
@Language.factory("eds.negation", default_config=DEFAULT_CONFIG)
def create_component(
    nlp: Language,
    name: str,
    attr: str,
    pseudo: Optional[List[str]],
    preceding: Optional[List[str]],
    following: Optional[List[str]],
    termination: Optional[List[str]],
    verbs: Optional[List[str]],
    on_ents_only: bool,
    within_ents: bool,
    explain: bool,
):
    """
    Factory for the `Negation` component, registered as "eds.negation".

    Every argument except `name` is forwarded unchanged to `Negation`.
    NOTE(review): `deprecated_factory` presumably keeps the legacy
    "negation" name available; confirm at its definition.
    """
    # `name` belongs to the factory signature but is not used here.
    settings = dict(
        nlp=nlp,
        attr=attr,
        pseudo=pseudo,
        preceding=preceding,
        following=following,
        termination=termination,
        verbs=verbs,
        on_ents_only=on_ents_only,
        within_ents=within_ents,
        explain=explain,
    )
    return Negation(**settings)

patterns

pseudo: List[str] = ['aucun changement', 'aucun doute', 'aucune hésitation', 'aucune diminution', 'ne permet pas d', 'ne permet pas de', "n'exclut pas", 'non négligeable', "pas d'amélioration", "pas d'augmentation", "pas d'autre", 'pas de changement', 'pas de diminution', 'pas de doute', 'pas exclu', 'pas exclue', 'pas exclues', 'pas exclus', 'pas immunisé', 'pas immunisée', 'pas immunisés', 'pas immunisées', 'sans amélioration', 'sans aucun doute', 'sans augmentation', 'sans certitude', 'sans changement', 'sans diminution', 'sans doute', 'sans être certain'] module-attribute

preceding: List[str] = ['à la place de', 'absence', 'absence de signe de', 'absence de', 'aucun signe de', 'aucun', 'aucune preuve', 'aucune', 'aucunes', 'aucuns', 'décline', 'décliné', 'dépourvu', 'dépourvue', 'dépourvues', 'dépourvus', 'disparition de', 'disparition des', 'excluent', 'exclut', 'impossibilité de', 'immunisé', 'immunisée', 'immunisés', 'immunisées', 'incompatible avec', 'incompatibles avec', 'jamais', 'ne manifestaient pas', 'ne manifestait pas', 'ne manifeste pas', 'ne manifestent pas', 'ne pas', 'ne présentaient pas', 'ne présentait pas', 'ne présente pas', 'ne présentent pas', 'ne ressemble pas', 'ne ressemblent pas', 'négatif pour', "n'est pas", "n'était pas", 'ni', 'niant', 'nie', 'nié', 'nullement', 'pas d', 'pas de cause de', 'pas de signe de', 'pas de signes de', 'pas de', 'pas nécessaire de', 'pas', "permet d'exclure", "plus d'aspect de", 'sans manifester de', 'sans présenter de', 'sans', 'symptôme atypique'] module-attribute

following: List[str] = [':0', ': 0', ':non', ': non', 'absent', 'absente', 'absentes', 'absents', 'dépourvu', 'dépourvue', 'dépourvues', 'dépourvus', 'disparaissent', 'disparait', 'est exclu', 'est exclue', 'immunisé', 'immunisée', 'immunisés', 'immunisées', 'impossible', 'improbable', 'négatif', 'négatifs', 'négative', 'négatives', 'négligeable', 'négligeables', 'nié', 'niée', 'non', 'pas nécessaire', 'peu probable', 'sont exclues', 'sont exclus'] module-attribute

verbs: List[str] = ['éliminer', 'exclure', 'interdire', 'nier', 'réfuter', 'rejeter'] module-attribute

Back to top