Skip to content

edsnlp.pipelines.qualifiers.hypothesis

patterns

pseudo: List[str] = ['aucun doute', 'même si', 'pas de condition', 'pas de doute', 'sans aucun doute', 'sans condition', 'sans risque'] module-attribute

confirmation: List[str] = ['certain', 'certaine', 'certainement', 'certaines', 'certains', 'confirmer', 'évidemment', 'évident', 'évidente', 'montrer que', 'visiblement'] module-attribute

preceding: List[str] = ['à condition', 'à la condition que', 'à moins que', 'au cas où', 'conditionnellement', 'doute', 'en admettant que', 'en cas', 'en considérant que', 'en supposant que', 'éventuellement', 'faudrait', 'hypothèse', 'hypothèses', 'idée de', 'pas confirmer', 'pas sûr', 'pas sûre', 'peut correspondre', 'peut-être', 'peuvent correspondre', 'possible', 'possiblement', 'potentiel', 'potentielle', 'potentiellement', 'potentielles', 'potentiels', 'prédisposant à', 'probable', 'probablement', 'probables', "recherche d'", 'recherche de', 'recherche des', 'risque', 'sauf si', 'selon', 'si', "s'il", 'soit', 'sous condition', 'sous réserve', 'suspicion'] module-attribute

following: List[str] = ['?', 'envisagé', 'envisageable', 'envisageables', 'envisagées', 'envisagés', 'hypothétique', 'hypothétiquement', 'hypothétiques', 'pas certain', 'pas certaine', 'pas clair', 'pas claire', 'pas confirmé', 'pas confirmée', 'pas confirmées', 'pas confirmés', 'pas évident', 'pas évidente', 'pas sûr', 'pas sûre', 'possible', 'potentiel', 'potentielle', 'potentiels', 'probable', 'probables', ': \n', ':\n'] module-attribute

verbs_hyp: List[str] = ['douter', 'envisager', "s'apparenter", 'sembler', 'soupçonner', 'suggérer', 'suspecter'] module-attribute

verbs_eds: List[str] = ['abandonner', 'abolir', 'aborder', 'accepter', 'accidenter', 'accompagnemer', 'accompagner', 'acoller', 'acquérir', 'activer', 'actualiser', 'adapter', 'adhérer', 'adjuver', 'admettre', 'administrer', 'adopter', 'adresser', 'aggraver', 'agir', 'agréer', 'aider', 'aimer', 'alcooliser', 'alerter', 'alimenter', 'aller', 'allonger', 'alléger', 'alterner', 'altérer', 'amender', 'amener', 'améliorer', 'amyotrophier', 'améliorer', 'analyser', 'anesthésier', 'animer', 'annexer', 'annuler', 'anonymiser', 'anticiper', 'anticoaguler', 'apercevoir', 'aplatir', 'apparaître', 'appareiller', 'appeler', 'appliquer', 'apporter', 'apprendre', 'apprécier', 'appuyer', 'argumenter', 'arquer', 'arrêter', 'arriver', 'arrêter', 'articuler', 'aspirer', 'asseoir', 'assister', 'associer', 'assurer', 'assécher', 'attacher', 'atteindre', 'attendre', 'attribuer', 'augmenter', 'autonomiser', 'autoriser', 'avaler', 'avancer', 'avertir', 'avoir', 'avérer', 'aérer', 'baisser', 'ballonner', 'blesser', 'bloquer', 'boire', 'border', 'brancher', 'brûler', 'bénéficier', 'cadrer', 'calcifier', 'calculer', 'calmer', 'canaliser', 'capter', 'carencer', 'casser', 'centrer', 'cerner', 'certifier', 'changer', 'charger', 'chevaucher', 'choisir', 'chronomoduler', 'chuter', 'cicatriser', 'circoncire', 'circuler', 'classer', 'codéiner', 'coincer', 'colorer', 'combler', 'commander', 'commencer', 'communiquer', 'comparer', 'compliquer', 'compléter', 'comporter', 'comprendre', 'comprimer', 'concerner', 'conclure', 'condamner', 'conditionner', 'conduire', 'confiner', 'confirmer', 'confronter', 'congeler', 'conjoindre', 'conjuguer', 'connaître', 'connecter', 'conseiller', 'conserver', 'considérer', 'consommer', 'constater', 'constituer', 'consulter', 'contacter', 'contaminer', 'contenir', 'contentionner', 'continuer', 'contracter', 'contrarier', 'contribuer', 'contrôler', 'convaincre', 'convenir', 'convier', 'convoquer', 'copier', 'correspondre', 'corriger', 'corréler', 'coucher', 'coupler', 
'couvrir', 'crapotter', 'creuser', 'croire', 'croiser', 'créer', 'crémer', 'crépiter', 'cumuler', 'curariser', 'céder', 'dater', 'demander', 'demeurer', 'destiner', 'devenir', 'devoir', 'diagnostiquer', 'dialyser', 'dicter', 'diffuser', 'différencier', 'différer', 'digérer', 'dilater', 'diluer', 'diminuer', 'diner', 'dire', 'diriger', 'discuter', 'disparaître', 'disposer', 'dissocier', 'disséminer', 'disséquer', 'distendre', 'distinguer', 'divorcer', 'documenter', 'donner', 'dorer', 'doser', 'doubler', 'durer', 'dyaliser', 'dyspner', 'débuter', 'décaler', 'déceler', 'décider', 'déclarer', 'déclencher', 'découvrir', 'décrire', 'décroître', 'décurariser', 'décéder', 'dédier', 'définir', 'dégrader', 'délivrer', 'dépasser', 'dépendre', 'déplacer', 'dépolir', 'déposer', 'dériver', 'dérouler', 'désappareiller', 'désigner', 'désinfecter', 'désorienter', 'détecter', 'déterminer', 'détruire', 'développer', 'dévouer', 'dîner', 'écraser', 'effacer', 'effectuer', 'effondrer', 'emboliser', 'emmener', 'empêcher', 'encadrer', 'encourager', 'endormir', 'endurer', 'enlever', 'enregistrer', 'entamer', 'entendre', 'entourer', 'entraîner', 'entreprendre', 'entrer', 'envahir', 'envisager', 'envoyer', 'espérer', 'essayer', 'estimer', 'être', 'examiner', 'excentrer', 'exciser', 'exclure', 'expirer', 'expliquer', 'explorer', 'exposer', 'exprimer', 'extérioriser', 'exécuter', 'faciliter', 'faire', 'fatiguer', 'favoriser', 'faxer', 'fermer', 'figurer', 'fixer', 'focaliser', 'foncer', 'former', 'fournir', 'fractionner', 'fragmenter', 'fuiter', 'fusionner', 'garder', 'graver', 'guider', 'gérer', 'gêner', 'honorer', 'hopsitaliser', 'hospitaliser', 'hydrater', 'hyperartérialiser', 'hyperfixer', 'hypertrophier', 'hésiter', 'identifier', 'illustrer', 'immuniser', 'impacter', 'implanter', 'impliquer', 'importer', 'imposer', 'impregner', 'imprimer', 'inclure', 'indifferencier', 'indiquer', 'infecter', 'infertiliser', 'infiltrer', 'informer', 'inhaler', 'initier', 'injecter', 'inscrire', 'insister', 
'installer', 'interdire', 'interpréter', 'interrompre', 'intervenir', 'intituler', 'introduire', 'intéragir', 'inverser', 'inviter', 'ioder', 'ioniser', 'irradier', 'itérativer', 'joindre', 'juger', 'justifier', 'laisser', 'laminer', 'lancer', 'latéraliser', 'laver', 'lever', 'lier', 'ligaturer', 'limiter', 'lire', 'localiser', 'loger', 'louper', 'luire', 'lutter', 'lyricer', 'lyser', 'maculer', 'macérer', 'maintenir', 'majorer', 'malaiser', 'manger', 'manifester', 'manipuler', 'manquer', 'marcher', 'marier', 'marmoner', 'marquer', 'masquer', 'masser', 'mater', 'mener', 'mesurer', 'meteoriser', 'mettre', 'mitiger', 'modifier', 'moduler', 'modérer', 'monter', 'montrer', 'motiver', 'moucheter', 'mouler', 'mourir', 'multiopéréer', 'munir', 'muter', 'médicaliser', 'météoriser', 'naître', 'normaliser', 'noter', 'nuire', 'numériser', 'nécessiter', 'négativer', 'objectiver', 'observer', 'obstruer', 'obtenir', 'occasionner', 'occuper', 'opposer', 'opérer', 'organiser', 'orienter', 'ouvrir', 'palper', 'parasiter', 'paraître', 'parcourir', 'parer', 'paresthésier', 'parfaire', 'partager', 'partir', 'parvenir', 'passer', 'penser', 'percevoir', 'perdre', 'perforer', 'permettre', 'persister', 'personnaliser', 'peser', 'pigmenter', 'piloter', 'placer', 'plaindre', 'planifier', 'plier', 'plonger', 'porter', 'poser', 'positionner', 'posséder', 'poursuivre', 'pousser', 'pouvoir', 'pratiquer', 'preciser', 'prendre', 'prescrire', 'prier', 'produire', 'programmer', 'prolonger', 'prononcer', 'proposer', 'prouver', 'provoquer', 'préciser', 'précéder', 'prédominer', 'préexister', 'préférer', 'prélever', 'préparer', 'présenter', 'préserver', 'prévenir', 'prévoir', 'puruler', 'pénétrer', 'radiofréquencer', 'ralentir', 'ramener', 'rappeler', 'rapporter', 'rapprocher', 'rassurer', 'rattacher', 'rattraper', 'realiser', 'recenser', 'recevoir', 'rechercher', 'recommander', 'reconnaître', 'reconsulter', 'recontacter', 'recontrôler', 'reconvoquer', 'recouvrir', 'recueillir', 'recuperer', 
'redescendre', 'rediscuter', 'refaire', 'refouler', 'refuser', 'regarder', 'rehausser', 'relancer', 'relayer', 'relever', 'relire', 'relâcher', 'remanier', 'remarquer', 'remercier', 'remettre', 'remonter', 'remplacer', 'remplir', 'rencontrer', 'rendormir', 'rendre', 'renfermer', 'renforcer', 'renouveler', 'renseigner', 'rentrer', 'reparler', 'repasser', 'reporter', 'reprendre', 'represcrire', 'reproduire', 'reprogrammer', 'représenter', 'repérer', 'requérir', 'respecter', 'ressembler', 'ressentir', 'rester', 'restreindre', 'retarder', 'retenir', 'retirer', 'retrouver', 'revasculariser', 'revenir', 'reverticaliser', 'revoir', 'rompre', 'rouler', 'réadapter', 'réadmettre', 'réadresser', 'réaliser', 'récidiver', 'récupérer', 'rédiger', 'réduire', 'réessayer', 'réexpliquer', 'référer', 'régler', 'régresser', 'réhausser', 'réopérer', 'répartir', 'répondre', 'répéter', 'réserver', 'résorber', 'résoudre', 'réséquer', 'réveiller', 'révéler', 'réévaluer', 'rêver', 'sacrer', 'saisir', 'satisfaire', 'savoir', 'scanner', 'scolariser', 'sembler', 'sensibiliser', 'sentir', 'serrer', 'servir', 'sevrer', 'signaler', 'signer', 'situer', 'siéger', 'soigner', 'sommeiller', 'sonder', 'sortir', 'souffler', 'souhaiter', 'soulager', 'soussigner', 'souvenir', 'spécialiser', 'stabiliser', 'statuer', 'stenter', 'stopper', 'stratifier', 'subir', 'substituer', 'sucrer', 'suggérer', 'suivre', 'supporter', 'supprimer', 'surajouter', 'surmonter', 'surveiller', 'survenir', 'suspecter', 'suspendre', 'suturer', 'synchroniser', 'systématiser', 'sécréter', 'sécuriser', 'sédater', 'séjourner', 'séparer', 'taire', 'taper', 'teinter', 'tendre', 'tenir', 'tenter', 'terminer', 'tester', 'thromboser', 'tirer', 'tiroir', 'tissulaire', 'titulariser', 'tolérer', 'tourner', 'tracer', 'trachéotomiser', 'traduire', 'traiter', 'transcrire', 'transférer', 'transmettre', 'transporter', 'trasnfixer', 'travailler', 'tronquer', 'trouver', 'téléphoner', 'ulcérer', 'uriner', 'utiliser', 'vacciner', 'valider', 'valoir', 
'varier', 'vasculariser', 'venir', 'verifier', 'vieillir', 'viser', 'visualiser', 'vivre', 'voir', 'vouloir', 'vérifier', 'ébaucher', 'écarter', 'échographier', 'échoguider', 'échoir', 'échouer', 'éclairer', 'écraser', 'élargir', 'éliminer', 'émousser', 'épaissir', 'épargner', 'épuiser', 'épurer', 'équilibrer', 'établir', 'étager', 'étendre', 'étiqueter', 'étrangler', 'évaluer', 'éviter', 'évoluer', 'évoquer', 'être'] module-attribute

hypothesis

Hypothesis

Bases: Qualifier

Hypothesis detection with spaCy.

The component looks for five kinds of expressions in the text :

  • preceding hypothesis, ie cues that precede a hypothetic expression
  • following hypothesis, ie cues that follow a hypothetic expression
  • pseudo hypothesis : expressions that contain a hypothesis cue but are not hypotheses (eg "pas de doute"/"no doubt")
  • hypothetic verbs : verbs indicating hypothesis (eg "douter")
  • classic verbs conjugated to the conditional, thus indicating hypothesis
PARAMETER DESCRIPTION
nlp

spaCy nlp pipeline to use for matching.

TYPE: Language

pseudo

List of pseudo hypothesis cues.

TYPE: Optional[List[str]]

preceding

List of preceding hypothesis cues.

TYPE: Optional[List[str]]

following

List of following hypothesis cues.

TYPE: Optional[List[str]]

verbs_hyp

List of hypothetic verbs.

TYPE: Optional[List[str]]

verbs_eds

List of mainstream verbs.

TYPE: Optional[List[str]]

filter_matches

Whether to filter out overlapping matches.

TYPE: bool

attr

spaCy's attribute to use: a string with the value "TEXT" or "NORM", or a dict with the key 'term_attr' we can also add a key for each regex.

TYPE: str

on_ents_only

Whether to look for matches around detected entities only. Useful for faster inference in downstream tasks.

TYPE: bool

within_ents

Whether to consider cues within entities.

TYPE: bool

explain

Whether to keep track of cues for each entity.

TYPE: bool

regex

A dictionary of regex patterns.

TYPE: Optional[Dict[str, Union[List[str], str]]]

Source code in edsnlp/pipelines/qualifiers/hypothesis/hypothesis.py
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
class Hypothesis(Qualifier):
    """
    Hypothesis detection with spaCy.

    The component looks for five kinds of expressions in the text :

    - preceding hypothesis, ie cues that precede a hypothetic expression
    - following hypothesis, ie cues that follow a hypothetic expression
    - pseudo hypothesis : contain a hypothesis cue, but are not hypothesis
      (eg "pas de doute"/"no doubt")
    - hypothetic verbs : verbs indicating hypothesis (eg "douter")
    - classic verbs conjugated to the conditional, thus indicating hypothesis

    Parameters
    ----------
    nlp : Language
        spaCy nlp pipeline to use for matching.
    attr : str
        spaCy's attribute to use:
        a string with the value "TEXT" or "NORM", or a dict with the key 'term_attr'
        we can also add a key for each regex.
    pseudo : Optional[List[str]]
        List of pseudo hypothesis cues.
    preceding : Optional[List[str]]
        List of preceding hypothesis cues.
    following : Optional[List[str]]
        List of following hypothesis cues.
    termination : Optional[List[str]]
        List of termination cues, used to split the document into
        independent statements.
    verbs_eds : Optional[List[str]]
        List of mainstream verbs.
    verbs_hyp : Optional[List[str]]
        List of hypothetic verbs.
    on_ents_only : bool
        Whether to look for matches around detected entities only.
        Useful for faster inference in downstream tasks.
    within_ents : bool
        Whether to consider cues within entities.
    explain : bool
        Whether to keep track of cues for each entity.
    """

    # Default cue lists, taken from the module-level pattern lists.
    # Any ``None`` argument to ``__init__`` falls back to these via
    # ``get_defaults`` (inherited from Qualifier).
    defaults = dict(
        following=following,
        preceding=preceding,
        pseudo=pseudo,
        termination=termination,
        verbs_eds=verbs_eds,
        verbs_hyp=verbs_hyp,
    )

    def __init__(
        self,
        nlp: Language,
        attr: str,
        pseudo: Optional[List[str]],
        preceding: Optional[List[str]],
        following: Optional[List[str]],
        termination: Optional[List[str]],
        verbs_eds: Optional[List[str]],
        verbs_hyp: Optional[List[str]],
        on_ents_only: bool,
        within_ents: bool,
        explain: bool,
    ):

        # Fill in any None argument with the class-level defaults.
        terms = self.get_defaults(
            pseudo=pseudo,
            preceding=preceding,
            following=following,
            termination=termination,
            verbs_eds=verbs_eds,
            verbs_hyp=verbs_hyp,
        )
        # Replace the two raw verb lists with a single "verbs" cue list
        # holding their conjugated forms (see ``load_verbs``).
        terms["verbs"] = self.load_verbs(
            verbs_hyp=terms.pop("verbs_hyp"),
            verbs_eds=terms.pop("verbs_eds"),
        )

        super().__init__(
            nlp=nlp,
            attr=attr,
            on_ents_only=on_ents_only,
            explain=explain,
            **terms,
        )

        self.within_ents = within_ents
        self.set_extensions()

    @staticmethod
    def set_extensions() -> None:
        # Register the custom spaCy extensions used by the component.
        # Idempotent: existing extensions are left untouched.
        if not Token.has_extension("hypothesis"):
            Token.set_extension("hypothesis", default=False)

        if not Token.has_extension("hypothesis_"):
            # Human-readable mirror of the boolean flag.
            Token.set_extension(
                "hypothesis_",
                getter=lambda token: "HYP" if token._.hypothesis else "CERT",
            )

        if not Span.has_extension("hypothesis"):
            Span.set_extension("hypothesis", default=False)

        if not Span.has_extension("hypothesis_"):
            Span.set_extension(
                "hypothesis_",
                getter=lambda span: "HYP" if span._.hypothesis else "CERT",
            )

        if not Span.has_extension("hypothesis_cues"):
            # Cues responsible for the qualification (filled when explain=True).
            Span.set_extension("hypothesis_cues", default=[])

        if not Doc.has_extension("hypothesis"):
            Doc.set_extension("hypothesis", default=[])

    def load_verbs(
        self,
        verbs_hyp: List[str],
        verbs_eds: List[str],
    ) -> List[str]:
        """
        Conjugate "classic" verbs to conditional, and add hypothesis
        verbs conjugated to all tenses.

        Parameters
        ----------
        verbs_hyp: List of verbs that specifically imply an hypothesis.
        verbs_eds: List of general verbs.

        Returns
        -------
        list of hypothesis verbs conjugated at all tenses and classic
        verbs conjugated to conditional.
        """

        # Generic verbs only convey hypothesis in the conditional mode.
        classic_verbs = get_verbs(verbs_eds)
        classic_verbs = classic_verbs.loc[classic_verbs["mode"] == "Conditionnel"]
        list_classic_verbs = list(classic_verbs["term"].unique())

        # Hypothesis-specific verbs convey hypothesis in every tense.
        hypo_verbs = get_verbs(verbs_hyp)
        list_hypo_verbs = list(hypo_verbs["term"].unique())

        return list_hypo_verbs + list_classic_verbs

    def process(self, doc: Doc) -> Doc:
        """
        Finds entities related to hypothesis.

        Parameters
        ----------
        doc: spaCy Doc object

        Returns
        -------
        doc: spaCy Doc object, annotated for hypothesis
        """

        matches = self.get_matches(doc)

        # Termination cues delimit the statements within which cues
        # and entities are matched together.
        terminations = get_spans(matches, "termination")
        boundaries = self._boundaries(doc, terminations)

        # Removes duplicate matches and pseudo-expressions in one statement
        matches = filter_spans(matches, label_to_remove="pseudo")

        entities = list(doc.ents) + list(doc.spans.get("discarded", []))
        ents = None

        for start, end in boundaries:

            # Entities included in the current boundary; ``second_chance``
            # re-offers the previous batch — presumably for entities that
            # straddle a boundary (TODO confirm in consume_spans).
            ents, entities = consume_spans(
                entities,
                filter=lambda s: check_inclusion(s, start, end),
                second_chance=ents,
            )

            # Cues starting inside the current boundary.
            sub_matches, matches = consume_spans(
                matches, lambda s: start <= s.start < end
            )

            if self.on_ents_only and not ents:
                continue

            sub_preceding = get_spans(sub_matches, "preceding")
            sub_following = get_spans(sub_matches, "following")
            sub_verbs = get_spans(sub_matches, "verbs")

            # No hypothesis cue in this statement: nothing to annotate.
            if not sub_preceding + sub_following + sub_verbs:
                continue

            if not self.on_ents_only:
                # A token is hypothetic if a preceding cue or verb ends at
                # or before it, or if a following cue starts after it.
                for token in doc[start:end]:
                    token._.hypothesis = any(
                        m.end <= token.i for m in sub_preceding + sub_verbs
                    ) or any(m.start > token.i for m in sub_following)

            for ent in ents:

                if self.within_ents:
                    # Cues overlapping the entity are also accepted.
                    cues = [m for m in sub_preceding + sub_verbs if m.end <= ent.end]
                    cues += [m for m in sub_following if m.start >= ent.start]
                else:
                    # Only cues strictly outside the entity are accepted.
                    cues = [m for m in sub_preceding + sub_verbs if m.end <= ent.start]
                    cues += [m for m in sub_following if m.start >= ent.end]

                # Never unset a flag set while processing an earlier boundary.
                hypothesis = ent._.hypothesis or bool(cues)

                ent._.hypothesis = hypothesis

                if self.explain and hypothesis:
                    ent._.hypothesis_cues += cues

                if not self.on_ents_only and hypothesis:
                    for token in ent:
                        token._.hypothesis = True

        return doc
defaults = dict(following=following, preceding=preceding, pseudo=pseudo, termination=termination, verbs_eds=verbs_eds, verbs_hyp=verbs_hyp) class-attribute
within_ents = within_ents instance-attribute
__init__(nlp, attr, pseudo, preceding, following, termination, verbs_eds, verbs_hyp, on_ents_only, within_ents, explain)
Source code in edsnlp/pipelines/qualifiers/hypothesis/hypothesis.py
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
def __init__(
    self,
    nlp: Language,
    attr: str,
    pseudo: Optional[List[str]],
    preceding: Optional[List[str]],
    following: Optional[List[str]],
    termination: Optional[List[str]],
    verbs_eds: Optional[List[str]],
    verbs_hyp: Optional[List[str]],
    on_ents_only: bool,
    within_ents: bool,
    explain: bool,
):
    """
    Initialise the hypothesis qualifier.

    Every cue list left to ``None`` falls back to the class-level
    defaults through ``get_defaults``.
    """

    # Merge user-provided cue lists with the class defaults.
    cue_lists = self.get_defaults(
        pseudo=pseudo,
        preceding=preceding,
        following=following,
        termination=termination,
        verbs_eds=verbs_eds,
        verbs_hyp=verbs_hyp,
    )

    # The two raw verb lists are replaced by a single "verbs" entry
    # holding their conjugated forms.
    hyp_verbs = cue_lists.pop("verbs_hyp")
    eds_verbs = cue_lists.pop("verbs_eds")
    cue_lists["verbs"] = self.load_verbs(
        verbs_hyp=hyp_verbs,
        verbs_eds=eds_verbs,
    )

    super().__init__(
        nlp=nlp,
        attr=attr,
        on_ents_only=on_ents_only,
        explain=explain,
        **cue_lists,
    )

    self.within_ents = within_ents
    self.set_extensions()
set_extensions()
Source code in edsnlp/pipelines/qualifiers/hypothesis/hypothesis.py
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
@staticmethod
def set_extensions() -> None:
    """Register the custom spaCy extensions used by the component.

    Idempotent: extensions that already exist are left untouched.
    """
    # Token and Span share the same boolean flag and its readable mirror.
    for cls in (Token, Span):
        if not cls.has_extension("hypothesis"):
            cls.set_extension("hypothesis", default=False)

        if not cls.has_extension("hypothesis_"):
            cls.set_extension(
                "hypothesis_",
                getter=lambda obj: "HYP" if obj._.hypothesis else "CERT",
            )

    # Cues responsible for the qualification (filled when explain=True).
    if not Span.has_extension("hypothesis_cues"):
        Span.set_extension("hypothesis_cues", default=[])

    if not Doc.has_extension("hypothesis"):
        Doc.set_extension("hypothesis", default=[])
load_verbs(verbs_hyp, verbs_eds)

Conjugate "classic" verbs to conditional, and add hypothesis verbs conjugated to all tenses.

PARAMETER DESCRIPTION
verbs_hyp

TYPE: List[str]

verbs_eds

TYPE: List[str]

RETURNS DESCRIPTION
list of hypothesis verbs conjugated at all tenses and classic
verbs conjugated to conditional.
Source code in edsnlp/pipelines/qualifiers/hypothesis/hypothesis.py
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
def load_verbs(
    self,
    verbs_hyp: List[str],
    verbs_eds: List[str],
) -> List[str]:
    """
    Conjugate "classic" verbs to conditional, and add hypothesis
    verbs conjugated to all tenses.

    Parameters
    ----------
    verbs_hyp: List of verbs that specifically imply an hypothesis.
    verbs_eds: List of general verbs.

    Returns
    -------
    list of hypothesis verbs conjugated at all tenses and classic
    verbs conjugated to conditional.
    """

    # Hypothesis-specific verbs keep every conjugated form.
    hypo_terms = list(get_verbs(verbs_hyp)["term"].unique())

    # Generic verbs only convey hypothesis in the conditional mode.
    conjugated = get_verbs(verbs_eds)
    conditional_mask = conjugated["mode"] == "Conditionnel"
    classic_terms = list(conjugated.loc[conditional_mask, "term"].unique())

    return hypo_terms + classic_terms
process(doc)

Finds entities related to hypothesis.

PARAMETER DESCRIPTION
doc

TYPE: Doc

RETURNS DESCRIPTION
doc
Source code in edsnlp/pipelines/qualifiers/hypothesis/hypothesis.py
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
def process(self, doc: Doc) -> Doc:
    """
    Finds entities related to hypothesis.

    Parameters
    ----------
    doc: spaCy Doc object

    Returns
    -------
    doc: spaCy Doc object, annotated for hypothesis
    """

    matches = self.get_matches(doc)

    # Termination cues delimit the statements within which cues
    # and entities are matched together.
    terminations = get_spans(matches, "termination")
    boundaries = self._boundaries(doc, terminations)

    # Removes duplicate matches and pseudo-expressions in one statement
    matches = filter_spans(matches, label_to_remove="pseudo")

    entities = list(doc.ents) + list(doc.spans.get("discarded", []))
    ents = None

    for start, end in boundaries:

        # Entities included in the current boundary; ``second_chance``
        # re-offers the previous batch — presumably for entities that
        # straddle a boundary (TODO confirm in consume_spans).
        ents, entities = consume_spans(
            entities,
            filter=lambda s: check_inclusion(s, start, end),
            second_chance=ents,
        )

        # Cues starting inside the current boundary.
        sub_matches, matches = consume_spans(
            matches, lambda s: start <= s.start < end
        )

        if self.on_ents_only and not ents:
            continue

        sub_preceding = get_spans(sub_matches, "preceding")
        sub_following = get_spans(sub_matches, "following")
        sub_verbs = get_spans(sub_matches, "verbs")

        # No hypothesis cue in this statement: nothing to annotate.
        if not sub_preceding + sub_following + sub_verbs:
            continue

        if not self.on_ents_only:
            # A token is hypothetic if a preceding cue or verb ends at
            # or before it, or if a following cue starts after it.
            for token in doc[start:end]:
                token._.hypothesis = any(
                    m.end <= token.i for m in sub_preceding + sub_verbs
                ) or any(m.start > token.i for m in sub_following)

        for ent in ents:

            if self.within_ents:
                # Cues overlapping the entity are also accepted.
                cues = [m for m in sub_preceding + sub_verbs if m.end <= ent.end]
                cues += [m for m in sub_following if m.start >= ent.start]
            else:
                # Only cues strictly outside the entity are accepted.
                cues = [m for m in sub_preceding + sub_verbs if m.end <= ent.start]
                cues += [m for m in sub_following if m.start >= ent.end]

            # Never unset a flag set while processing an earlier boundary.
            hypothesis = ent._.hypothesis or bool(cues)

            ent._.hypothesis = hypothesis

            if self.explain and hypothesis:
                ent._.hypothesis_cues += cues

            if not self.on_ents_only and hypothesis:
                for token in ent:
                    token._.hypothesis = True

    return doc

factory

DEFAULT_CONFIG = dict(pseudo=None, preceding=None, following=None, termination=None, verbs_hyp=None, verbs_eds=None, attr='NORM', on_ents_only=True, within_ents=False, explain=False) module-attribute

create_component(nlp, name, attr, pseudo, preceding, following, termination, verbs_eds, verbs_hyp, on_ents_only, within_ents, explain)

Source code in edsnlp/pipelines/qualifiers/hypothesis/factory.py
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
@deprecated_factory("hypothesis", "eds.hypothesis", default_config=DEFAULT_CONFIG)
@Language.factory("eds.hypothesis", default_config=DEFAULT_CONFIG)
def create_component(
    nlp: Language,
    name: str,
    attr: str,
    pseudo: Optional[List[str]],
    preceding: Optional[List[str]],
    following: Optional[List[str]],
    termination: Optional[List[str]],
    verbs_eds: Optional[List[str]],
    verbs_hyp: Optional[List[str]],
    on_ents_only: bool,
    within_ents: bool,
    explain: bool,
):
    """Build the ``eds.hypothesis`` pipeline component."""
    # ``name`` is required by the spaCy factory protocol but unused here.
    kwargs = dict(
        nlp=nlp,
        attr=attr,
        pseudo=pseudo,
        preceding=preceding,
        following=following,
        termination=termination,
        verbs_eds=verbs_eds,
        verbs_hyp=verbs_hyp,
        on_ents_only=on_ents_only,
        within_ents=within_ents,
        explain=explain,
    )
    return Hypothesis(**kwargs)
Back to top