Skip to content

edsnlp.pipelines.core.normalizer.accents

accents

Accents

Bases: object

Normalises accents, using a same-length strategy.

PARAMETER DESCRIPTION
accents

List of accented characters and their transcriptions.

TYPE: Optional[List[Tuple[str, str]]]

Source code in edsnlp/pipelines/core/normalizer/accents/accents.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
class Accents(object):
    """
    Normalise accents in token norms, using a same-length strategy.

    Parameters
    ----------
    accents : Optional[List[Tuple[str, str]]]
        Accented characters paired with their transcriptions.
        When `None`, `patterns.accents` is used instead.
    """

    def __init__(self, accents: Optional[List[Tuple[str, str]]]) -> None:
        # No explicit mapping supplied: default to `patterns.accents`.
        self.accents = patterns.accents if accents is None else accents

    def __call__(self, doc: Doc) -> Doc:
        """
        Rewrite the `NORM` attribute of every token through `replace`.

        Parameters
        ----------
        doc : Doc
            The spaCy `Doc` object to process.

        Returns
        -------
        Doc
            The same document, whose `Token.norm_` values have been
            passed through `replace` with the configured accent pairs.
        """

        rep = self.accents
        for tok in doc:
            # Only `norm_` is reassigned; the result comes from `replace`.
            tok.norm_ = replace(text=tok.norm_, rep=rep)

        return doc
__call__(doc)

Remove accents from the spaCy NORM attribute.

PARAMETER DESCRIPTION
doc

The spaCy Doc object.

TYPE: Doc

RETURNS DESCRIPTION
Doc

The document, with accents removed in Token.norm_.

Source code in edsnlp/pipelines/core/normalizer/accents/accents.py
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
def __call__(self, doc: Doc) -> Doc:
    """
    Rewrite the `NORM` attribute of every token through `replace`.

    Parameters
    ----------
    doc : Doc
        The spaCy `Doc` object to process.

    Returns
    -------
    Doc
        The same document, whose `Token.norm_` values have been
        passed through `replace` with the configured accent pairs.
    """

    rep = self.accents
    for tok in doc:
        # Only `norm_` is reassigned; the result comes from `replace`.
        tok.norm_ = replace(text=tok.norm_, rep=rep)

    return doc
Back to top