Skip to content

edsnlp.utils.examples

entity_pattern = re.compile('(<ent[^<>]*>[^<>]+</ent>)') module-attribute

text_pattern = re.compile('<ent.*>(.+)</ent>') module-attribute

modifiers_pattern = re.compile('<ent\\s?(.*)>.+</ent>') module-attribute

single_modifiers_pattern = regex.compile("(?P<key>[^\\s]+?)=((?P<value>[^\\s']+)|'(?P<value>.+)')") module-attribute

Match

Bases: BaseModel

Source code in edsnlp/utils/examples.py
 8
 9
10
11
12
class Match(BaseModel):
    """
    Intermediate description of one tagged entity, as produced by `parse_match`.
    """

    # Start offset of the regex match (opening tag included), i.e. `match.start()`.
    start_char: int
    # End offset of the regex match (closing tag included), i.e. `match.end()`.
    end_char: int
    # Text captured between the opening and closing tags.
    text: str
    # Raw, unparsed content of the opening tag after `<ent`.
    modifiers: str

start_char: int = None class-attribute

end_char: int = None class-attribute

text: str = None class-attribute

modifiers: str = None class-attribute

Modifier

Bases: BaseModel

Source code in edsnlp/utils/examples.py
15
16
17
class Modifier(BaseModel):
    """
    Single `key=value` attribute attached to a tagged entity.
    """

    # Name of the attribute (the `key` group of `single_modifiers_pattern`).
    key: str
    # Value of the attribute (the `value` group of `single_modifiers_pattern`).
    # NOTE(review): presumably pydantic tries the union members in the order
    # listed here when coercing the captured string - confirm for the
    # pydantic version in use.
    value: Union[int, float, bool, str]

key: str = None class-attribute

value: Union[int, float, bool, str] = None class-attribute

Entity

Bases: BaseModel

Source code in edsnlp/utils/examples.py
20
21
22
23
class Entity(BaseModel):
    """
    Entity located in the cleaned text returned by `parse_example`.
    """

    # Offset of the entity's first character in the cleaned (tag-free) text.
    start_char: int
    # Offset just past the entity's last character in the cleaned text,
    # so that `text[start_char:end_char]` is the entity text.
    end_char: int
    # Attributes parsed from the entity's opening tag.
    modifiers: List[Modifier]

start_char: int = None class-attribute

end_char: int = None class-attribute

modifiers: List[Modifier] = None class-attribute

find_matches(example)

Finds entities within the example.

PARAMETER DESCRIPTION
example

Example to process.

TYPE: str

RETURNS DESCRIPTION
List[re.Match]

List of matches for entities.

Source code in edsnlp/utils/examples.py
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
def find_matches(example: str) -> List[re.Match]:
    """
    Locate every tagged entity in an example.

    Parameters
    ----------
    example : str
        Text to scan for ``<ent ...>...</ent>`` spans.

    Returns
    -------
    List[re.Match]
        One regex match per tagged entity, in order of appearance.
    """
    # Unpack the lazy iterator so that callers receive a concrete list.
    return [*entity_pattern.finditer(example)]

parse_match(match)

Parse a regex match representing an entity.

PARAMETER DESCRIPTION
match

Match for an entity.

TYPE: re.Match

RETURNS DESCRIPTION
Match

Usable representation for the entity match.

Source code in edsnlp/utils/examples.py
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
def parse_match(match: re.Match) -> Match:
    """
    Convert the regex match of a tagged entity into a `Match` model.

    Parameters
    ----------
    match : re.Match
        Regex match covering a whole ``<ent ...>...</ent>`` span.

    Returns
    -------
    Match
        Offsets of the span, the text between the tags and the raw
        (unparsed) modifier string of the opening tag.

    Raises
    ------
    IndexError
        If the matched string does not fit `text_pattern` or
        `modifiers_pattern`.
    """

    tagged = match.group()
    begin, end = match.span()

    # Both patterns have a single capturing group, so `findall` returns
    # plain strings; only the first hit is relevant.
    inner_text = text_pattern.findall(tagged)[0]
    raw_modifiers = modifiers_pattern.findall(tagged)[0]

    return Match(
        start_char=begin,
        end_char=end,
        text=inner_text,
        modifiers=raw_modifiers,
    )

parse_example(example)

Parses an example: finds entities and removes the tags.

PARAMETER DESCRIPTION
example

Example to process.

TYPE: str

RETURNS DESCRIPTION
Tuple[str, List[Entity]]

Cleaned text and extracted entities.

Source code in edsnlp/utils/examples.py
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
def parse_example(example: str) -> Tuple[str, List[Entity]]:
    """
    Parse an example: find the tagged entities and remove the tags.

    Parameters
    ----------
    example : str
        Example to process, with entities wrapped in ``<ent ...>...</ent>``.

    Returns
    -------
    Tuple[str, List[Entity]]
        The text without tags, and the entities with offsets expressed
        in that cleaned text.
    """

    # Parse every match up front, before any text is assembled.
    parsed_matches = list(map(parse_match, find_matches(example=example)))

    pieces = []  # fragments of the cleaned text, joined at the end
    entities = []

    offset = 0  # length of the cleaned text assembled so far
    cursor = 0  # position reached in the raw (tagged) example

    for found in parsed_matches:
        # Untagged text between the previous entity and this one.
        leading = example[cursor : found.start_char]
        pieces.append(leading)

        start_char = offset + len(leading)
        pieces.append(found.text)
        end_char = start_char + len(found.text)

        offset = end_char
        # Resume after the closing tag.
        cursor = found.end_char

        modifiers = [
            Modifier.parse_obj(m.groupdict())
            for m in single_modifiers_pattern.finditer(found.modifiers)
        ]

        entities.append(
            Entity(
                start_char=start_char,
                end_char=end_char,
                modifiers=modifiers,
            )
        )

    # Whatever follows the last entity.
    pieces.append(example[cursor:])

    return "".join(pieces), entities
Back to top