Skip to content

edsnlp.utils.examples

entity_pattern = re.compile('(<ent[^<>]*>[^<>]+</ent>)') module-attribute

text_pattern = re.compile('<ent.*>(.+)</ent>') module-attribute

modifiers_pattern = re.compile('<ent\\s?(.*)>.+</ent>') module-attribute

single_modifiers_pattern = regex.compile("(?P<key>[^\\s]+?)=((?P<value>{.*?})|(?P<value>[^\\s']+)|'(?P<value>.+?)')") module-attribute

Match

Bases: BaseModel

Source code in edsnlp/utils/examples.py
 9
10
11
12
13
class Match(BaseModel):
    """
    Parsed representation of one tagged entity found in an example.

    The offsets refer to the original example string, i.e. the one that
    still contains the ``<ent ...>...</ent>`` tags.
    """

    # Offset at which the tagged span (opening tag included) starts.
    start_char: int
    # Offset just past the end of the tagged span (closing tag included).
    end_char: int
    # Text enclosed between the opening and the closing tag.
    text: str
    # Raw, still unparsed modifier string taken from the opening tag.
    modifiers: str

start_char: int = None class-attribute

end_char: int = None class-attribute

text: str = None class-attribute

modifiers: str = None class-attribute

Modifier

Bases: BaseModel

Source code in edsnlp/utils/examples.py
16
17
18
19
20
21
22
23
24
25
26
27
class Modifier(BaseModel):
    """
    Single ``key=value`` modifier attached to an entity tag.

    String values are tentatively decoded as JSON, so that a value can
    carry a number, a boolean or a dictionary rather than plain text.
    """

    key: str
    value: Union[int, float, bool, str, Dict[str, Any]]

    @validator("value")
    def optional_dict_parsing(cls, v):
        """
        Decode a string value as JSON when possible.

        Single quotes are swapped for double quotes before decoding.
        Non-string values, and strings that are not valid JSON, are
        returned unchanged.
        """
        if not isinstance(v, str):
            return v

        candidate = v.replace("'", '"')
        try:
            decoded = json.loads(candidate)
        except json.JSONDecodeError:
            # Not JSON: keep the original string, quotes untouched.
            return v
        return decoded

key: str = None class-attribute

value: Union[int, float, bool, str, Dict[str, Any]] = None class-attribute

optional_dict_parsing(v)

Source code in edsnlp/utils/examples.py
20
21
22
23
24
25
26
27
@validator("value")
def optional_dict_parsing(cls, v):
    """
    Decode a string value as JSON when possible.

    Single quotes are swapped for double quotes before decoding.
    Non-string values, and strings that are not valid JSON, are
    returned unchanged.
    """
    if not isinstance(v, str):
        return v

    candidate = v.replace("'", '"')
    try:
        decoded = json.loads(candidate)
    except json.JSONDecodeError:
        # Not JSON: keep the original string, quotes untouched.
        return v
    return decoded

Entity

Bases: BaseModel

Source code in edsnlp/utils/examples.py
30
31
32
33
class Entity(BaseModel):
    """
    Entity located in the cleaned text of an example, i.e. the text
    obtained once the tags have been removed.
    """

    # Offset of the entity's first character in the cleaned text.
    start_char: int
    # Offset just past the entity's last character in the cleaned text.
    end_char: int
    # Parsed key/value modifiers declared in the entity's tag.
    modifiers: List[Modifier]

start_char: int = None class-attribute

end_char: int = None class-attribute

modifiers: List[Modifier] = None class-attribute

find_matches(example)

Finds entities within the example.

PARAMETER DESCRIPTION
example

Example to process.

TYPE: str

RETURNS DESCRIPTION
List[re.Match]

List of matches for entities.

Source code in edsnlp/utils/examples.py
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
def find_matches(example: str) -> List[re.Match]:
    """
    Locate every tagged entity in an example.

    Parameters
    ----------
    example : str
        Example to scan, with its entity tags still in place.

    Returns
    -------
    List[re.Match]
        One match per tagged entity, in order of appearance.
    """
    tagged_spans = entity_pattern.finditer(example)
    return [*tagged_spans]

parse_match(match)

Parse a regex match representing an entity.

PARAMETER DESCRIPTION
match

Match for an entity.

TYPE: re.Match

RETURNS DESCRIPTION
Match

Usable representation for the entity match.

Source code in edsnlp/utils/examples.py
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
def parse_match(match: re.Match) -> Match:
    """
    Turn the regex match of a tagged entity into a ``Match`` object.

    Parameters
    ----------
    match : re.Match
        Regex match covering a whole tagged entity.

    Returns
    -------
    Match
        Offsets of the tagged span, the enclosed text and the raw
        modifier string.
    """
    tagged = match.group()
    span_start, span_end = match.span()

    # Only the first hit of each pattern is used.
    inner_text = text_pattern.findall(tagged)[0]
    raw_modifiers = modifiers_pattern.findall(tagged)[0]

    return Match(
        start_char=span_start,
        end_char=span_end,
        text=inner_text,
        modifiers=raw_modifiers,
    )

parse_example(example)

Parses an example: finds entities and removes the tags.

PARAMETER DESCRIPTION
example

Example to process.

TYPE: str

RETURNS DESCRIPTION
Tuple[str, List[Entity]]

Cleaned text and extracted entities.

Source code in edsnlp/utils/examples.py
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
def parse_example(example: str) -> Tuple[str, List[Entity]]:
    """
    Parse an example: extract its tagged entities and strip the tags.

    Parameters
    ----------
    example : str
        Example to process, with its entity tags still in place.

    Returns
    -------
    Tuple[str, List[Entity]]
        The text without tags, and the entities it contains. Entity
        offsets are relative to the cleaned text.
    """
    parsed = [parse_match(raw) for raw in find_matches(example=example)]

    pieces = []  # fragments of the cleaned text, joined once at the end
    entities = []
    read_from = 0  # position in the tagged example
    written = 0  # length of the cleaned text produced so far

    for found in parsed:
        # Untagged text sitting between the previous entity and this one.
        gap = example[read_from : found.start_char]

        begin = written + len(gap)
        finish = begin + len(found.text)

        pieces.append(gap)
        pieces.append(found.text)
        written = finish
        read_from = found.end_char

        parsed_modifiers = [
            Modifier.parse_obj(hit.groupdict())
            for hit in single_modifiers_pattern.finditer(found.modifiers)
        ]
        entities.append(
            Entity(
                start_char=begin,
                end_char=finish,
                modifiers=parsed_modifiers,
            )
        )

    # Whatever follows the last entity.
    pieces.append(example[read_from:])

    return "".join(pieces), entities