Skip to content

edsnlp.utils.examples

find_matches(example)

Finds entities within the example.

PARAMETER DESCRIPTION
example

Example to process.

TYPE: str

RETURNS DESCRIPTION
List[re.Match]

List of matches for entities.

Source code in edsnlp/utils/examples.py
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def find_matches(example: str) -> List[re.Match]:
    """
    Collect every occurrence of ``entity_pattern`` in the example.

    Parameters
    ----------
    example : str
        Text to scan for entities.

    Returns
    -------
    List[re.Match]
        Regex matches, one per entity, in the order they are found.
    """
    # Materialise the lazy iterator so callers get a reusable list.
    return [*entity_pattern.finditer(example)]

parse_match(match)

Parse a regex match representing an entity.

PARAMETER DESCRIPTION
match

Match for an entity.

TYPE: re.Match

RETURNS DESCRIPTION
Match

Usable representation for the entity match.

Source code in edsnlp/utils/examples.py
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
def parse_match(match: re.Match) -> Match:
    """
    Turn a raw regex match for an entity into a ``Match`` object.

    Parameters
    ----------
    match : re.Match
        Regex match covering one entity.

    Returns
    -------
    Match
        Object holding the character span of the regex match, together with
        the first result of ``text_pattern`` and of ``modifiers_pattern``
        applied to the matched string.
    """

    raw = match.group()

    # Only the first hit of each pattern within the matched string is kept.
    inner_text = text_pattern.findall(raw)[0]
    raw_modifiers = modifiers_pattern.findall(raw)[0]

    return Match(
        start_char=match.start(),
        end_char=match.end(),
        text=inner_text,
        modifiers=raw_modifiers,
    )

parse_example(example)

Parses an example: finds entities and removes the tags.

PARAMETER DESCRIPTION
example

Example to process.

TYPE: str

RETURNS DESCRIPTION
Tuple[str, List[Entity]]

Cleaned text and extracted entities.

Source code in edsnlp/utils/examples.py
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
def parse_example(example: str) -> Tuple[str, List[Entity]]:
    """
    Parses an example: finds entities and removes the tags.

    Parameters
    ----------
    example : str
        Example to process.

    Returns
    -------
    Tuple[str, List[Entity]]
        Cleaned text and extracted entities. Entity offsets are expressed
        relative to the cleaned text, not to the input string.
    """

    matches = [parse_match(match) for match in find_matches(example=example)]
    chunks = []
    entities = []

    # Position in the input string up to which content has been consumed.
    cursor = 0
    # Length of the cleaned text accumulated so far.
    offset = 0

    for match in matches:

        # Copy over the text lying between the previous entity and this one.
        before = example[cursor : match.start_char]
        chunks.append(before)
        offset += len(before)

        # The entity span is measured in the cleaned text.
        start_char = offset
        chunks.append(match.text)
        offset += len(match.text)
        end_char = offset

        # Split on the first "=" only, so that a value may itself contain "=".
        modifiers = [m.split("=", 1) for m in match.modifiers.split()]

        cursor = match.end_char

        entity = Entity(
            start_char=start_char,
            end_char=end_char,
            modifiers=[Modifier(key=k, value=v) for k, v in modifiers],
        )

        entities.append(entity)

    # Whatever follows the last entity is kept untouched.
    chunks.append(example[cursor:])

    return "".join(chunks), entities
Back to top