Skip to content

edsnlp.utils.regex

make_pattern(patterns, with_breaks=False, name=None)

Create OR pattern from a list of patterns.

PARAMETER DESCRIPTION
patterns

List of patterns to merge.

TYPE: List[str]

with_breaks

Whether to add breaks (\b) on each side, by default False

TYPE: bool, optional DEFAULT: False

name

Name of the group, using regex ?P<> directive.

TYPE: Optional[str] DEFAULT: None

RETURNS DESCRIPTION
str

Merged pattern.

Source code in edsnlp/utils/regex.py
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
def make_pattern(
    patterns: List[str],
    with_breaks: bool = False,
    name: Optional[str] = None,
) -> str:
    r"""
    Create OR pattern from a list of patterns.

    The alternatives are ordered from longest to shortest. The ordering is
    done on a copy, so the list passed by the caller is never modified.

    Parameters
    ----------
    patterns : List[str]
        List of patterns to merge.
    with_breaks : bool, optional
        Whether to add breaks (`\b`) on each side, by default False
    name: str, optional
        Name of the group, using regex `?P<>` directive. When omitted (or
        empty), a plain unnamed group is used.

    Returns
    -------
    str
        Merged pattern.
    """

    prefix = f"(?P<{name}>" if name else "("

    # Longest alternatives first: alternation is tried left to right, so for
    # literal alternatives this lets a longer one win over a shorter prefix.
    # `sorted` (rather than `list.sort`) keeps the caller's list intact; both
    # are stable, so patterns of equal length keep their relative order.
    ordered = sorted(patterns, key=len, reverse=True)

    pattern = prefix + "|".join(ordered) + ")"

    if with_breaks:
        pattern = r"\b" + pattern + r"\b"

    return pattern

compile_regex(reg)

This function tries to compile reg using the re module, and falls back to the regex module, which is more permissive.

PARAMETER DESCRIPTION
reg

RETURNS DESCRIPTION
Union[re.Pattern, regex.Pattern]
Source code in edsnlp/utils/regex.py
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
def compile_regex(reg):
    """
    Compile `reg`, preferring the `re` module and falling back to the
    `regex` module when `re` rejects the expression.

    Parameters
    ----------
    reg: str
        Regular expression to compile.

    Returns
    -------
    Union[re.Pattern, regex.Pattern]
        The compiled pattern, from whichever module accepted `reg`.

    Raises
    ------
    Exception
        If neither `re` nor `regex` can compile `reg`.
    """
    try:
        compiled = re.compile(reg)
    except re.error:
        # `re` refused the expression: give `regex` a chance before failing.
        try:
            compiled = regex.compile(reg)
        except regex.error:
            message = f"Could not compile: {reg!r}"
            raise Exception(message)
    return compiled
Back to top