Skip to content

edsnlp.utils.blocs

Utility that extracts code blocs and runs them.

Largely inspired by https://github.com/koaning/mktestdocs

check_outputs(code)

Looks for output patterns, and modifies the bloc:

  1. The preceding line becomes v = expr
  2. The output line becomes an assert statement
PARAMETER DESCRIPTION
code

Code block

TYPE: str

RETURNS DESCRIPTION
str

Modified code bloc with assert statements

Source code in edsnlp/utils/blocs.py
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
def check_outputs(code: str) -> str:
    """
    Looks for output patterns, and modifies the bloc:

    1. The preceding line becomes `#!python v = expr`
    2. The output line becomes an `#!python assert` statement

    A bloc with fewer than two lines cannot hold an (expression, output)
    pair and is returned unchanged.

    Parameters
    ----------
    code : str
        Code block

    Returns
    -------
    str
        Modified code bloc with assert statements
    """

    lines = code.split("\n")

    # Nothing to pair up: hand the bloc back untouched. Returning the
    # accumulator here would yield an empty list and drop the only line.
    if len(lines) < 2:
        return code

    modified = []
    skip = False

    # Walk over consecutive (line, next line) pairs.
    for expression, output in zip(lines[:-1], lines[1:]):
        if skip:
            # `expression` is an output line that was already turned into
            # an assert during the previous iteration.
            skip = False
            continue

        if output.startswith(OUTPUT_PATTERN):
            # Escape double quotes, since the expected value is embedded in
            # a double-quoted string literal of the generated code.
            expected = output[len(OUTPUT_PATTERN) :].replace('"', r"\"")

            modified.append(f"v = {expression}")
            modified.append(
                f'assert repr(v) == "{expected}" or str(v) == "{expected}"'
            )

            skip = True

        else:
            modified.append(expression)

    # The last line only ever appears as the "output" half of a pair, so it
    # has to be added explicitly unless it was just consumed as an output.
    if not skip:
        modified.append(lines[-1])

    return "\n".join(modified)

remove_indentation(code, indent)

Remove indentation from a code bloc.

PARAMETER DESCRIPTION
code

Code bloc

TYPE: str

indent

Level of indentation

TYPE: int

RETURNS DESCRIPTION
str

Modified code bloc

Source code in edsnlp/utils/blocs.py
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
def remove_indentation(code: str, indent: int) -> str:
    """
    Strip a fixed number of leading characters from every line of a bloc.

    The first `indent` characters of each line are dropped without checking
    that they are whitespace. A falsy `indent` returns the bloc unchanged.

    Parameters
    ----------
    code : str
        Code bloc
    indent : int
        Number of leading characters to remove from each line

    Returns
    -------
    str
        Modified code bloc
    """
    if indent:
        code = "\n".join(row[indent:] for row in code.split("\n"))

    return code

grab_code_blocks(docstring, lang='python')

Given a docstring, grab all the markdown codeblocks found in docstring.

PARAMETER DESCRIPTION
docstring

Full text.

TYPE: str

lang

Language to execute, by default "python"

TYPE: str, optional DEFAULT: 'python'

RETURNS DESCRIPTION
List[str]

Extracted code blocks

Source code in edsnlp/utils/blocs.py
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
def grab_code_blocks(docstring: str, lang="python") -> List[str]:
    """
    Collect the markdown code blocks matched by `BLOCK_PATTERN` in a text.

    Each retained block is dedented with `remove_indentation` and then
    rewritten by `check_outputs` before being returned.

    Parameters
    ----------
    docstring : str
        Full text.
    lang : str, optional
        Only blocks whose title contains this string are kept,
        by default "python"

    Returns
    -------
    List[str]
        Extracted code blocks
    """
    extracted = []

    for found in BLOCK_PATTERN.finditer(docstring):
        groups = found.groupdict()

        # Ignore blocks flagged as skipped, and blocks whose title does not
        # mention the requested language.
        if groups["skip"] or lang not in groups["title"]:
            continue

        dedented = remove_indentation(groups["code"], len(groups["indent"]))
        extracted.append(check_outputs(dedented))

    return extracted

printer(code)

Prints a code bloc with lines for easier debugging.

PARAMETER DESCRIPTION
code

Code bloc.

TYPE: str

Source code in edsnlp/utils/blocs.py
132
133
134
135
136
137
138
139
140
141
142
143
144
145
def printer(code: str) -> None:
    """
    Print a code bloc with numbered lines, for easier debugging.

    Lines are numbered from 1, zero-padded to three digits and separated
    from the line content by two spaces.

    Parameters
    ----------
    code : str
        Code bloc.
    """
    numbered = "\n".join(
        f"{number:03}  {text}"
        for number, text in enumerate(code.split("\n"), start=1)
    )

    print(numbered)

check_docstring(obj, lang='')

Given a function, test the contents of the docstring.

Source code in edsnlp/utils/blocs.py
148
149
150
151
152
153
154
155
156
157
158
def check_docstring(obj, lang=""):
    """
    Given a function, test the contents of the docstring.

    Every code block extracted from the docstring is executed with `exec`
    in its own fresh namespace. If a block raises, the name of the object
    and the numbered block are printed, and the exception is re-raised.

    Objects without a docstring (or with an empty one) have nothing to
    check and are silently accepted.

    Parameters
    ----------
    obj : Any
        Object exposing `__doc__` and `__name__`.
    lang : str, optional
        Passed to `grab_code_blocks` to select the blocks to run,
        by default ""
    """
    docstring = obj.__doc__

    # `__doc__` is None for undocumented objects; there is nothing to run.
    if not docstring:
        return

    for block in grab_code_blocks(docstring, lang=lang):
        try:
            exec(block, {"__MODULE__": "__main__"})
        except Exception:
            print(f"Error Encountered in `{obj.__name__}`. Caused by:\n")
            printer(block)
            raise

check_raw_string(raw, lang='python')

Given a raw string, test the contents.

Source code in edsnlp/utils/blocs.py
161
162
163
164
165
166
167
168
169
170
def check_raw_string(raw, lang="python"):
    """
    Run every code block found in a raw string.

    Blocks are extracted with `grab_code_blocks` and each one is executed
    in its own fresh namespace. When a block raises, it is printed with
    line numbers and the exception is propagated.

    Parameters
    ----------
    raw : str
        Text containing the code blocks.
    lang : str, optional
        Passed to `grab_code_blocks` to select the blocks to run,
        by default "python"
    """
    for block in grab_code_blocks(raw, lang=lang):
        try:
            exec(block, {"__MODULE__": "__main__"})
        except Exception:
            # Show the offending block before letting the error surface.
            printer(block)
            raise

check_md_file(path, memory=False)

Given a markdown file, parse the contents for Python code blocs and check that each independent bloc does not cause an error.

PARAMETER DESCRIPTION
path

Path to the markdown file to execute.

TYPE: Path

memory

Whether to keep results from one bloc to the next, by default False

TYPE: bool, optional DEFAULT: False

Source code in edsnlp/utils/blocs.py
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
def check_md_file(path: Path, memory: bool = False) -> None:
    """
    Read a markdown file and run the Python code blocs it contains.

    The file content is handed to `check_raw_file_full` when `memory` is
    set, and to `check_raw_string` otherwise.

    Parameters
    ----------
    path : Path
        Path to the markdown file to execute.
    memory : bool, optional
        Whether to keep results from one bloc to the next, by default `#!python False`
    """
    # NOTE: read with the platform default encoding (none is specified).
    content = Path(path).read_text()

    runner = check_raw_file_full if memory else check_raw_string
    runner(content, lang="python")