`edsnlp.utils`

`colors`

`CATEGORY20 = ['#1f77b4', '#aec7e8', '#ff7f0e', '#ffbb78', '#2ca02c', '#98df8a', '#d62728', '#ff9896', '#9467bd', '#c5b0d5', '#8c564b', '#c49c94', '#e377c2', '#f7b6d2', '#7f7f7f', '#c7c7c7', '#bcbd22', '#dbdb8d', '#17becf', '#9edae5']` `module-attribute`

`create_colors(labels)`

Assign a colour for each label, using category20 palette. The method loops over the colour palette in case there are too many labels.

PARAMETER DESCRIPTION

labels

List of labels to colorise in displacy.

TYPE: List[str]

RETURNS	DESCRIPTION
`Dict[str, str]`	A displacy-compatible colour assignment.

Source code in edsnlp/utils/colors.py

def create_colors(labels: List[str]) -> Dict[str, str]:
    """
    Map every label to a colour drawn from the category20 palette.

    The palette is cycled, so colours get reused as soon as there are
    more labels than palette entries.

    Parameters
    ----------
    labels : List[str]
        List of labels to colorise in displacy.

    Returns
    -------
    Dict[str, str]
        A displacy-compatible colour assignment.
    """

    # `cycle` makes the palette endless; `zip` stops once the labels run out.
    palette = cycle(CATEGORY20)

    return dict(zip(labels, palette))

`deprecation`

`deprecated_extension(name, new_name)`

Source code in edsnlp/utils/deprecation.py

def deprecated_extension(name: str, new_name: str) -> None:
    """
    Log a warning telling that an extension is deprecated.

    Parameters
    ----------
    name : str
        Name of the deprecated extension.
    new_name : str
        Name of the extension to use instead.
    """
    logger.warning(
        f'The extension "{name}" is deprecated and will be '
        "removed in a future version. "
        f'Please use "{new_name}" instead.'
    )

`deprecated_getter_factory(name, new_name)`

Source code in edsnlp/utils/deprecation.py

def deprecated_getter_factory(name: str, new_name: str) -> Callable:
    """
    Build a getter for a deprecated extension.

    The returned getter emits a deprecation warning, then forwards the
    lookup to the `new_name` extension of the object it receives.

    Parameters
    ----------
    name : str
        Name of the deprecated extension.
    new_name : str
        Name of the extension that replaces it.

    Returns
    -------
    Callable
        Getter taking a token-like object and returning `obj._.<new_name>`.
    """

    def getter(toklike: Union[Token, Span, Doc]) -> Any:
        # Prefix both names with the concrete class (e.g. `Span._.`)
        owner = type(toklike).__name__

        deprecated_extension(f"{owner}._.{name}", f"{owner}._.{new_name}")

        return getattr(toklike._, new_name)

    return getter

`deprecation(name, new_name=None)`

Source code in edsnlp/utils/deprecation.py

def deprecation(name: str, new_name: Optional[str] = None):
    """
    Log a warning telling that calling `name` directly is deprecated.

    Parameters
    ----------
    name : str
        The deprecated name.
    new_name : Optional[str], optional
        The name to use instead. Falls back to `eds.<name>` when empty or None.
    """

    if not new_name:
        new_name = f"eds.{name}"

    logger.warning(
        f'Calling "{name}" directly is deprecated and '
        "will be removed in a future version. "
        f'Please use "{new_name}" instead.'
    )

`deprecated_factory(name, new_name=None, default_config=None, func=None)`

Execute the Language.factory method on a modified factory function. The modification adds a deprecation warning.

PARAMETER	DESCRIPTION
`name`	The deprecated name for the pipeline TYPE: `str`
`new_name`	The new name for the pipeline, which should be used, by default None TYPE: `Optional[str], optional` DEFAULT: `None`
`default_config`	The configuration that should be passed to Language.factory, by default None TYPE: `Optional[Dict[str, Any]], optional` DEFAULT: `None`
`func`	The function to decorate, by default None TYPE: `Optional[Callable], optional` DEFAULT: `None`

RETURNS	DESCRIPTION
`Callable`

Source code in edsnlp/utils/deprecation.py

def deprecated_factory(
    name: str,
    new_name: Optional[str] = None,
    default_config: Optional[Dict[str, Any]] = None,
    func: Optional[Callable] = None,
) -> Callable:
    """
    Register a factory under a deprecated name through `Language.factory`.

    The function that gets registered is a wrapped copy of the factory which
    emits a deprecation warning before delegating to the original.

    Parameters
    ----------
    name : str
        The deprecated name for the pipeline
    new_name : Optional[str], optional
        The new name for the pipeline, which should be used, by default None
    default_config : Optional[Dict[str, Any]], optional
        The configuration that should be passed to Language.factory, by default None
    func : Optional[Callable], optional
        The function to decorate, by default None

    Returns
    -------
    Callable
        The untouched `func` if it was provided, otherwise a decorator
        expecting the factory function.
    """

    config = dict() if default_config is None else default_config

    register = Language.factory(name, default_config=config)

    def wrap(factory):

        # micheles' decorator package is used so that the wrapped function
        # keeps the same signature as the factory
        # See https://github.com/micheles/decorator/
        @decorator
        def decorate(f, *args, **kwargs):
            deprecation(name, new_name)
            return f(*args, **kwargs)

        # Only the warning-emitting copy is registered under the old name...
        register(decorate(factory))

        # ...while the caller gets the original factory back.
        return factory

    return wrap if func is None else wrap(func)

`examples`

`entity_pattern = re.compile('(<ent[^<>]*>[^<>]+</ent>)')` `module-attribute`

`text_pattern = re.compile('<ent.*>(.+)</ent>')` `module-attribute`

`modifiers_pattern = re.compile('<ent\\s?(.*)>.+</ent>')` `module-attribute`

`Match`

Bases: BaseModel

Source code in edsnlp/utils/examples.py

class Match(BaseModel):
    # Parsed form of one `<ent ...>...</ent>` tag, as built by `parse_match`.

    # Offset of the tag's first character in the raw example (regex match start)
    start_char: int
    # Offset just past the tag's last character in the raw example (regex match end)
    end_char: int
    # Text enclosed between the opening and the closing tag
    text: str
    # Raw content of the opening tag after `<ent`, left unparsed
    modifiers: str

`start_char: int = None` `class-attribute`

`end_char: int = None` `class-attribute`

`text: str = None` `class-attribute`

`modifiers: str = None` `class-attribute`

`Modifier`

Bases: BaseModel

Source code in edsnlp/utils/examples.py

14
15
16

class Modifier(BaseModel):
    # One `key=value` pair read from the opening tag of an entity.

    # Left-hand side of the `key=value` pair
    key: str
    # Right-hand side of the pair; supplied as a string by `parse_example`
    # NOTE(review): presumably coerced by pydantic to a Union member - confirm
    value: Union[int, float, bool, str]

`key: str = None` `class-attribute`

`value: Union[int, float, bool, str] = None` `class-attribute`

`Entity`

Bases: BaseModel

Source code in edsnlp/utils/examples.py

class Entity(BaseModel):
    # Entity located in the cleaned (tag-free) text produced by `parse_example`.

    # Offset of the entity's first character in the cleaned text
    start_char: int
    # Offset just past the entity's last character in the cleaned text
    end_char: int
    # Modifiers declared in the entity's opening tag
    modifiers: List[Modifier]

`start_char: int = None` `class-attribute`

`end_char: int = None` `class-attribute`

`modifiers: List[Modifier] = None` `class-attribute`

`find_matches(example)`

Finds entities within the example.

PARAMETER DESCRIPTION

example

Example to process.

TYPE: str

RETURNS	DESCRIPTION
`List[re.Match]`	List of matches for entities.

Source code in edsnlp/utils/examples.py

def find_matches(example: str) -> List[re.Match]:
    """
    Locate every tagged entity in the example.

    Parameters
    ----------
    example : str
        Example to process.

    Returns
    -------
    List[re.Match]
        List of matches for entities.
    """
    found = entity_pattern.finditer(example)

    # Exhaust the iterator so that callers get a concrete list
    return [*found]

`parse_match(match)`

Parse a regex match representing an entity.

PARAMETER DESCRIPTION

match

Match for an entity.

TYPE: re.Match

RETURNS	DESCRIPTION
`Match`	Usable representation for the entity match.

Source code in edsnlp/utils/examples.py

def parse_match(match: re.Match) -> Match:
    """
    Turn a regex match for a tagged entity into a `Match` object.

    Parameters
    ----------
    match : re.Match
        Match for an entity.

    Returns
    -------
    Match
        Usable representation for the entity match.
    """

    # Full tagged snippet, opening and closing tags included
    tagged = match.group()

    return Match(
        start_char=match.start(),
        end_char=match.end(),
        # The first hit of each pattern is the one used
        text=text_pattern.findall(tagged)[0],
        modifiers=modifiers_pattern.findall(tagged)[0],
    )

`parse_example(example)`

Parses an example: finds the tagged entities and removes the tags.

PARAMETER DESCRIPTION

example

Example to process.

TYPE: str

RETURNS	DESCRIPTION
`Tuple[str, List[Entity]]`	Cleaned text and extracted entities.

Source code in edsnlp/utils/examples.py

def parse_example(example: str) -> Tuple[str, List[Entity]]:
    """
    Parses an example: finds the tagged entities and removes the tags.

    Parameters
    ----------
    example : str
        Example to process.

    Returns
    -------
    Tuple[str, List[Entity]]
        Cleaned text and extracted entities. Entity offsets refer to the
        cleaned text, not to the tagged example.
    """

    matches = [parse_match(match) for match in find_matches(example=example)]
    text = ""
    entities = []

    # Position in the tagged example up to which everything was copied
    cursor = 0

    for match in matches:

        # Copy the untagged text sitting between the previous tag and this one
        text += example[cursor : match.start_char]

        # The entity offsets are measured on the cleaned text being built
        start_char = len(text)
        text += match.text
        end_char = len(text)

        # Split on the first "=" only, so that a value may itself contain "="
        modifiers = [m.split("=", 1) for m in match.modifiers.split()]

        cursor = match.end_char

        entity = Entity(
            start_char=start_char,
            end_char=end_char,
            modifiers=[Modifier(key=k, value=v) for k, v in modifiers],
        )

        entities.append(entity)

    # Trailing text located after the last tag
    text += example[cursor:]

    return text, entities

`filter`

`default_sort_key(span)`

Returns the sort key for filtering spans.

PARAMETER DESCRIPTION

span

Span to sort.

TYPE: Span

RETURNS DESCRIPTION

key

Sort key.

TYPE: Tuple(int, int)

Source code in edsnlp/utils/filter.py

def default_sort_key(span: Union[Span, Tuple[Span, Any]]) -> Tuple[int, int]:
    """
    Returns the sort key for filtering spans.

    Parameters
    ----------
    span : Union[Span, Tuple[Span, Any]]
        Span to sort. If a tuple is given, its first element is used.

    Returns
    -------
    key : Tuple[int, int]
        Sort key: the length of the span in tokens, then the negated start
        index.
    """
    # Spans may come bundled with extra data as `(span, extra)` tuples
    if isinstance(span, tuple):
        span = span[0]
    return span.end - span.start, -span.start

`start_sort_key(span)`

Returns the sort key for filtering spans by start order.

PARAMETER DESCRIPTION

span

Span to sort.

TYPE: Span

RETURNS DESCRIPTION

key

Sort key.

TYPE: int

Source code in edsnlp/utils/filter.py

def start_sort_key(span: Union[Span, Tuple[Span, Any]]) -> int:
    """
    Returns the sort key for filtering spans by start order.

    Parameters
    ----------
    span : Union[Span, Tuple[Span, Any]]
        Span to sort. If a tuple is given, its first element is used.

    Returns
    -------
    key : int
        Sort key: the start index of the span.
    """
    # Spans may come bundled with extra data as `(span, extra)` tuples
    if isinstance(span, tuple):
        span = span[0]
    return span.start

`filter_spans(spans, label_to_remove=None, return_discarded=False, sort_key=default_sort_key)`

Re-definition of spacy's filtering function, that returns discarded spans as well as filtered ones.

Can also accept a label_to_remove argument, useful for filtering out pseudo cues. If set, results can contain overlapping spans: only spans overlapping with excluded labels are removed. The main expected use case is for pseudo-cues.

It can handle an iterable of tuples instead of an iterable of Spans. The primary use-case is the use with the RegexMatcher's capacity to return the span's groupdict.

The spaCy documentation states:

Filter a sequence of spans and remove duplicates or overlaps. Useful for creating named entities (where one token can only be part of one entity) or when merging spans with Retokenizer.merge. When spans overlap, the (first) longest span is preferred over shorter spans.

Filtering out spans

If the label_to_remove argument is supplied, it might be tempting to filter overlapping spans that are not part of a label to remove.

The reason we keep all other possibly overlapping labels is that in qualifier pipelines, the same cue can precede and follow a marked entity. Hence we need to keep every example.

PARAMETER	DESCRIPTION
`spans`	Spans to filter. TYPE: `Iterable[Union["Span", Tuple["Span", Any]]]`
`return_discarded`	Whether to return discarded spans. TYPE: `bool` DEFAULT: `False`
`label_to_remove`	Label to remove. If set, results can contain overlapping spans. TYPE: `str, optional` DEFAULT: `None`
`sort_key`	Key to sorting spans before applying overlap conflict resolution. A span with a higher key will have precedence over another span. By default, the largest, leftmost spans are selected first. TYPE: `Callable[Span, Any], optional` DEFAULT: `default_sort_key`

RETURNS DESCRIPTION

results

Filtered spans

TYPE: List[Union[Span, Tuple[Span, Any]]]

discarded

Discarded spans

TYPE: List[Union[Span, Tuple[Span, Any]]], optional

Source code in edsnlp/utils/filter.py

def filter_spans(
    spans: Iterable[Union["Span", Tuple["Span", Any]]],
    label_to_remove: Optional[str] = None,
    return_discarded: bool = False,
    sort_key: Callable[[Span], Any] = default_sort_key,
) -> Union[
    List[Union[Span, Tuple[Span, Any]]],
    Tuple[List[Union[Span, Tuple[Span, Any]]], List[Union[Span, Tuple[Span, Any]]]],
]:
    """
    Variant of spaCy's span filtering function that can also return the
    spans it discarded.

    A `label_to_remove` argument may be supplied, which is useful for
    filtering out pseudo cues. When set, `results` can contain overlapping
    spans: only the spans overlapping with the excluded label are removed.

    The function accepts an iterable of tuples instead of an iterable of
    `Span`s, in which case the span is the first element of each tuple.
    The primary use-case is the `RegexMatcher`'s capacity to return the
    span's `groupdict`.

    !!! note ""

        The **spaCy documentation states**:

        > Filter a sequence of spans and remove duplicates or overlaps.
        > Useful for creating named entities (where one token can only
        > be part of one entity) or when merging spans with
        > `Retokenizer.merge`. When spans overlap, the (first)
        > longest span is preferred over shorter spans.

    !!! danger "Filtering out spans"

        If the `label_to_remove` argument is supplied, it might be tempting to
        filter overlapping spans that are not part of a label to remove.

        The reason we keep all other possibly overlapping labels is that in qualifier
        pipelines, the same cue can precede **and** follow a marked entity.
        Hence we need to keep every example.

    Parameters
    ----------
    spans : Iterable[Union["Span", Tuple["Span", Any]]]
        Spans to filter.
    label_to_remove : str, optional
        Label to remove. If set, results can contain overlapping spans.
    return_discarded : bool
        Whether to return discarded spans.
    sort_key : Callable[[Span], Any], optional
        Key to sorting spans before applying overlap conflict resolution.
        A span with a higher key will have precedence over another span.
        By default, the largest, leftmost spans are selected first.

    Returns
    -------
    results : List[Union[Span, Tuple[Span, Any]]]
        Filtered spans
    discarded : List[Union[Span, Tuple[Span, Any]]], optional
        Discarded spans
    """
    kept = []
    dropped = []

    # Token indices covered by the spans that block later candidates
    claimed = set()

    # Highest key first: those spans win the conflicts
    for item in sorted(spans, key=sort_key, reverse=True):
        s = item if isinstance(item, Span) else item[0]

        # True only when a label has to be removed and this span carries it
        removable = label_to_remove is not None and s.label_ == label_to_remove

        # Check for end - 1 here because boundaries are inclusive.
        # NOTE(review): only the two boundary tokens are tested; with a custom
        # sort_key, a span enclosing an already claimed shorter span would
        # presumably go undetected - confirm this is intended.
        overlaps = s.start in claimed or s.end - 1 in claimed

        if overlaps:
            # Spans bearing the label to remove are dropped silently
            if not removable:
                dropped.append(item)
            continue

        if not removable:
            kept.append(item)

        # Without a label to remove, every kept span claims its tokens.
        # Otherwise, only the spans bearing that label do.
        if label_to_remove is None or removable:
            claimed.update(range(s.start, s.end))

    # Both outputs are returned in document order
    kept.sort(key=start_sort_key)
    dropped.sort(key=start_sort_key)

    if return_discarded:
        return kept, dropped

    return kept

`consume_spans(spans, filter, second_chance=None)`

Consume a list of spans, according to a filter.

Warning

This method makes the hard hypothesis that:

Spans are sorted.
Spans are consumed in sequence and only once.

The second item is problematic for the way we treat long entities, hence the second_chance parameter, which lets entities be seen more than once.

PARAMETER DESCRIPTION

spans

List of spans to filter

TYPE: List of spans

filter

Filtering function. Should return True when the item is to be included.

TYPE: Callable

second_chance

Optional list of spans to include again (useful for long entities), by default None

TYPE: List of spans, optional DEFAULT: None

RETURNS DESCRIPTION

matches

List of spans consumed by the filter.

TYPE: List of spans

remainder

List of remaining spans in the original spans parameter.

TYPE: List of spans

Source code in edsnlp/utils/filter.py

def consume_spans(
    spans: List[Span],
    filter: Callable,
    second_chance: Optional[List[Span]] = None,
) -> Tuple[List[Span], List[Span]]:
    """
    Consume the leading spans of a list for as long as a filter accepts them.

    !!! warning
        This method makes the hard hypothesis that:

        1. Spans are sorted.
        2. Spans are consumed in sequence and only once.

        The second item is problematic for the way we treat long entities,
        hence the `second_chance` parameter, which lets entities be seen
        more than once.

    Parameters
    ----------
    spans : List of spans
        List of spans to filter
    filter : Callable
        Filtering function. Should return True when the item is to be included.
    second_chance : List of spans, optional
        Optional list of spans to include again (useful for long entities),
        by default None

    Returns
    -------
    matches : List of spans
        List of spans consumed by the filter, followed by the
        `second_chance` spans that the filter accepts.
    remainder : List of spans
        List of remaining spans in the original `spans` parameter.
    """

    # Second-chance spans are tested against the filter as well
    retained = [m for m in second_chance if filter(m)] if second_chance else []

    if not spans:
        return retained, []

    # Count the leading spans accepted by the filter, stop at the first refusal
    cut = 0
    for span in spans:
        if not filter(span):
            break
        cut += 1

    # Slicing copies, hence the input list is left untouched by `extend`
    matches = spans[:cut]
    remainder = spans[cut:]

    matches.extend(retained)

    return matches, remainder

`get_spans(spans, label)`

Extracts spans with a given label. Prefer using hash label for performance reasons.

PARAMETER DESCRIPTION

spans

List of spans to filter.

TYPE: List[Span]

label

Label to filter on.

TYPE: Union[int, str]

RETURNS	DESCRIPTION
`List[Span]`	Filtered spans.

Source code in edsnlp/utils/filter.py

def get_spans(spans: List[Span], label: Union[int, str]) -> List[Span]:
    """
    Select the spans bearing a given label.
    Prefer using hash label for performance reasons.

    Parameters
    ----------
    spans : List[Span]
        List of spans to filter.
    label : Union[int, str]
        Label to filter on.

    Returns
    -------
    List[Span]
        Filtered spans.
    """
    # Integer labels are compared to `label`, anything else to `label_`
    attribute = "label" if isinstance(label, int) else "label_"

    return [span for span in spans if getattr(span, attribute) == label]

`resources`

`get_verbs(verbs=None, check_contains=True)`

Extract verbs from the resources, as a pandas dataframe.

PARAMETER DESCRIPTION

verbs

List of verbs to keep. Returns all verbs by default.

TYPE: List[str], optional DEFAULT: None

check_contains

Whether to check that no verb is missing if a list of verbs was provided. By default True

TYPE: bool, optional DEFAULT: True

RETURNS	DESCRIPTION
`pd.DataFrame`	DataFrame containing conjugated verbs.

Source code in edsnlp/utils/resources.py

def get_verbs(
    verbs: Optional[List[str]] = None, check_contains: bool = True
) -> pd.DataFrame:
    """
    Extract verbs from the resources, as a pandas dataframe.

    Parameters
    ----------
    verbs : List[str], optional
        List of verbs to keep. Returns all verbs by default.
    check_contains : bool, optional
        Whether to check that no verb is missing if a list of verbs was provided.
        By default True

    Returns
    -------
    pd.DataFrame
        DataFrame containing conjugated verbs.

    Raises
    ------
    AssertionError
        If `check_contains` is set and the number of distinct verbs found
        differs from the number of distinct verbs requested.
    """

    conjugated_verbs = pd.read_csv(BASE_DIR / "resources" / "verbs.csv")

    # No selection requested (None or empty list): return the whole table
    if not verbs:
        return conjugated_verbs

    # Deduplicate so that the count below compares distinct verbs only
    requested = set(verbs)

    selected_verbs = conjugated_verbs[conjugated_verbs.verb.isin(requested)]

    # Raised explicitly rather than with an `assert` statement, so that the
    # check is still performed when Python runs with the -O flag.
    if check_contains and len(requested) != selected_verbs.verb.nunique():
        raise AssertionError("Some verbs are missing !")

    return selected_verbs

`regex`

`make_pattern(patterns, with_breaks=False, name=None)`

Create OR pattern from a list of patterns.

PARAMETER DESCRIPTION

patterns

List of patterns to merge.

TYPE: List[str]

with_breaks

Whether to add breaks (\b) on each side, by default False

TYPE: bool, optional DEFAULT: False

name

Name of the group, using regex ?P<> directive.

TYPE: Optional[str] DEFAULT: None

RETURNS	DESCRIPTION
`str`	Merged pattern.

Source code in edsnlp/utils/regex.py

def make_pattern(
    patterns: List[str],
    with_breaks: bool = False,
    name: Optional[str] = None,
) -> str:
    r"""
    Create OR pattern from a list of patterns.

    The input list is left untouched: alternatives are ordered on a copy.

    Parameters
    ----------
    patterns : List[str]
        List of patterns to merge.
    with_breaks : bool, optional
        Whether to add breaks (`\b`) on each side, by default False
    name: str, optional
        Name of the group, using regex `?P<>` directive.

    Returns
    -------
    str
        Merged pattern.
    """

    prefix = f"(?P<{name}>" if name else "("

    # Longest alternatives come first (sorting by length might be more
    # efficient). `sorted` works on a copy, so the caller's list is not
    # reordered as a side effect.
    ordered = sorted(patterns, key=len, reverse=True)

    pattern = prefix + "|".join(ordered) + ")"

    if with_breaks:
        pattern = r"\b" + pattern + r"\b"

    return pattern

`compile_regex(reg)`

This function tries to compile reg using the re module, and falls back to the regex module that is more permissive.

PARAMETER	DESCRIPTION
`reg`

RETURNS	DESCRIPTION
`Union[re.Pattern, regex.Pattern]`

Source code in edsnlp/utils/regex.py

def compile_regex(reg):
    """
    Compile `reg`, preferring the `re` module and falling back to the
    `regex` module, which is more permissive, when `re` rejects it.

    Parameters
    ----------
    reg: str

    Returns
    -------
    Union[re.Pattern, regex.Pattern]

    Raises
    ------
    Exception
        If neither module manages to compile the expression.
    """
    try:
        compiled = re.compile(reg)
    except re.error:
        # Second attempt, nested so that both failures stay in the traceback
        try:
            compiled = regex.compile(reg)
        except regex.error:
            message = "Could not compile: {}".format(repr(reg))
            raise Exception(message)

    return compiled

`inclusion`

`check_inclusion(span, start, end)`

Checks whether the span overlaps the boundaries.

PARAMETER DESCRIPTION

span

Span to check.

TYPE: Span

start

Start of the boundary

TYPE: int

end

End of the boundary

TYPE: int

RETURNS	DESCRIPTION
`bool`	Whether the span overlaps the boundaries.

Source code in edsnlp/utils/inclusion.py

def check_inclusion(span: Span, start: int, end: int) -> bool:
    """
    Tell whether the span shares at least one position with the boundaries.

    Parameters
    ----------
    span : Span
        Span to check.
    start : int
        Start of the boundary
    end : int
        End of the boundary

    Returns
    -------
    bool
        Whether the span overlaps the boundaries.
    """

    # Overlap means starting before the boundary ends
    # and ending after the boundary starts.
    return span.start < end and span.end > start

`blocs`

Utility that extracts code blocs and runs them.

Largely inspired by https://github.com/koaning/mktestdocs

BLOCK_PATTERN = re.compile('((?P<skip>)\\s+)?(?P<indent> *)```(?P<title>.*?)\\n(?P<code>.+?)```', flags=re.DOTALL) `module-attribute`

`OUTPUT_PATTERN = '# Out: '` `module-attribute`

`check_outputs(code)`

Looks for output patterns, and modifies the bloc:

The preceding line becomes v = expr
The output line becomes an assert statement

PARAMETER DESCRIPTION

code

Code block

TYPE: str

RETURNS	DESCRIPTION
`str`	Modified code bloc with assert statements

Source code in edsnlp/utils/blocs.py

def check_outputs(code: str) -> str:
    """
    Looks for output patterns, and modifies the bloc:

    1. The preceding line becomes `#!python v = expr`
    2. The output line becomes an `#!python assert` statement

    Parameters
    ----------
    code : str
        Code block

    Returns
    -------
    str
        Modified code bloc with assert statements. A bloc with fewer than
        two lines is returned unchanged.
    """

    lines: List[str] = code.split("\n")

    # An output line needs a preceding expression: with fewer than two lines
    # there is nothing to rewrite, and the bloc is handed back as is.
    if len(lines) < 2:
        return code

    rewritten: List[str] = []

    # Set once an output line was consumed together with its expression,
    # so that it is not processed again as the expression of the next pair.
    skip = False

    for expression, output in zip(lines[:-1], lines[1:]):
        if skip:
            skip = False
            continue

        if output.startswith(OUTPUT_PATTERN):
            # Escape double quotes, the expected value being embedded in a
            # double-quoted string literal.
            expected = output[len(OUTPUT_PATTERN) :].replace('"', r"\"")
            check = f'assert repr(v) == "{expected}" or str(v) == "{expected}"'

            rewritten.append(f"v = {expression}")
            rewritten.append(check)

            skip = True

        else:
            rewritten.append(expression)

    # The last line is only ever seen as an "output" by the loop:
    # add it unless it was just consumed as one.
    if not skip:
        rewritten.append(lines[-1])

    return "\n".join(rewritten)

`remove_indentation(code, indent)`

Remove indentation from a code bloc.

PARAMETER DESCRIPTION

code

Code bloc

TYPE: str

indent

Level of indentation

TYPE: int

RETURNS	DESCRIPTION
`str`	Modified code bloc

Source code in edsnlp/utils/blocs.py

def remove_indentation(code: str, indent: int) -> str:
    """
    Strip a fixed number of leading characters from every line of a code bloc.

    Parameters
    ----------
    code : str
        Code bloc
    indent : int
        Level of indentation

    Returns
    -------
    str
        Modified code bloc
    """

    # Nothing to strip
    if not indent:
        return code

    # The first `indent` characters go, whatever they are
    return "\n".join(line[indent:] for line in code.split("\n"))

`grab_code_blocks(docstring, lang='python')`

Given a docstring, grab all the markdown codeblocks found in docstring.

PARAMETER DESCRIPTION

docstring

Full text.

TYPE: str

lang

Language to execute, by default "python"

TYPE: str, optional DEFAULT: 'python'

RETURNS	DESCRIPTION
`List[str]`	Extracted code blocks

Source code in edsnlp/utils/blocs.py

def grab_code_blocks(docstring: str, lang="python") -> List[str]:
    """
    Collect the markdown code blocks of a text that match a language.

    Each selected block is dedented, and its output comments are turned
    into assert statements.

    Parameters
    ----------
    docstring : str
        Full text.
    lang : str, optional
        Language to execute, by default "python"

    Returns
    -------
    List[str]
        Extracted code blocks
    """
    extracted = []

    for found in BLOCK_PATTERN.finditer(docstring):
        groups = found.groupdict()

        # Ignore blocks flagged to be skipped, and blocks whose title
        # does not mention the requested language
        if groups["skip"] or lang not in groups["title"]:
            continue

        dedented = remove_indentation(groups["code"], len(groups["indent"]))
        extracted.append(check_outputs(dedented))

    return extracted

`printer(code)`

Prints a code bloc with lines for easier debugging.

PARAMETER DESCRIPTION

code

Code bloc.

TYPE: str

Source code in edsnlp/utils/blocs.py

def printer(code: str) -> None:
    """
    Print a code bloc with numbered lines, for easier debugging.

    Parameters
    ----------
    code : str
        Code bloc.
    """
    # Numbering starts at one, zero-padded to three digits
    numbered = (
        f"{number:03}  {line}"
        for number, line in enumerate(code.split("\n"), start=1)
    )

    print("\n".join(numbered))

`check_docstring(obj, lang='')`

Given a function, test the contents of the docstring.

Source code in edsnlp/utils/blocs.py

def check_docstring(obj, lang=""):
    """
    Execute the code blocs found in the docstring of an object.

    On failure, the name of the object and the offending bloc are printed
    before the exception is re-raised.
    """
    for bloc in grab_code_blocks(obj.__doc__, lang=lang):
        # Every bloc runs in its own, fresh namespace
        namespace = {"__MODULE__": "__main__"}
        try:
            exec(bloc, namespace)
        except Exception:
            print(f"Error Encountered in `{obj.__name__}`. Caused by:\n")
            printer(bloc)
            raise

`check_raw_string(raw, lang='python')`

Given a raw string, test the contents.

Source code in edsnlp/utils/blocs.py

def check_raw_string(raw, lang="python"):
    """
    Execute, one by one, the code blocs found in a raw string.

    On failure, the offending bloc is printed before the exception
    is re-raised.
    """
    for bloc in grab_code_blocks(raw, lang=lang):
        # Every bloc runs in its own, fresh namespace
        namespace = {"__MODULE__": "__main__"}
        try:
            exec(bloc, namespace)
        except Exception:
            printer(bloc)
            raise

`check_raw_file_full(raw, lang='python')`

Source code in edsnlp/utils/blocs.py

def check_raw_file_full(raw, lang="python"):
    """
    Execute all the code blocs found in a raw string as a single script.

    The blocs are joined with newlines and run in one namespace. On failure,
    the joined code is printed before the exception is re-raised.
    """
    blocs = grab_code_blocks(raw, lang=lang)
    all_code = "\n".join(blocs)

    namespace = {"__MODULE__": "__main__"}
    try:
        exec(all_code, namespace)
    except Exception:
        printer(all_code)
        raise

`check_md_file(path, memory=False)`

Given a markdown file, parse the contents for Python code blocs and check that each independent bloc does not cause an error.

PARAMETER DESCRIPTION

path

Path to the markdown file to execute.

TYPE: Path

memory

Whether to keep results from one bloc to the next, by default False

TYPE: bool, optional DEFAULT: False

Source code in edsnlp/utils/blocs.py

def check_md_file(path: Path, memory: bool = False) -> None:
    """
    Given a markdown file, parse the contents for Python code blocs
    and check that each independent bloc does not cause an error.

    Parameters
    ----------
    path : Path
        Path to the markdown file to execute.
    memory : bool, optional
        Whether to keep results from one bloc to the next, by default `#!python False`
    """
    # Read as UTF-8 explicitly: the default encoding depends on the locale,
    # and may fail on non-ASCII characters on some platforms.
    text = Path(path).read_text(encoding="utf-8")

    if memory:
        # All the blocs are executed together, as a single script
        check_raw_file_full(text, lang="python")
    else:
        # Each bloc is executed on its own
        check_raw_string(text, lang="python")

edsnlp.utils

colors

CATEGORY20 = ['#1f77b4', '#aec7e8', '#ff7f0e', '#ffbb78', '#2ca02c', '#98df8a', '#d62728', '#ff9896', '#9467bd', '#c5b0d5', '#8c564b', '#c49c94', '#e377c2', '#f7b6d2', '#7f7f7f', '#c7c7c7', '#bcbd22', '#dbdb8d', '#17becf', '#9edae5'] module-attribute

create_colors(labels)

deprecation

deprecated_extension(name, new_name)

deprecated_getter_factory(name, new_name)

deprecation(name, new_name=None)

deprecated_factory(name, new_name=None, default_config=None, func=None)

examples

entity_pattern = re.compile('(<ent[^<>]*>[^<>]+</ent>)') module-attribute

text_pattern = re.compile('<ent.*>(.+)</ent>') module-attribute

modifiers_pattern = re.compile('<ent\\s?(.*)>.+</ent>') module-attribute

Match

start_char: int = None class-attribute

end_char: int = None class-attribute

text: str = None class-attribute

modifiers: str = None class-attribute

Modifier

key: str = None class-attribute

value: Union[int, float, bool, str] = None class-attribute

Entity

start_char: int = None class-attribute

end_char: int = None class-attribute

modifiers: List[Modifier] = None class-attribute

find_matches(example)

parse_match(match)

parse_example(example)

filter

default_sort_key(span)

start_sort_key(span)

filter_spans(spans, label_to_remove=None, return_discarded=False, sort_key=default_sort_key)

consume_spans(spans, filter, second_chance=None)

get_spans(spans, label)

resources

get_verbs(verbs=None, check_contains=True)

regex

make_pattern(patterns, with_breaks=False, name=None)

compile_regex(reg)

inclusion

check_inclusion(span, start, end)

blocs

BLOCK_PATTERN = re.compile('((?P<skip>)\\s+)?(?P<indent> *)```(?P<title>.*?)\\n(?P<code>.+?)```', flags=re.DOTALL) module-attribute

OUTPUT_PATTERN = '# Out: ' module-attribute

check_outputs(code)

remove_indentation(code, indent)

grab_code_blocks(docstring, lang='python')

printer(code)

check_docstring(obj, lang='')

check_raw_string(raw, lang='python')

check_raw_file_full(raw, lang='python')

check_md_file(path, memory=False)

`edsnlp.utils`

`colors`

`CATEGORY20 = ['#1f77b4', '#aec7e8', '#ff7f0e', '#ffbb78', '#2ca02c', '#98df8a', '#d62728', '#ff9896', '#9467bd', '#c5b0d5', '#8c564b', '#c49c94', '#e377c2', '#f7b6d2', '#7f7f7f', '#c7c7c7', '#bcbd22', '#dbdb8d', '#17becf', '#9edae5']` `module-attribute`

`create_colors(labels)`

`deprecation`

`deprecated_extension(name, new_name)`

`deprecated_getter_factory(name, new_name)`

`deprecation(name, new_name=None)`

`deprecated_factory(name, new_name=None, default_config=None, func=None)`

`examples`

`entity_pattern = re.compile('(<ent[^<>]*>[^<>]+</ent>)')` `module-attribute`

`text_pattern = re.compile('<ent.*>(.+)</ent>')` `module-attribute`

`modifiers_pattern = re.compile('<ent\\s?(.*)>.+</ent>')` `module-attribute`

`Match`

`start_char: int = None` `class-attribute`

`end_char: int = None` `class-attribute`

`text: str = None` `class-attribute`

`modifiers: str = None` `class-attribute`

`Modifier`

`key: str = None` `class-attribute`

`value: Union[int, float, bool, str] = None` `class-attribute`

`Entity`

`start_char: int = None` `class-attribute`

`end_char: int = None` `class-attribute`

`modifiers: List[Modifier] = None` `class-attribute`

`find_matches(example)`

`parse_match(match)`

`parse_example(example)`

`filter`

`default_sort_key(span)`

`start_sort_key(span)`

`filter_spans(spans, label_to_remove=None, return_discarded=False, sort_key=default_sort_key)`

`consume_spans(spans, filter, second_chance=None)`

`get_spans(spans, label)`

`resources`

`get_verbs(verbs=None, check_contains=True)`

`regex`

`make_pattern(patterns, with_breaks=False, name=None)`

`compile_regex(reg)`

`inclusion`

`check_inclusion(span, start, end)`

`blocs`

BLOCK_PATTERN = re.compile('((?P<skip>)\\s+)?(?P<indent> *)```(?P<title>.*?)\\n(?P<code>.+?)```', flags=re.DOTALL) `module-attribute`

`OUTPUT_PATTERN = '# Out: '` `module-attribute`

`check_outputs(code)`

`remove_indentation(code, indent)`

`grab_code_blocks(docstring, lang='python')`

`printer(code)`

`check_docstring(obj, lang='')`

`check_raw_string(raw, lang='python')`

`check_raw_file_full(raw, lang='python')`

`check_md_file(path, memory=False)`