Skip to content

edsnlp.utils.resources

get_verbs(verbs=None, check_contains=True)

Extract verbs from the resources, as a pandas dataframe.

PARAMETER DESCRIPTION
verbs

List of verbs to keep. Returns all verbs by default.

TYPE: List[str], optional DEFAULT: None

check_contains

Whether to check that every requested verb was found when a list of verbs is provided. Defaults to True.

TYPE: bool, optional DEFAULT: True

RETURNS DESCRIPTION
pd.DataFrame

DataFrame containing conjugated verbs.

Source code in edsnlp/utils/resources.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
def get_verbs(
    verbs: Optional[List[str]] = None, check_contains: bool = True
) -> pd.DataFrame:
    """
    Extract verbs from the resources, as a pandas dataframe.

    The table is read from ``resources/verbs.csv.gz`` under ``BASE_DIR`` and,
    when a list of verbs is given, filtered on its ``verb`` column.

    Parameters
    ----------
    verbs : List[str], optional
        List of verbs to keep. Returns all verbs when ``None`` or empty.
    check_contains : bool, optional
        Whether to check that no verb is missing if a list of verbs was provided.
        By default True

    Returns
    -------
    pd.DataFrame
        DataFrame containing conjugated verbs.

    Raises
    ------
    AssertionError
        If ``check_contains`` is true and at least one requested verb is
        absent from the ``verb`` column of the resource.
    """

    conjugated_verbs = pd.read_csv(BASE_DIR / "resources" / "verbs.csv.gz")

    # No filter requested (None or empty list): return the full table.
    if not verbs:
        return conjugated_verbs

    # Deduplicate so repeated entries do not skew the count check below.
    verbs = set(verbs)

    selected_verbs = conjugated_verbs[conjugated_verbs.verb.isin(verbs)]

    # Raised explicitly rather than through an ``assert`` statement, which is
    # removed when Python runs with ``-O`` and would silently skip the check.
    # The exception type and message are kept for existing callers.
    if check_contains and len(verbs) != selected_verbs.verb.nunique():
        raise AssertionError("Some verbs are missing !")

    return selected_verbs

get_adicap_dict() cached

RETURNS DESCRIPTION
Dict
Source code in edsnlp/utils/resources.py
46
47
48
49
50
51
52
53
54
55
56
57
@lru_cache()
def get_adicap_dict():
    """
    Load the ADICAP decoding table shipped with the package resources.

    The gzip-compressed JSON file ``resources/adicap.json.gz`` under
    ``BASE_DIR`` is read, decoded as UTF-8 and parsed. The result is
    memoised by ``lru_cache``, so later calls return the same object.

    Returns
    -------
    Dict
        Parsed content of the JSON resource.
    """

    adicap_path = BASE_DIR / "resources" / "adicap.json.gz"

    with gzip.open(adicap_path, "r") as compressed:
        raw_bytes = compressed.read()

    json_text = raw_bytes.decode("utf-8")
    return json.loads(json_text)