Skip to content

eds_scikit.utils.checks

MissingConceptError

Bases: Exception

Exception raised when a concept is missing

Source code in eds_scikit/utils/checks.py
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
class MissingConceptError(Exception):
    """Exception raised when a concept is missing

    The exception message lists every missing column on its own line.

    Parameters
    ----------
    required_concepts : Union[List[str], List[Tuple[str, str]]]
        The missing concepts, given either as plain column names or as
        ``(name, explanation)`` pairs. The explanation is displayed in
        parentheses after the name.
    df_name : str
        Optional name of the DataFrame, inserted in the message.
    """

    def __init__(
        self,
        required_concepts: Union[List[str], List[Tuple[str, str]]],
        df_name: str = "",
    ):
        # Each entry is formatted on its own, so a list mixing plain names
        # and (name, explanation) pairs is still rendered readably.
        to_display_per_concept = []
        for concept in required_concepts:
            if isinstance(concept, tuple):
                name, msg = concept
                to_display_per_concept.append(f"- {name} ({msg})")
            else:
                to_display_per_concept.append(f"- {concept}")
        str_to_display = "\n".join(to_display_per_concept)

        # Only a trailing space is needed: the literal "The " below already
        # provides the separator before the name.
        if df_name:
            df_name = f"{df_name} "
        message = (
            f"The {df_name}DataFrame is missing some columns, "
            "namely:\n"
            f"{str_to_display}"
        )

        super().__init__(message)

MissingTableError

Bases: Exception

Exception raised when a table is missing in the Data

Source code in eds_scikit/utils/checks.py
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
class MissingTableError(Exception):
    """Exception raised when a table is missing in the Data

    The exception message lists every missing table on its own line.

    Parameters
    ----------
    required_tables : Union[List[str], List[Tuple[str, str]]]
        The missing tables, given either as plain table names or as
        ``(name, explanation)`` pairs. The explanation is displayed in
        parentheses after the name.
    data_name : str
        Optional name of the Data object, inserted in the message.
    """

    def __init__(
        self,
        required_tables: Union[List[str], List[Tuple[str, str]]],
        data_name: str = "",
    ):
        # Each entry is formatted on its own, so a list mixing plain names
        # and (name, explanation) pairs is still rendered readably.
        to_display_per_table = []
        for table in required_tables:
            if isinstance(table, tuple):
                name, msg = table
                to_display_per_table.append(f"- {name} ({msg})")
            else:
                to_display_per_table.append(f"- {table}")
        str_to_display = "\n".join(to_display_per_table)

        # Only a trailing space is needed: the literal "The " below already
        # provides the separator before the name.
        if data_name:
            data_name = f"{data_name} "
        message = (
            f"The {data_name}Data is missing some tables, "
            "namely:\n"
            f"{str_to_display}"
        )

        super().__init__(message)

concept_checker

concept_checker(function: Callable, concepts: List[str] = None, only_adds_concepts: bool = True, *args, **kwargs) -> Any

Decorator to use on functions that: - take a DataFrame as first argument - add a concept to it

The decorator checks: - If the first argument is a DataFrame - If the concepts to be added aren't already in the DataFrame - If the function correctly adds the concepts - If no additional columns are added (if only_adds_concepts is True)

If one of the first three checks fails, an error is raised; unexpected additional columns only trigger a logged warning.

Source code in eds_scikit/utils/checks.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
@decorator
def concept_checker(
    function: Callable,
    concepts: Optional[Union[str, List[str]]] = None,
    only_adds_concepts: bool = True,
    *args,
    **kwargs,
) -> Any:
    """
    Decorator to use on functions that
    - Take a DataFrame as first argument
    - Add a concept to it

    The decorator checks:
    - If the first argument is a DataFrame
    - If the concepts to be added aren't already in the DataFrame
    - If the function correctly adds the concepts
    - If no additional columns are added (if only_adds_concepts is True)

    If one of the first three checks fails, an error is raised. Unexpected
    additional columns only trigger a logged warning.

    Parameters
    ----------
    function : Callable
        The decorated function.
    concepts : Optional[Union[str, List[str]]]
        Name(s) of the column(s) the function is expected to add. A single
        string is treated as a one-element list, `None` as no concept.
    only_adds_concepts : bool
        Whether to warn when columns other than `concepts` appear in the result.
    *args, **kwargs
        Arguments forwarded unchanged to `function`.

    Returns
    -------
    Any
        Whatever `function` returns.

    Raises
    ------
    TypeError
        If the first positional argument is not a Pandas or Koalas DataFrame.
    ValueError
        If a concept is already a column of the input, or is absent from
        the columns of the result.
    """
    # Is the first argument a DataFrame
    # (a call without positional argument fails the same check)
    df = args[0] if args else None
    if not isinstance(df, (ks.DataFrame, pd.DataFrame)):
        raise TypeError(
            f"The first argument of '{function.__module__}.{function.__name__}' "
            "should be a Pandas or Koalas DataFrame"
        )

    # Initial columns
    initial_cols = set(df.columns)

    # Normalise `concepts` to a list so that the set operations below work
    # for a single name as well as for the `None` default
    if concepts is None:
        concepts = []
    elif isinstance(concepts, str):
        concepts = [concepts]
    expected_concepts = set(concepts)

    # Is the concept already present
    present_concepts = expected_concepts & initial_cols
    if present_concepts:
        raise ValueError(
            f"The concepts {present_concepts} are already present in the input dataframe "
            f"of '{function.__module__}.{function.__name__}'.\n"
            "You can either rename the column(s) or delete them before running "
            "the function again."
        )

    result = function(*args, **kwargs)
    result_cols = set(result.columns)

    # Was the concept correctly added
    missing_concepts = expected_concepts - result_cols
    if missing_concepts:
        raise ValueError(
            f"The concept(s) '{missing_concepts}' were not added to the DataFrame."
        )

    # Check that no other columns were added
    if only_adds_concepts:
        additional_cols = result_cols - (initial_cols | expected_concepts)
        if additional_cols:
            logger.warning(
                "The columns"
                + "".join([f"\n- {s}" for s in additional_cols])
                + f"\nwere added/renamed by '{function.__module__}.{function.__name__}', "
                + f"although it should normally only add the columns {concepts}"
            )

    return result

algo_checker

algo_checker(function: Callable, algos: Optional[str] = None, *args, **kwargs) -> Any

Decorator to use on a wrapper that calls specific functions based on the 'algo' argument

The decorator checks if the provided algo is an implemented one.

If this check fails, an error is raised.

Source code in eds_scikit/utils/checks.py
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
@decorator
def algo_checker(
    function: Callable,
    algos: Optional[Union[str, List[str]]] = None,
    *args,
    **kwargs,
) -> Any:
    """
    Decorator to use on a wrapper that calls specific functions based on the 'algo' argument

    The decorator checks if the provided algo is an implemented one.

    If this check fails, an error is raised.

    Parameters
    ----------
    function : Callable
        The decorated wrapper.
    algos : Optional[Union[str, List[str]]]
        Name(s) of the implemented algos. A single string is treated as a
        one-element list.
        NOTE(review): `None` is not handled and makes the membership test
        raise a TypeError -- confirm whether a default is ever relied upon.
    *args, **kwargs
        Arguments forwarded unchanged to `function`.

    Returns
    -------
    Any
        Whatever `function` returns.

    Raises
    ------
    ValueError
        If the requested algo (without the part following its first dot)
        is not one of `algos`.
    """

    algo = _get_arg_value(function, "algo", args, kwargs)

    # Stripping eventual version suffix
    algo = algo.split(".")[0]

    # With a bare string, `in` would be a substring test and accept any
    # fragment of the name (e.g. "a" for "algo"): compare whole names instead
    if isinstance(algos, str):
        algos = [algos]

    if algo not in algos:
        raise ValueError(
            f"Method {algo} unknown for '{function.__module__}.{function.__name__}'.\n"
            f"Available algos are {algos}"
        )
    result = function(*args, **kwargs)
    return result