Skip to content

eds_scikit.utils.custom_implem.custom_implem

CustomImplem

A collection of custom pandas and koalas methods.

All public facing methods must be stateless and defined as classmethods.

add_unique_id classmethod

add_unique_id(obj: Any, col_name: str = 'id', backend = None) -> Any

Add an ID column for koalas or pandas.

Source code in eds_scikit/utils/custom_implem/custom_implem.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
@classmethod
def add_unique_id(
    cls,
    obj: Any,
    col_name: str = "id",
    backend=None,
) -> Any:
    """Attach an identifier column to a pandas or koalas object.

    Parameters
    ----------
    obj : Any
        Object that receives the identifier column.
    col_name : str
        Name of the identifier column (``"id"`` by default).
    backend : module, optional
        Backend module, compared by identity against ``pd`` and ``ks``.

    Returns
    -------
    Any
        With the ``pd`` backend, ``obj`` itself: the column is written in
        place and holds ``range(obj.shape[0])``.
        With the ``ks`` backend, whatever
        ``obj.koalas.attach_id_column(id_type="distributed", ...)`` returns.

    Raises
    ------
    NotImplementedError
        If ``backend`` is neither ``pd`` nor ``ks`` (including the default
        ``None``).
    """
    # pandas: positional counter 0..n-1, assigned directly on the input.
    if backend is pd:
        n_rows = obj.shape[0]
        obj[col_name] = range(n_rows)
        return obj

    # koalas: delegate to the koalas accessor.
    if backend is ks:
        return obj.koalas.attach_id_column(id_type="distributed", column=col_name)

    raise NotImplementedError(
        f"No method 'add_unique_id' is available for backend '{backend}'."
    )

cut classmethod

cut(x, bins, right: bool = True, labels = None, retbins: bool = False, precision: int = 3, include_lowest: bool = False, duplicates: str = 'raise', ordered: bool = True, backend = None)

Koalas version of `pd.cut`.

Notes

Simplified vendoring from: https://github.com/pandas-dev/pandas/blob/v1.5.2/pandas/core/reshape/tile.py#L50-L305

Source code in eds_scikit/utils/custom_implem/custom_implem.py
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
@classmethod
def cut(
    cls,
    x,
    bins,
    right: bool = True,
    labels=None,
    retbins: bool = False,
    precision: int = 3,
    include_lowest: bool = False,
    duplicates: str = "raise",
    ordered: bool = True,
    backend=None,  # accepted but never read: koalas only
):
    """Koalas counterpart of ``pd.cut``.

    Thin entry point: every argument except ``backend`` is handed,
    positionally and in declaration order, to the module-level ``cut``
    helper (defined outside this method), and its result is returned
    unchanged.

    Notes
    -----
    Simplified vendoring from:
    https://github.com/pandas-dev/pandas/blob/v1.5.2/pandas/core/reshape/tile.py#L50-L305
    """
    # Keep the order in sync with the signature above: the helper receives
    # these values by position, not by keyword.
    forwarded = (
        x,
        bins,
        right,
        labels,
        retbins,
        precision,
        include_lowest,
        duplicates,
        ordered,
    )
    return cut(*forwarded)