Skip to content

eds_scikit.utils.framework

BackendDispatcher

Dispatcher between pandas, koalas and custom methods.

In addition to the methods below, use the BackendDispatcher class to access the custom functions defined in CustomImplem.

Examples:

Use a dispatcher function

>>> from eds_scikit.utils.framework import bd
>>> bd.is_pandas(pd.DataFrame())
True

Use a custom implemented function

>>> df = pd.DataFrame({"categ": ["a", "b", "c"]})
>>> bd.add_unique_id(df, col_name="id")
  categ  id
0     a   0
1     b   1
2     c   2

get_backend

get_backend(obj) -> Optional[ModuleType]

Return the backend of a given object.

PARAMETER DESCRIPTION
obj

RETURNS DESCRIPTION
backend

TYPE: a backend among {pd, ks} or None

Examples:

Get the backend from a DataFrame and create another DataFrame from it. This is especially useful at runtime, when you need to infer the backend of the input.

>>> backend = bd.get_backend(pd.DataFrame())
>>> backend
<module 'pandas'>
>>> df = backend.DataFrame()
>>> bd.get_backend(ks.DataFrame())
<module 'koalas'>

For demo purposes, return the backend when provided directly

>>> bd.get_backend(ks)
<module 'koalas'>
>>> bd.get_backend(spark)
None
Source code in eds_scikit/utils/framework.py
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
def get_backend(self, obj) -> Optional[ModuleType]:
    """Return the backend of a given object.

    Parameters
    ----------
    obj: DataFrame, backend module among pandas or koalas, or str
        The object whose backend is looked up. A string is resolved as an
        alias among ``"pd"``, ``"pandas"``, ``"ks"`` and ``"koalas"``.

    Returns
    -------
    backend: a backend among {pd, ks} or None
        ``None`` is returned when no backend matches (including an
        unrecognized string alias).

    Examples
    --------

    Get the backend from a DataFrame and create another DataFrame from it.
    This is especially useful at runtime, when you need to infer the
    backend of the input.

    >>> backend = bd.get_backend(pd.DataFrame())
    >>> backend
    <module 'pandas'>
    >>> df = backend.DataFrame()

    >>> bd.get_backend(ks.DataFrame())
    <module 'koalas'>

    For demo purposes, return the backend when provided directly

    >>> bd.get_backend(ks)
    <module 'koalas'>
    >>> bd.get_backend(spark) is None
    True
    """
    if isinstance(obj, str):
        return {
            "pd": pd,
            "pandas": pd,
            "ks": ks,
            "koalas": ks,
        }.get(obj)

    # Computed once: neither value depends on the candidate backend.
    obj_module = obj.__class__.__module__
    obj_name = getattr(obj, "__name__", None)

    for backend in VALID_FRAMEWORKS:
        backend_name = backend.__name__
        if (
            # DataFrame(): the class lives in the backend package or one of
            # its submodules. Matching on the "." boundary keeps unrelated
            # packages that merely share the prefix from being accepted.
            obj_module == backend_name
            or obj_module.startswith(backend_name + ".")
            # pd or ks: the backend module itself was passed in.
            or obj_name == backend_name
        ):
            return backend
    return None

is_pandas

is_pandas(obj) -> bool

Return True when the obj is either a pd.DataFrame or the pandas module.

Source code in eds_scikit/utils/framework.py
158
159
160
def is_pandas(self, obj) -> bool:
    """Tell whether ``obj`` resolves to the pandas backend.

    Returns True exactly when ``self.get_backend(obj)`` is the ``pd`` module.
    """
    resolved = self.get_backend(obj)
    return resolved is pd

is_koalas

is_koalas(obj: Any) -> bool

Return True when the obj is either a ks.DataFrame or the koalas module.

Source code in eds_scikit/utils/framework.py
162
163
164
def is_koalas(self, obj: Any) -> bool:
    """Tell whether ``obj`` resolves to the koalas backend.

    Returns True exactly when ``self.get_backend(obj)`` is the ``ks`` module.
    """
    resolved = self.get_backend(obj)
    return resolved is ks

to

to(obj, backend)

Convert a dataframe to the provided backend.

PARAMETER DESCRIPTION
obj

The object(s) to convert to the provided backend

backend

The desired output backend.

TYPE: str, DataFrame, or the pandas / koalas module

RETURNS DESCRIPTION
out

The converted object, in the same format as provided in input.

TYPE: DataFrame or iterable of DataFrame (list, tuple, dict)

Examples:

Convert a single DataFrame

>>> df = pd.DataFrame({"a": [1, 2]})
>>> kdf = bd.to(df, backend="koalas")
>>> type(kdf)
databricks.koalas.frame.DataFrame

Convert a list of DataFrame

>>> extra_kdf = ks.DataFrame({"b": [0, 1]})
>>> another_kdf = ks.DataFrame({"c": [2, 3]})
>>> kdf_list = [kdf, extra_kdf, another_kdf]
>>> df_list = bd.to(kdf_list, backend="pandas")
>>> type(df_list)
list
>>> len(df_list)
3
>>> type(df_list[0])
pandas.core.frame.DataFrame

Convert a dictionary of DataFrames

>>> df_dict = {"df_1": pd.DataFrame({"a": [1, 2]}), "df_2": pd.DataFrame({"a": [2, 3]})}
>>> kdf_dict = bd.to(df_dict, backend="koalas")
>>> type(kdf_dict)
dict
>>> kdf_dict.keys()
dict_keys(["df_1", "df_2"])
>>> type(kdf_dict["df_1"])
databricks.koalas.frame.DataFrame
Source code in eds_scikit/utils/framework.py
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
def to(self, obj, backend):
    """Convert a dataframe to the provided backend.

    Lists, tuples and dicts are traversed recursively and every element
    (or dict value) is converted; the container kind is preserved.

    Parameters
    ----------
    obj: DataFrame or iterable of DataFrame (list, tuple, dict)
        The object(s) to convert to the provided backend

    backend: str, DataFrame or pandas, koalas module
        The desired output backend.

    Returns
    -------
    out: DataFrame or iterable of DataFrame (list, tuple, dict)
      The converted object, in the same format as provided in input:
      a list gives a list, a tuple gives a tuple, a dict gives a dict
      with the same keys.

    Raises
    ------
    ValueError
        If ``backend`` resolves to neither pandas nor koalas when a
        non-container object has to be converted.

    Examples
    --------

    Convert a single DataFrame

    >>> df = pd.DataFrame({"a": [1, 2]})
    >>> kdf = bd.to(df, backend="koalas")
    >>> type(kdf)
    databricks.koalas.frame.DataFrame

    Convert a list of DataFrame

    >>> extra_kdf = ks.DataFrame({"b": [0, 1]})
    >>> another_kdf = ks.DataFrame({"c": [2, 3]})
    >>> kdf_list = [kdf, extra_kdf, another_kdf]
    >>> df_list = bd.to(kdf_list, backend="pandas")
    >>> type(df_list)
    list
    >>> len(df_list)
    3
    >>> type(df_list[0])
    pandas.core.frame.DataFrame

    Convert a dictionary of DataFrame

    >>> df_dict = {"df_1": pd.DataFrame({"a": [1, 2]}), "df_2": pd.DataFrame({"a": [2, 3]})}
    >>> kdf_dict = bd.to(df_dict, backend="koalas")
    >>> type(kdf_dict)
    dict
    >>> kdf_dict.keys()
    dict_keys(['df_1', 'df_2'])
    >>> type(kdf_dict["df_1"])
    databricks.koalas.frame.DataFrame
    """
    if isinstance(obj, (list, tuple)):
        converted = [self.to(item, backend) for item in obj]
        # Keep the documented "same format as input" contract: a tuple
        # must come back as a tuple, not silently become a list.
        return tuple(converted) if isinstance(obj, tuple) else converted

    if isinstance(obj, dict):
        return {key: self.to(value, backend) for key, value in obj.items()}

    # Only leaf objects reach this point; resolve the requested backend
    # (alias string, DataFrame or module) before dispatching.
    backend = self.get_backend(backend)

    if self.is_pandas(backend):
        return self.to_pandas(obj)
    if self.is_koalas(backend):
        return self.to_koalas(obj)
    raise ValueError("Unknown backend")
Back to top