Skip to content

eds_scikit.utils.hierarchy

build_hierarchy

build_hierarchy(categories: pd.DataFrame, relationships: pd.DataFrame) -> pd.DataFrame

Build a dataframe with parent categories as columns

Source code in eds_scikit/utils/hierarchy.py
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
def build_hierarchy(
    categories: pd.DataFrame,
    relationships: pd.DataFrame,
) -> pd.DataFrame:
    """Build a dataframe with parent categories as columns.

    Parameters
    ----------
    categories : pd.DataFrame
        Must have exactly the columns ``id`` and ``category``; the ``id``
        values must be unique.
    relationships : pd.DataFrame
        Must have exactly the columns ``child`` and ``parent``; no row may
        appear twice.

    Returns
    -------
    pd.DataFrame
        The value produced by ``_finalize_parent_categories`` from
        ``categories`` and the category-annotated relationships.

    Raises
    ------
    AssertionError
        If any of the input checks above fails. These checks are plain
        ``assert`` statements, so they are skipped when Python runs with
        ``-O``.
    """
    # Input contract: exact column sets, and no duplicated keys / edges.
    assert set(categories.columns) == {"id", "category"}
    assert set(relationships.columns) == {"child", "parent"}
    assert not categories["id"].duplicated().any()
    assert not relationships.duplicated().any()

    # Expand the raw child/parent pairs.
    # NOTE(review): presumably this follows parent links transitively so each
    # child is paired with all of its ancestors -- confirm in the helper.
    all_links = _follow_relationships(relationships)

    # Keep only the links whose child is one of the ids listed in `categories`.
    child_is_known = all_links["child"].isin(categories["id"])
    known_links = all_links.loc[child_is_known]

    links_with_category = _deduplicate_parent_category(known_links, categories)

    return _finalize_parent_categories(categories, links_with_category)