Build a dataframe with parent categories as columns
Source code in eds_scikit/utils/hierarchy.py
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
def build_hierarchy(
    categories: pd.DataFrame,
    relationships: pd.DataFrame,
) -> pd.DataFrame:
    """Build a dataframe with parent categories as columns.

    Parameters
    ----------
    categories : pd.DataFrame
        Must have exactly the columns ``id`` and ``category``, with no
        duplicated value in ``id``.
    relationships : pd.DataFrame
        Must have exactly the columns ``child`` and ``parent``, with no
        duplicated row.

    Returns
    -------
    pd.DataFrame
        The value produced by ``_finalize_parent_categories`` from
        ``categories`` and the category-annotated relationships.

    Raises
    ------
    AssertionError
        If either input violates the schema or uniqueness requirements
        above. These checks are plain ``assert`` statements, so they are
        skipped when Python runs with ``-O``.
    """
    # Input contract: exact column sets, unique ids, unique relationship rows.
    assert set(categories.columns) == {"id", "category"}
    assert set(relationships.columns) == {"child", "parent"}
    assert not categories["id"].duplicated().any()
    assert not relationships.duplicated().any()

    all_links = _follow_relationships(relationships)

    # Keep only the links whose child is one of the supplied category ids.
    child_is_known = all_links["child"].isin(categories["id"])
    known_links = all_links.loc[child_is_known]

    links_with_category = _deduplicate_parent_category(known_links, categories)
    return _finalize_parent_categories(categories, links_with_category)
|