Skip to content

eds_scikit.biology.utils.config

create_config_from_stats

create_config_from_stats(concepts_sets: List[ConceptsSet], config_name: str, stats_folder: str = 'Biology_summary')

Generate the configuration file from a statistical summary. It is needed by `eds_scikit.biology.cleaning.transform.transform_measurement`.

PARAMETER DESCRIPTION
concepts_sets

List of concepts-sets to select

TYPE: List[ConceptsSet]

config_name

Name of the configuration. It is saved as `<config_name>.csv` in the configurations folder.

TYPE: str

stats_folder

Name of the statistical summary folder

TYPE: str DEFAULT: 'Biology_summary'

Source code in eds_scikit/biology/utils/config.py
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
def create_config_from_stats(
    concepts_sets: List[ConceptsSet],
    config_name: str,
    stats_folder: str = "Biology_summary",
):
    """Generate the configuration file from a statistical summary. It is needed by [transform_measurement][eds_scikit.biology.cleaning.transform.transform_measurement]

    For each concepts-set, the summary stored in
    ``{stats_folder}/{concepts_set.name}/measurement_stats.pkl`` is loaded and
    completed with the columns ``transformed_unit`` (the unit with the highest
    total ``count``), ``concepts_set`` (the concepts-set name), ``Action`` and
    ``Coefficient`` (both initialised to ``None``). All summaries are stacked,
    written to ``{CONFIGS_PATH}/{config_name}.csv`` and ``register_configs``
    is called.

    A concepts-set whose summary cannot be read (``OSError``) is logged and
    skipped. If no summary could be loaded, an empty configuration is written.

    Parameters
    ----------
    concepts_sets : List[ConceptsSet]
        List of concepts-sets to select
    config_name : str
        Name of the configuration. It is used as the name (without extension)
        of the CSV file saved in the configurations folder.
    stats_folder : str
        Name of the statistical summary folder
    """
    summaries = []
    for concepts_set in concepts_sets:
        stats_path = "{}/{}/measurement_stats.pkl".format(
            stats_folder, concepts_set.name
        )
        try:
            # NOTE(security): unpickling can execute arbitrary code; the
            # summary folder must only contain trusted files.
            stats = pd.read_pickle(stats_path)
        except OSError:
            # Best effort: report the missing/unreadable summary and move on.
            # NOTE(review): brace-style placeholders assume a loguru-like
            # logger -- confirm against the module imports.
            logger.error(
                "{} has no statistical summary saved in {}",
                concepts_set.name,
                stats_folder,
            )
            continue

        # Unit with the highest total count, assigned to every row
        stats["transformed_unit"] = (
            stats.groupby("unit_source_value")["count"]
            .sum()
            .sort_values(ascending=False)
            .index[0]
        )
        stats["concepts_set"] = concepts_set.name
        stats["Action"] = None
        stats["Coefficient"] = None
        summaries.append(stats)

    # Concatenate once: avoids repeated copies and concatenation with an
    # empty frame. ``pd.concat`` rejects an empty list, hence the fallback.
    my_custom_config = pd.concat(summaries) if summaries else pd.DataFrame()

    if "care_site_short_name" in my_custom_config.columns:
        # Keep only the row computed from every care site
        my_custom_config = my_custom_config[
            my_custom_config.care_site_short_name == "ALL"
        ]

    os.makedirs(CONFIGS_PATH, exist_ok=True)

    my_custom_config.to_csv("{}/{}.csv".format(CONFIGS_PATH, config_name), index=False)

    register_configs()

list_all_configs

list_all_configs() -> List[str]

Helper to get the names of all saved biology configurations

RETURNS DESCRIPTION
List[str]

The configuration names

Source code in eds_scikit/biology/utils/config.py
81
82
83
84
85
86
87
88
89
90
91
92
93
94
def list_all_configs() -> List[str]:
    """
    List the names of the saved biology configurations.

    Every key of ``registry.data`` that starts with ``get_biology_config``
    is kept, and the text after its last dot is returned as the name.

    Returns
    -------
    List[str]
        The configuration names
    """
    prefix = "get_biology_config"
    names = []
    for key in list(registry.data.get_all().keys()):
        if not key.startswith(prefix):
            continue
        # Same result as ``key.split(".")[-1]``: the trailing dotted component
        names.append(key.rpartition(".")[2])
    return names
Back to top