Skip to content

eds_scikit.biology.viz.wrapper

plot_biology_summary

plot_biology_summary(measurement: DataFrame, value_column: str = 'value_as_number', unit_column: str = 'unit_source_value', save_folder_path: str = 'Biology_summary', stats_only: bool = False, terminologies: List[str] = None, debug: bool = False) -> Union[alt.ConcatChart, pd.DataFrame]

Aggregate measurements, create plots and save all the concept sets in a folder.

PARAMETER DESCRIPTION
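measurement

Measurement DataFrame to aggregate (the original docstring names this parameter `data` and describes it as an instantiated HiveData, PostgresData or PandasData, which does not match the function signature)

TYPE: DataFrame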

save_folder_path

Name of the folder where the plots will be saved

TYPE: str, optional DEFAULT: 'Biology_summary'

stats_only

If True, it will only aggregate the data for the [summary table][summary-table].

TYPE: bool, optional DEFAULT: False

terminologies

biology summary only on terminologies codes columns

TYPE: List[str], optional DEFAULT: None

value_column

value column for distribution summary plot

TYPE: str, optional DEFAULT: 'value_as_number'

debug

If True, info logs will be displayed to follow the aggregation steps

TYPE: bool, optional DEFAULT: False

RETURNS DESCRIPTION
List[alt.ConcatChart, pd.DataFrame]

Altair plots describing the volumetric and the distribution properties of your biological data along with a pandas DataFrame with a statistical summary

Source code in eds_scikit/biology/viz/wrapper.py
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
def plot_biology_summary(
    measurement: DataFrame,
    value_column: str = "value_as_number",
    unit_column: str = "unit_source_value",
    save_folder_path: str = "Biology_summary",
    stats_only: bool = False,
    terminologies: List[str] = None,
    debug: bool = False,
) -> Union[alt.ConcatChart, pd.DataFrame]:
    """
    Aggregate measurements, then save the aggregated tables and the plots of
    every concept set in its own sub-folder of ``save_folder_path``.

    For each concept set found in the aggregated tables, the sub-folder
    ``<save_folder_path>/<concept_set>`` is deleted if it already exists,
    recreated, filled with one pickle file per aggregated table and finally
    handed to ``plot_concepts_set``.

    Parameters
    ----------
    measurement : DataFrame
        Measurement table to aggregate. It is forwarded to ``aggregate_measurement``
        with ``concept_set`` and ``care_site_short_name`` as category columns.
    value_column : str, optional
        Name of the value column used for the distribution summary plot
    unit_column : str, optional
        Name of the unit column forwarded to the aggregation
    save_folder_path : str, optional
        Name of the folder where the plots will be saved. Missing parent folders are created.
    stats_only : bool, optional
        If ``True``, it will only aggregate the data for the [summary table][summary-table].
        The flag is forwarded to the aggregation as both ``stats_only`` and ``overall_only``.
    terminologies : List[str], optional
        For each listed terminology, the ``<terminology>_concept_code`` column is dropped
        from ``measurement`` before the aggregation.
        NOTE(review): the previous description read "biology summary only on terminologies
        codes columns" - confirm that dropping these columns is the intended behaviour.
    debug : bool, optional
        If ``True``, info logs will be displayed to follow the aggregation steps

    Returns
    -------
    None
        NOTE(review): the return annotation advertises Altair plots and a pandas DataFrame,
        but this function has no ``return`` statement; results are only written to disk.

    Raises
    ------
    ValueError
        If ``value_column`` or ``unit_column`` is empty or ``None``.
    """

    if not value_column:
        raise ValueError(
            "Must give a 'value_column' parameter. By default, use value_as_number. Or value_as_number_normalized if exists."
        )
    if not unit_column:
        raise ValueError(
            "Must give a 'unit_column' parameter. By default, use unit_source_value. Or unit_source_value_normalized if exists."
        )

    if not os.path.isdir(save_folder_path):
        # makedirs (rather than mkdir) so that a nested output path such as
        # "results/run_1/Biology_summary" works when its parents are missing.
        os.makedirs(save_folder_path, exist_ok=True)
        logger.info("{} folder has been created.", save_folder_path)

    if terminologies:
        measurement = measurement.drop(
            columns=[f"{col}_concept_code" for col in terminologies]
        )

    tables_agg = aggregate_measurement(
        measurement=measurement,
        value_column=value_column,
        unit_column=unit_column,
        stats_only=stats_only,
        overall_only=stats_only,
        category_columns=["concept_set", "care_site_short_name"],
        debug=debug,
    )

    table_names = list(tables_agg.keys())
    # The list of concept sets is read from the first aggregated table only.
    concept_sets_names = tables_agg[table_names[0]].concept_set.unique()

    for concept_set_name in concept_sets_names:

        concepts_set_path = "{}/{}".format(save_folder_path, concept_set_name)
        # Start from an empty folder so that files from a previous run do not linger.
        rmtree(concepts_set_path, ignore_errors=True)
        os.mkdir(concepts_set_path)
        logger.info(
            "{}/{} folder has been created.",
            save_folder_path,
            concept_set_name,
        )

        for table_name in table_names:
            # "@concept_set_name" refers to the local loop variable inside the query.
            table = tables_agg[table_name].query("concept_set == @concept_set_name")
            table.to_pickle("{}/{}.pkl".format(concepts_set_path, table_name))

        logger.info(
            "{} has been processed and saved in {}/{} folder.",
            concept_set_name,
            save_folder_path,
            concept_set_name,
        )

        plot_concepts_set(
            concepts_set_name=concept_set_name, source_path=save_folder_path
        )
Back to top