Skip to content

eds_scikit.period

tagging

tagging(tag_to_df: DataFrame, tag_from_df: DataFrame, concept_to_tag: str, tag_to_date_cols: List[str] = ['t_start', 't_end'], tag_from_date_cols: List[str] = ['t_start', 't_end'], algo: str = 'intersection') -> DataFrame
PARAMETER DESCRIPTION
tag_to_df

TYPE: DataFrame

tag_from_df

TYPE: DataFrame

concept_to_tag

TYPE: str

tag_to_date_cols

TYPE: List[str], optional DEFAULT: ['t_start', 't_end']

tag_from_date_cols

TYPE: List[str], optional DEFAULT: ['t_start', 't_end']

algo

TYPE: str, optional DEFAULT: 'intersection'

RETURNS DESCRIPTION
DataFrame
Source code in eds_scikit/period/tagging_functions.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
def tagging(
    tag_to_df: DataFrame,
    tag_from_df: DataFrame,
    concept_to_tag: str,
    tag_to_date_cols: List[str] = ["t_start", "t_end"],
    tag_from_date_cols: List[str] = ["t_start", "t_end"],
    algo: str = "intersection",
) -> DataFrame:
    """
    Tag the events of ``tag_to_df`` with the values of a concept found in
    ``tag_from_df``.

    Both DataFrames are joined on ``person_id``; for each resulting pair of
    events the two intervals are compared with ``compare_intervals``. One
    boolean column is added to ``tag_to_df`` per distinct ``value`` of the
    selected concept: it is ``True`` when at least one compared pair for that
    event and that value was flagged, ``False`` otherwise.

    Parameters
    ----------
    tag_to_df : DataFrame
        Events to tag. Must contain ``person_id`` and the two columns named
        in ``tag_to_date_cols``.
    tag_from_df : DataFrame
        Events providing the tags. Must contain ``person_id``, ``concept``,
        ``value`` and the two columns named in ``tag_from_date_cols``.
    concept_to_tag : str
        Only rows of ``tag_from_df`` whose ``concept`` equals this value are
        used.
    tag_to_date_cols : List[str], optional
        Names of the start and end columns of ``tag_to_df`` (in that order).
    tag_from_date_cols : List[str], optional
        Names of the start and end columns of ``tag_from_df`` (in that order).
    algo : str, optional
        Forwarded unchanged to ``compare_intervals``.
        NOTE(review): the accepted values are defined by that helper, which
        is not visible here.

    Returns
    -------
    DataFrame
        ``tag_to_df`` with one additional boolean column per tagged value.
        If the join yields no usable pair, an empty DataFrame with columns
        ``person_id``, ``t_start``, ``t_end``, ``concept`` and ``value`` is
        returned instead and a warning is logged.
    """
    framework = get_framework(tag_to_df)

    # Keep track of the original rows through the merge / groupby below.
    tag_to_df = tag_to_df.assign(event_id=tag_to_df.index)

    # Only the first two entries are used, consistently with the rename below.
    from_start_col, from_end_col = tag_from_date_cols[0], tag_from_date_cols[1]

    # Select the date columns the caller asked for. They were previously
    # hard-coded to "t_start"/"t_end", which broke any non-default
    # `tag_from_date_cols`.
    tag_from = tag_from_df.loc[
        tag_from_df.concept == concept_to_tag,
        ["person_id", "value", from_start_col, from_end_col],
    ]

    # Normalise the date column names on both sides so that the comparison
    # below does not depend on the caller's naming.
    tmp = (
        tag_to_df.rename(
            columns={tag_to_date_cols[0]: "t_start_x", tag_to_date_cols[1]: "t_end_x"}
        )
        .merge(
            tag_from.rename(
                columns={
                    from_start_col: "t_start_y",
                    from_end_col: "t_end_y",
                }
            ),
            on="person_id",
            how="left",
        )
        # Drops unmatched rows of the left join as well as pairs with an
        # incomplete interval on either side.
        .dropna(subset=["t_start_x", "t_end_x", "t_start_y", "t_end_y"])
    )

    if len(tmp) == 0:
        # TODO: is this necessary ?
        logger.warning("No matching were found between the 2 DataFrames")

        return framework.DataFrame(
            columns=["person_id", "t_start", "t_end", "concept", "value"]
        )

    tmp["tag"] = compare_intervals(
        tmp["t_start_x"],
        tmp["t_end_x"],
        tmp["t_start_y"],
        tmp["t_end_y"],
        algo=algo,
    )

    # When both inputs carry a "value" column the merge suffixes them; the
    # one coming from `tag_from_df` (right side) is then "value_y".
    value_col = (
        "value_y"
        if (("value" in tag_to_df.columns) and ("value" in tag_from_df.columns))
        else "value"
    )

    # One row per event, one boolean column per distinct tag value.
    tags = (
        tmp.groupby(["event_id", value_col])
        .tag.any()
        .unstack()
        .fillna(False)
        .reset_index()
    )
    # Events without any compared pair are absent from `tags`: bring them
    # back with False in every tag column before attaching the original data.
    tags = tag_to_df[["event_id"]].merge(tags, on="event_id", how="left").fillna(False)
    tags = tag_to_df.merge(tags, on="event_id", how="left").drop(columns="event_id")
    return tags