Skip to content

edsteva.models.step_function.algos.quantile

c_0_from_quantile

c_0_from_quantile(
    predictor: pd.DataFrame,
    index: List[str],
    q: float = 0.8,
    x: str = "date",
    y: str = "c",
) -> pd.DataFrame

Compute the quantile on the given y-axis. Column \(c_0\) is created.

\[ \hat{c_0} = q^{th} \text{ quantile of } c(t) \]
PARAMETER DESCRIPTION
predictor

\(c(t)\) computed in the Probe

TYPE: pd.DataFrame

index

Variable from which data is grouped

EXAMPLE: ["care_site_level", "stay_type", "note_type", "care_site_id"]

TYPE: List[str]

q

Quantile value

TYPE: float DEFAULT: 0.8

x

Column name for the time variable \(t\)

TYPE: str DEFAULT: 'date'

y

Column name for the completeness variable \(c(t)\)

TYPE: str DEFAULT: 'c'

Source code in edsteva/models/step_function/algos/quantile.py
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
def c_0_from_quantile(
    predictor: pd.DataFrame,
    index: List[str],
    q: float = 0.8,
    x: str = "date",
    y: str = "c",
) -> pd.DataFrame:
    r"""Add a $c_0$ column holding the per-group quantile of the y-axis values.

    $$
    \hat{c_0} = q^{th} \text{ quantile of } c(t)
    $$

    The quantile is computed once per group defined by `index` and then
    merged back onto `predictor`, so every row of a group carries the same
    `c_0` value.

    Parameters
    ----------
    predictor : pd.DataFrame
        $c(t)$ computed in the Probe
    index : List[str]
        Columns by which the data is grouped

        **EXAMPLE**: `["care_site_level", "stay_type", "note_type", "care_site_id"]`
    q : float, optional
        Quantile value, forwarded to `np.quantile`
    x : str, optional
        Column name for the time variable $t$ (only checked for presence,
        not used in the computation)
    y : str, optional
        Column name for the completeness variable $c(t)$

    Returns
    -------
    pd.DataFrame
        `predictor` merged on `index` with the new column `c_0`
    """

    required = [*index, x, y]
    check_columns(df=predictor, required_columns=required)

    def _group_quantile(values):
        # q is captured from the enclosing call.
        return np.quantile(values, q=q)

    per_group = predictor.groupby(index)[[y]].agg(_group_quantile)
    c_0 = per_group.rename(columns={y: "c_0"})

    return predictor.merge(c_0, on=index)

t_0_from_c_0

t_0_from_c_0(
    predictor: pd.DataFrame,
    index: List[str],
    x: str = "date",
    y: str = "c",
    threshold: str = "c_0",
) -> pd.DataFrame

Compute \(t_0\) column using value of \(c_0\)

Returns the first date at which values are greater than \(c_0\):

\[ \hat{t_0} = \min \{ t \mid c(t) \geq \hat{c_0} \} \]
PARAMETER DESCRIPTION
predictor

\(c(t)\) computed in the Probe

TYPE: pd.DataFrame

index

Variable from which data is grouped

TYPE: List[str]

x

Column name for the time variable \(t\)

TYPE: str DEFAULT: 'date'

y

Column name for the completeness variable \(c(t)\)

TYPE: str DEFAULT: 'c'

threshold

Column name for the threshold variable \(c_0\)

TYPE: str DEFAULT: 'c_0'

Source code in edsteva/models/step_function/algos/quantile.py
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
def t_0_from_c_0(
    predictor: pd.DataFrame,
    index: List[str],
    x: str = "date",
    y: str = "c",
    threshold: str = "c_0",
) -> pd.DataFrame:
    r"""Compute $t_0$ column using value of $c_0$

    Returns, for each group, the first date at which the completeness is
    greater than or equal to $c_0$:

    $$
    \hat{t_0} = \min \{ t \mid c(t) \geq \hat{c_0} \}
    $$

    Parameters
    ----------
    predictor : pd.DataFrame
        $c(t)$ computed in the Probe
    index : List[str]
        Columns by which the data is grouped
    x : str, optional
        Column name for the time variable $t$
    y : str, optional
        Column name for the completeness variable $c(t)$
    threshold : str, optional
        Column name for the threshold variable $c_0$

    Returns
    -------
    pd.DataFrame
        `predictor` merged on `index` with the new column `t_0`. The merge
        is an inner join, so rows of a group in which no date satisfies the
        condition are absent from the result.
    """

    check_columns(df=predictor, required_columns=[*index, x, y, threshold])

    # The comparison is inclusive, as in the formula above. With a strict ">"
    # a group whose threshold equals its maximum (e.g. a clean step, where the
    # quantile c_0 is the plateau value itself) would have no matching row and
    # would be dropped entirely by the inner merge below.
    reached = predictor[predictor[y] >= predictor[threshold]]
    t_0 = reached.groupby(index)[[x]].min().rename(columns={x: "t_0"})

    return predictor.merge(t_0, on=index)