Skip to content

eds_scikit.biology.utils.process_measurement

filter_measurement_valid

filter_measurement_valid(measurement: DataFrame) -> DataFrame

Filter valid observations based on the row_status_source_value column

PARAMETER DESCRIPTION
measurement

DataFrame to filter

TYPE: DataFrame

RETURNS DESCRIPTION
DataFrame

DataFrame with valid observations only

Source code in eds_scikit/biology/utils/process_measurement.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
def filter_measurement_valid(measurement: DataFrame) -> DataFrame:
    """Filter valid observations based on the `row_status_source_value` column

    Only the rows whose `row_status_source_value` equals ``"Validé"`` are
    kept, and the status column is removed from the result.

    Parameters
    ----------
    measurement : DataFrame
        DataFrame to filter. It must contain a `row_status_source_value`
        column (verified with `check_columns`).

    Returns
    -------
    DataFrame
        DataFrame with valid observations only, without the
        `row_status_source_value` column
    """
    check_columns(
        df=measurement,
        required_columns=["row_status_source_value"],
        # Previously misspelled "measurment"; the name is handed to
        # `check_columns` to identify the table being checked.
        df_name="measurement",
    )
    is_valid = measurement["row_status_source_value"] == "Validé"
    measurement_valid = measurement[is_valid]
    # The status column is constant after filtering, so it is dropped.
    return measurement_valid.drop(columns=["row_status_source_value"])

filter_measurement_by_date

filter_measurement_by_date(measurement: DataFrame, start_date: datetime = None, end_date: datetime = None) -> DataFrame

Filter observations that are inside the selected time window

PARAMETER DESCRIPTION
measurement

DataFrame to filter

TYPE: DataFrame

start_date

EXAMPLE: "2019-05-01"

TYPE: datetime, optional DEFAULT: None

end_date

EXAMPLE: "2022-05-01"

TYPE: datetime, optional DEFAULT: None

RETURNS DESCRIPTION
DataFrame

DataFrame with observations inside the selected time window only

Source code in eds_scikit/biology/utils/process_measurement.py
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
def filter_measurement_by_date(
    measurement: DataFrame, start_date: datetime = None, end_date: datetime = None
) -> DataFrame:
    """Filter observations that are inside the selected time window

    The `measurement_date` column is cast to ``datetime64[ns]``, rows with a
    missing date are discarded, and the remaining rows are kept when their
    date lies within ``[start_date, end_date]`` (both bounds inclusive).

    The input DataFrame is left untouched: the cast and the filtering are
    performed on a new DataFrame, which is returned.

    Parameters
    ----------
    measurement : DataFrame
        DataFrame to filter. It must contain a `measurement_date` column
        (verified with `check_columns`).
    start_date : datetime, optional
        Lower bound of the window (inclusive). A falsy value such as `None`
        disables the lower bound.
        **EXAMPLE**: `"2019-05-01"`
    end_date : datetime, optional
        Upper bound of the window (inclusive). A falsy value such as `None`
        disables the upper bound.
        **EXAMPLE**: `"2022-05-01"`

    Returns
    -------
    DataFrame
        DataFrame with observations inside the selected time window only
    """
    check_columns(
        # `df_name` was previously misspelled "measurment".
        df=measurement, required_columns=["measurement_date"], df_name="measurement"
    )

    # Build a new frame instead of assigning the column and calling
    # `dropna(inplace=True)` on the argument: the in-place version silently
    # changed the dtype of, and removed rows from, the caller's DataFrame.
    measurement = measurement.assign(
        measurement_date=measurement["measurement_date"].astype("datetime64[ns]")
    )
    measurement = measurement.dropna(subset=["measurement_date"])

    # Truthiness test kept on purpose so that `None` (and any other falsy
    # bound accepted so far) still means "no bound on this side".
    if start_date:
        measurement = measurement[measurement["measurement_date"] >= start_date]
    if end_date:
        measurement = measurement[measurement["measurement_date"] <= end_date]

    return measurement

tag_measurement_anomaly

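tag_measurement_anomaly(measurement: DataFrame) -> DataFrame

Add the boolean columns range_high_anomaly and range_low_anomaly, flagging measurements whose value_as_number is above range_high or below range_low.

PARAMETER DESCRIPTION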
measurement

DataFrame to tag

TYPE: DataFrame

start_date

EXAMPLE: "2019-05-01"

TYPE: datetime, optional

end_date

EXAMPLE: "2022-05-01"

TYPE: datetime, optional

Source code in eds_scikit/biology/utils/process_measurement.py
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
def tag_measurement_anomaly(measurement: DataFrame) -> DataFrame:
    """Flag measurements whose value lies outside its reference range

    Two boolean columns are written onto `measurement` itself:

    - `range_high_anomaly`: `range_high` is not missing and
      `value_as_number` is strictly greater than `range_high`
    - `range_low_anomaly`: `range_low` is not missing and
      `value_as_number` is strictly lower than `range_low`

    Parameters
    ----------
    measurement : DataFrame
        DataFrame to tag. The columns `value_as_number`, `range_high` and
        `range_low` are read.

    Returns
    -------
    DataFrame
        The DataFrame passed in, with the two anomaly columns added
    """
    # Upper bound: a row without a reference bound is never an anomaly.
    has_upper_bound = ~measurement.range_high.isna()
    above_upper_bound = measurement["value_as_number"] > measurement["range_high"]
    measurement["range_high_anomaly"] = has_upper_bound & above_upper_bound

    # Lower bound: same rule, mirrored.
    has_lower_bound = ~measurement.range_low.isna()
    below_lower_bound = measurement["value_as_number"] < measurement["range_low"]
    measurement["range_low_anomaly"] = has_lower_bound & below_lower_bound

    return measurement

convert_measurement_units

convert_measurement_units(measurement: DataFrame, concepts_sets: List[ConceptsSet]) -> DataFrame

Add value_as_number_normalized, unit_source_value_normalized and factor columns to measurement dataframe based on concepts_sets and units.

PARAMETER DESCRIPTION
measurement

TYPE: DataFrame

concepts_sets

TYPE: List[ConceptsSet]

RETURNS DESCRIPTION
DataFrame

Measurement with added columns value_as_number_normalized, unit_source_value_normalized and factor.

Source code in eds_scikit/biology/utils/process_measurement.py
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
def convert_measurement_units(
    measurement: DataFrame, concepts_sets: List[ConceptsSet]
) -> DataFrame:
    """Normalize measurement values to the units defined by the concepts sets

    A conversion table is built with `get_conversion_table` and merged onto
    `measurement` using the `concept_set` and `unit_source_value` columns.
    The merge brings in the `unit_source_value_normalized` and `factor`
    columns, and `value_as_number_normalized` is then computed as
    `value_as_number * factor`.

    Parameters
    ----------
    measurement : DataFrame
        Measurement table. The columns `concept_set`, `unit_source_value`
        and `value_as_number` are read.
    concepts_sets : List[ConceptsSet]
        Concepts sets handed to `get_conversion_table`.

    Returns
    -------
    DataFrame
        Measurement with added columns value_as_number_normalized,
        unit_source_value_normalized and factor.
    """
    on_koalas = is_koalas(measurement)

    if on_koalas:
        measurement = cache(measurement)
        # NOTE(review): the result of `.shape` is discarded; presumably it is
        # accessed only to trigger evaluation of the cached frame - confirm.
        measurement.shape

    conversion_table = get_conversion_table(measurement, concepts_sets)
    if on_koalas:
        # Bring the conversion table to the same backend as `measurement`
        # before merging.
        conversion_table = to("koalas", conversion_table)

    merged = measurement.merge(
        conversion_table, on=["concept_set", "unit_source_value"]
    )
    merged["value_as_number_normalized"] = merged["value_as_number"] * merged["factor"]

    return merged

get_conversion_table

get_conversion_table(measurement: DataFrame, concepts_sets: List[ConceptsSet]) -> DataFrame

Given measurement dataframe and list of concepts_sets output conversion table to be merged with measurement.

PARAMETER DESCRIPTION
measurement

TYPE: DataFrame

concepts_sets

TYPE: List[ConceptsSet]

RETURNS DESCRIPTION
DataFrame

Conversion table to be merged with measurement

Source code in eds_scikit/biology/utils/process_measurement.py
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
def get_conversion_table(
    measurement: DataFrame, concepts_sets: List[ConceptsSet]
) -> DataFrame:
    """Build the unit conversion table to be merged with measurement

    The table holds one row per (`concept_set`, `unit_source_value`) pair
    found in `measurement`, with two extra columns:

    - `unit_source_value_normalized`: initialised to `unit_source_value`
    - `factor`: initialised to 1 when the unit is truthy, 0 otherwise

    For each concepts set of `concepts_sets`, the rows whose `concept_set`
    equals its name are then overwritten: the normalized unit becomes the
    set's target unit when the source unit can be converted to it (else the
    result of `units.get_unit_base`), and the factor becomes the result of
    `units.convert_unit` between the source and the normalized unit.

    Parameters
    ----------
    measurement : DataFrame
        Measurement table. The columns `concept_set` and `unit_source_value`
        are read.
    concepts_sets : List[ConceptsSet]
        Concepts sets providing a `name` and a `units` object.

    Returns
    -------
    DataFrame
        Conversion table to be merged with measurement
    """
    distinct_units = measurement.groupby("concept_set")["unit_source_value"].unique()
    conversion_table = distinct_units.explode().to_frame().reset_index()
    # NOTE(review): `to("pandas", ...)` presumably converts a Koalas frame to
    # pandas so the row-wise logic below runs locally - confirm.
    conversion_table = to("pandas", conversion_table)

    # Default: every unit maps onto itself.
    conversion_table["unit_source_value_normalized"] = conversion_table[
        "unit_source_value"
    ]

    def default_factor(row):
        return 1 if row.unit_source_value_normalized else 0

    conversion_table["factor"] = conversion_table.apply(default_factor, axis=1)

    for concept_set in concepts_sets:
        target_unit = concept_set.units.target_unit

        def normalized_unit(row):
            if concept_set.units.can_be_converted(row.unit_source_value, target_unit):
                return target_unit
            return concept_set.units.get_unit_base(row.unit_source_value)

        def conversion_factor(row):
            return concept_set.units.convert_unit(
                row.unit_source_value, row.unit_source_value_normalized
            )

        # The row functions are evaluated on every row of the table; the mask
        # restricts which rows actually receive the result.
        normalized_units = conversion_table.apply(normalized_unit, axis=1)
        in_concept_set = conversion_table.concept_set == concept_set.name
        conversion_table.loc[
            in_concept_set, "unit_source_value_normalized"
        ] = normalized_units

        # Must run after the normalized units are written: the factor is
        # computed from the updated `unit_source_value_normalized` column.
        factors = conversion_table.apply(conversion_factor, axis=1)
        conversion_table.loc[in_concept_set, "factor"] = factors

    # NOTE(review): this fills missing values in every column with 1, not only
    # in `factor` - confirm that is intended for the unit columns.
    conversion_table = conversion_table.fillna(1)

    return conversion_table
Back to top