Skip to content

eds_scikit.biology.utils.process_measurement

get_valid_measurement

get_valid_measurement(measurement: DataFrame) -> DataFrame

Filter valid observations based on the row_status_source_value column

PARAMETER DESCRIPTION
measurement

DataFrame to filter

TYPE: DataFrame

RETURNS DESCRIPTION
DataFrame

DataFrame with valid observations only

Source code in eds_scikit/biology/utils/process_measurement.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
def get_valid_measurement(measurement: DataFrame) -> DataFrame:
    """Filter valid observations based on the `row_status_source_value` column

    Rows are kept only when `row_status_source_value` is exactly `"Validé"`;
    the status column is then removed from the result.

    Parameters
    ----------
    measurement : DataFrame
        DataFrame to filter. It must contain a `row_status_source_value`
        column; this is verified through `check_columns`.

    Returns
    -------
    DataFrame
        DataFrame with valid observations only, without the
        `row_status_source_value` column
    """
    check_columns(
        df=measurement,
        required_columns=["row_status_source_value"],
        # Name reported by check_columns; was previously misspelled "measurment".
        df_name="measurement",
    )
    is_valid = measurement["row_status_source_value"] == "Validé"
    # The status column is constant after filtering, so it is dropped.
    measurement_valid = measurement[is_valid].drop(
        columns=["row_status_source_value"]
    )
    logger.info("Valid measurements have been selected")
    return measurement_valid

filter_measurement_by_date

filter_measurement_by_date(measurement: DataFrame, start_date: datetime = None, end_date: datetime = None) -> DataFrame

Filter observations that are inside the selected time window

PARAMETER DESCRIPTION
measurement

DataFrame to filter

TYPE: DataFrame

start_date

EXAMPLE: "2019-05-01"

TYPE: datetime, optional DEFAULT: None

end_date

EXAMPLE: "2022-05-01"

TYPE: datetime, optional DEFAULT: None

RETURNS DESCRIPTION
DataFrame

DataFrame with observations inside the selected time window only

Source code in eds_scikit/biology/utils/process_measurement.py
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
def filter_measurement_by_date(
    measurement: DataFrame, start_date: datetime = None, end_date: datetime = None
) -> DataFrame:
    """Filter observations that are inside the selected time window

    The `measurement_date` column is cast to `datetime64[ns]`, rows whose
    `measurement_date` is missing are discarded, and the remaining rows are
    restricted to `start_date <= measurement_date <= end_date` (each bound is
    inclusive and only applied when provided).

    The input DataFrame is not modified in place by this function; the
    filtered result is returned as a new object.

    Parameters
    ----------
    measurement : DataFrame
        DataFrame to filter. It must contain a `measurement_date` column;
        this is verified through `check_columns`. When a
        `measurement_datetime` column is also present, the DataFrame is first
        passed through `_select_adequate_date_column`.
    start_date : datetime, optional
        Lower bound (inclusive). No lower bound is applied when falsy.
        **EXAMPLE**: `"2019-05-01"`
    end_date : datetime, optional
        Upper bound (inclusive). No upper bound is applied when falsy.
        **EXAMPLE**: `"2022-05-01"`

    Returns
    -------
    DataFrame
        DataFrame with observations inside the selected time window only
    """
    check_columns(
        # Name reported by check_columns; was previously misspelled "measurment".
        df=measurement, required_columns=["measurement_date"], df_name="measurement"
    )

    if "measurement_datetime" in measurement.columns:
        measurement = _select_adequate_date_column(measurement=measurement)

    # Rebind instead of assigning through attribute access / `inplace=True`,
    # so the caller's DataFrame is left untouched.
    measurement = measurement.assign(
        measurement_date=measurement["measurement_date"].astype("datetime64[ns]")
    )
    measurement = measurement.dropna(subset=["measurement_date"])

    if start_date:
        measurement = measurement[measurement["measurement_date"] >= start_date]
        logger.info("Measurements conducted after {} have been selected", start_date)
    if end_date:
        measurement = measurement[measurement["measurement_date"] <= end_date]
        logger.info("Measurements conducted before {} have been selected", end_date)

    return measurement