Skip to content

eds_scikit.biology.cleaning.utils

check_the_data_for_cleaning

check_the_data_for_cleaning(data: Data)

Check that the Data contains the tables and columns required for cleaning

PARAMETER DESCRIPTION
data

Instantiated HiveData, PostgresData or PandasData

TYPE: Data

Source code in eds_scikit/biology/cleaning/utils.py
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
def check_the_data_for_cleaning(data: Data):
    """Verify that ``data`` has the tables and columns used for cleaning.

    The table names are first passed to ``check_tables``; then each table
    is passed in turn to ``check_columns`` together with the column names
    listed for it below.

    Parameters
    ----------
    data : Data
        Instantiated [``HiveData``][eds_scikit.io.hive.HiveData],
        [``PostgresData``][eds_scikit.io.postgres.PostgresData] or
        [``PandasData``][eds_scikit.io.files.PandasData]
    """
    # Table name -> columns expected on that table. Insertion order sets
    # the order in which the tables are checked.
    columns_by_table = {
        "measurement": [
            "measurement_id",
            "visit_occurrence_id",
            "measurement_date",
            "value_as_number",
            "unit_source_value",
            "row_status_source_value",
            "measurement_source_concept_id",
        ],
        "concept": [
            "concept_id",
            "concept_name",
            "concept_code",
            "vocabulary_id",
        ],
        "concept_relationship": [
            "concept_id_1",
            "concept_id_2",
            "relationship_id",
        ],
    }

    # Table presence is checked before any table attribute is accessed.
    check_tables(data, required_tables=list(columns_by_table))

    for table_name, expected_columns in columns_by_table.items():
        check_columns(
            getattr(data, table_name),
            required_columns=expected_columns,
        )