generate_omop_teva(data: HiveData, start_date: str, end_date: str, teva_config: dict = default_omop_teva_config, output_dir = 'omop_teva')
Generate OMOP TEVA folder.
PARAMETER |
DESCRIPTION |
data |
Must contain the visit_occurrence table.
TYPE:
HiveData
|
start_date |
The start date for data extraction.
TYPE:
str
|
end_date |
The end date for data extraction.
TYPE:
str
|
teva_config |
OMOP TEVA configuration, by default default_omop_teva_config. Must start with the visit_occurrence configuration.
TYPE:
dict, optional
DEFAULT:
default_omop_teva_config
|
output_dir |
Output directory path, by default "omop_teva".
TYPE:
str, optional
DEFAULT:
'omop_teva'
|
Examples:
Example configuration for teva_config:
default_omop_teva_config = {
"visit_occurrence": {
"category_columns": [
"visit_occurrence_id",
"care_site_short_name",
"stay_source_value"
],
"date_column": "visit_start_datetime",
"mapper": {
"visit_occurrence_id": {"not NaN": ".*"}
}
},
"other_table": {
"category_columns": [
"visit_occurrence_id",
"column A",
"column B",
"column C"
],
"date_column": "column_datetime",
"mapper": {
"column A": {"not NaN": ".*"},
"column B": {"X type": "X.*", "Y type": "Y"}
}
}
...
}
Source code in eds_scikit/plot/omop_teva.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
def generate_omop_teva(
    data: HiveData,
    start_date: str,
    end_date: str,
    teva_config: dict = default_omop_teva_config,
    output_dir="omop_teva",
):
    """
    Generate OMOP TEVA folder.

    For every table listed in `teva_config`, a reduced count table is computed
    with `reduce_table`, a chart is built from it with `visualize_table`, and
    both are written to `output_dir` as ``{table_name}_count`` (through
    `save_pickle`) and ``{table_name}_chart.html``.

    Parameters
    ----------
    data : HiveData
        Must contain the visit_occurrence and care_site tables.
    start_date : str
        The start date for data extraction.
    end_date : str
        The end date for data extraction.
    teva_config : dict, optional
        OMOP TEVA configuration, by default `default_omop_teva_config`.
        Must contain a "visit_occurrence" entry. Tables are processed in the
        order in which they appear in the configuration.
    output_dir : str, optional
        Output directory path, by default "omop_teva".

    Raises
    ------
    Exception
        If `data` exposes no visit_occurrence or care_site table.
    KeyError
        If `teva_config` has no "visit_occurrence" entry.

    Examples
    --------
    Example configuration for `teva_config`:

        default_omop_teva_config = {
            "visit_occurrence": {
                "category_columns": [
                    "visit_occurrence_id",
                    "care_site_short_name",
                    "stay_source_value"
                ],
                "date_column": "visit_start_datetime",
                "mapper": {
                    "visit_occurrence_id": {"not NaN": ".*"}
                }
            },
            "other_table": {
                "category_columns": [
                    "visit_occurrence_id",
                    "column A",
                    "column B",
                    "column C"
                ],
                "date_column": "column_datetime",
                "mapper": {
                    "column A": {"not NaN": ".*"},
                    "column B": {"X type": "X.*", "Y type": "Y"}
                }
            }
            ...
        }
    """
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(f"{output_dir}/", exist_ok=True)

    # visit_occurrence is merged into every other configured table, so both it
    # and care_site are mandatory. Only the attribute accesses are guarded so
    # that an unrelated AttributeError is not reported as a missing table.
    try:
        visit_occurrence = data.visit_occurrence
        care_site = data.care_site
    except AttributeError as err:
        raise Exception(
            "No visit_occurrence or care_site table in input data object. visit_occurrence and care_site table must be provided."
        ) from err

    try:
        visit_config = teva_config["visit_occurrence"]
    except KeyError as err:
        raise KeyError(
            "teva_config must contain a 'visit_occurrence' configuration."
        ) from err

    visit_occurrence = visit_occurrence.merge(
        care_site[["care_site_id", "care_site_short_name"]], on="care_site_id"
    )

    # Trim visit_occurrence once, before the loop, so the result no longer
    # depends on "visit_occurrence" being the first key of the configuration.
    # dict.fromkeys de-duplicates while keeping a deterministic column order.
    requested_columns = [
        *visit_config["category_columns"],
        visit_config["date_column"],
        "visit_occurrence_id",
    ]
    available_columns = set(visit_occurrence.columns)
    visit_columns = [
        column
        for column in dict.fromkeys(requested_columns)
        if column in available_columns
    ]
    visit_occurrence = visit_occurrence[visit_columns]

    # Iterate config tables
    for table_name, config in teva_config.items():
        logger.info(f"Starting {table_name} processing.")
        if table_name == "visit_occurrence":
            table = visit_occurrence.copy()
        else:
            try:
                table = data._read_table(table_name)
                # Columns present on both sides (other than the join key) are
                # taken from the table itself, not from visit_occurrence.
                drop_columns = (
                    set(visit_occurrence.columns).intersection(table.columns)
                ).difference(["visit_occurrence_id"])
                # drop() is only called with a non-empty selection.
                # NOTE(review): presumably because some dataframe backends
                # raise on an empty drop - confirm before simplifying.
                visit_part = (
                    visit_occurrence.drop(columns=drop_columns)
                    if drop_columns
                    else visit_occurrence
                )
                table = table.merge(
                    visit_part, on="visit_occurrence_id", how="left"
                )
            except AttributeError:
                logger.warning(
                    f"No {table_name} table in input data object. Skipping {table_name}."
                )
                continue

        # Compute reduced table representation
        table["visit_occurrence_id"] = table["visit_occurrence_id"].astype(str)
        table_count = reduce_table(
            table, start_date=start_date, end_date=end_date, **config
        )
        # Discard every row holding a zero in at least one column.
        table_count = table_count[~(table_count == 0).any(axis=1)]

        # Compute associated chart
        chart = visualize_table(table_count, title=f"{table_name} table dashboard")

        # Save computations
        save_pickle(f"{output_dir}/{table_name}_count", table_count)
        chart.save(f"{output_dir}/{table_name}_chart.html")
        logger.info(f"{table_name} processing done.")
|