def from_regex_on_care_site_description(
    care_site: DataFrame, subset_care_site_type_source_value: Union[list, set] = {"UDS"}
) -> DataFrame:
    r"""Use regular expressions on `care_site_name` to decide if it is an ICU care site.

    This relies on [this function][eds_scikit.structures.attributes.add_care_site_attributes].

    The regular expression used to detect ICU is
    `r"\bUSI|\bREA[N\s]|\bREA\b|\bUSC\b|SOINS.*INTENSIF|SURV.{0,15}CONT|\bSI\b|\bSC\b"`.

    !!! aphp "Keeping only 'UDS'"
        At AP-HP, all ICU are **UDS** (*Unité De Soins*).
        Therefore, this function filters care sites by default to only keep UDS.

    Parameters
    ----------
    care_site: DataFrame
        Should at least contain the `care_site_name` and
        `care_site_type_source_value` columns
    subset_care_site_type_source_value: Union[list, set]
        Acceptable values for `care_site_type_source_value`.
        If empty, no filtering on the care site type is applied.

    Returns
    -------
    care_site: DataFrame
        Dataframe with 1 added column corresponding to the following concept:

        - `"IS_ICU"`
    """  # noqa
    # NOTE: the docstring is a raw string on purpose, so that the regex
    # backslashes (`\b`, `\s`) are kept verbatim instead of being parsed
    # as escape sequences.
    care_site = attributes.add_care_site_attributes(
        care_site, only_attributes=["IS_ICU"]
    )

    # An empty subset means "do not restrict by care site type".
    # The default set is only read here, never mutated.
    if not subset_care_site_type_source_value:
        return care_site

    # Keep the ICU flag only for care sites whose type is in the subset
    has_accepted_type = care_site.care_site_type_source_value.isin(
        subset_care_site_type_source_value
    )
    care_site["IS_ICU"] = care_site["IS_ICU"] & has_accepted_type

    return care_site