Skip to content

edsteva.probes.note.note

NoteProbe

Bases: BaseProbe

The NoteProbe computes \(c(t)\), the availability of clinical documents.

PARAMETER DESCRIPTION
completeness_predictor

Algorithm used to compute the completeness predictor

EXAMPLE: "per_visit_default"

TYPE: str DEFAULT: 'per_visit_default'

ATTRIBUTE DESCRIPTION
_completeness_predictor

Algorithm used to compute the completeness predictor

VALUE: "per_visit_default"

TYPE: str

_index

Variable from which data is grouped

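VALUE: ["note_type", "care_site_id", "care_site_level", "care_sites_set", "care_site_specialty", "specialties_set", "stay_type", "stay_source", "length_of_stay", "provenance_source", "age_range", "drg_source", "condition_type", "gender_source_value"]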

TYPE: List[str]

_viz_config

Dictionary of configuration for visualization purpose.

VALUE: {}

TYPE: Dict[str, str]

care_site_relationship

It describes the care site structure and gives the hierarchy of the different care site levels. (cf. prepare_care_site_relationship())

TYPE: pd.DataFrame

Source code in edsteva/probes/note/note.py
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
class NoteProbe(BaseProbe):
    r"""
    The ``NoteProbe`` computes $c(t)$, the availability of clinical documents.

    Parameters
    ----------
    completeness_predictor: str
        Algorithm used to compute the completeness predictor

        **EXAMPLE**: ``"per_visit_default"``

    Attributes
    ----------
    _completeness_predictor: str
        Algorithm used to compute the completeness predictor

        **VALUE**: ``"per_visit_default"``
    _index: List[str]
        Variables from which data is grouped. The value below is the one set at instantiation; ``compute_process()`` removes the entries whose filter is not used.

        **VALUE**: ``["note_type", "care_site_id", "care_site_level", "care_sites_set", "care_site_specialty", "specialties_set", "stay_type", "stay_source", "length_of_stay", "provenance_source", "age_range", "drg_source", "condition_type", "gender_source_value"]``
    _viz_config: Dict[str, str]
        Dictionary of configuration for visualization purpose.

        **VALUE**: ``{}``
    care_site_relationship: pd.DataFrame
        It describes the care site structure and gives the hierarchy of the different care site levels. (cf. [``prepare_care_site_relationship()``][edsteva.probes.utils.prepare_df.prepare_care_site_relationship])
    """

    def __init__(
        self,
        completeness_predictor: str = "per_visit_default",
    ):
        # Full list of grouping variables; compute_process() prunes the ones
        # whose filter argument is not used.
        self._index = [
            "note_type",
            "care_site_id",
            "care_site_level",
            "care_sites_set",
            "care_site_specialty",
            "specialties_set",
            "stay_type",
            "stay_source",
            "length_of_stay",
            "provenance_source",
            "age_range",
            "drg_source",
            "condition_type",
            "gender_source_value",
        ]
        super().__init__(
            completeness_predictor=completeness_predictor,
            index=self._index,
        )

    def compute_process(
        self,
        data: Data,
        start_date: datetime,
        end_date: datetime,
        note_types: Union[bool, str, Dict[str, str]] = {
            "Urgence": "urge",
            "Ordonnance": "ordo",
            "CRH": "crh",
        },
        extra_data: Data = None,
        care_site_ids: List[int] = None,
        care_site_short_names: List[str] = None,
        care_site_levels: Union[bool, str, List[str]] = True,
        care_sites_sets: Union[str, Dict[str, str]] = None,
        care_site_specialties: Union[bool, List[str]] = None,
        specialties_sets: Union[str, Dict[str, str]] = None,
        stay_types: Union[bool, str, Dict[str, str]] = True,
        stay_sources: Union[bool, str, Dict[str, str]] = None,
        length_of_stays: List[float] = None,
        provenance_sources: Union[bool, str, Dict[str, str]] = None,
        condition_types: Union[bool, str, Dict[str, str]] = None,
        drg_sources: Union[bool, str, Dict[str, str]] = None,
        age_ranges: List[int] = None,
        gender_source_values: Union[bool, str, Dict[str, str]] = None,
        **kwargs,
    ):
        r"""Script to be used by [``compute()``][edsteva.probes.base.BaseProbe.compute]

        The grouping variables whose filter is not used are first removed from ``_index``; the call is then delegated to the completeness predictor registered under ``_completeness_predictor``, and its result is returned as is.

        Parameters
        ----------
        data : Data
            Instantiated [``HiveData``][edsteva.io.hive.HiveData], [``PostgresData``][edsteva.io.postgres.PostgresData] or [``LocalData``][edsteva.io.files.LocalData]
        start_date : datetime
            **EXAMPLE**: `"2019-05-01"`
        end_date : datetime
            **EXAMPLE**: `"2021-07-01"`
        note_types: Union[bool, str, Dict[str, str]], optional
            **EXAMPLE**: `{"All": ".*"}` or `{"CRH": "crh", "Urgence": "urge"}`
        extra_data : Data, optional
            Instantiated [``HiveData``][edsteva.io.hive.HiveData], [``PostgresData``][edsteva.io.postgres.PostgresData] or [``LocalData``][edsteva.io.files.LocalData]. This is not OMOP-standardized data but data needed to associate notes with UF and Pole. If not provided, it will only compute the predictor for hospitals.
        care_site_ids : List[int], optional
            **EXAMPLE**: `[8312056386, 8312027648]`
        care_site_short_names : List[str], optional
            **EXAMPLE**: `["HOSPITAL 1", "HOSPITAL 2"]`
        care_site_levels : Union[bool, str, List[str]], optional
            **EXAMPLE**: `["Hospital", "Pole", "UF", "UC", "UH"]`
        care_sites_sets: Union[str, Dict[str, str]], optional
            **EXAMPLE**: `{"All AP-HP": ".*"}` or `{"All AP-HP": ".*", "Pediatrics": r"debre|trousseau|necker"}`
        care_site_specialties: Union[bool, List[str]], optional
            **EXAMPLE**: `["CARDIOLOGIE", "CHIRURGIE"]`
        specialties_sets: Union[str, Dict[str, str]], optional
            **EXAMPLE**: `{"All": ".*"}` or `{"All": ".*", "ICU": r"REA\s|USI\s|SC\s"}`
        stay_types: Union[bool, str, Dict[str, str]], optional
            **EXAMPLE**: `{"All": ".*"}` or `{"All": ".*", "Urg_and_consult": "urgences|consultation"}` or `"hospitalisés"`
        stay_sources: Union[bool, str, Dict[str, str]], optional
            **EXAMPLE**: `{"All": ".*"}, {"MCO" : "MCO", "MCO_PSY_SSR" : "MCO|Psychiatrie|SSR"}`
        length_of_stays: List[float], optional
            **EXAMPLE**: `[1, 30]`
        provenance_sources:  Union[bool, str, Dict[str, str]], optional
            **EXAMPLE**: `{"All": ".*"}, {"urgence" : "service d'urgence"}`
        condition_types :  Union[bool, str, Dict[str, str]], optional
            **EXAMPLE**: `{"Pulmonary_infection": "J22|J15|J13|J958|..."}`
        drg_sources :  Union[bool, str, Dict[str, str]], optional
            **EXAMPLE**: `{"All": ".*"}, {"medical" : ".{2}M"}`
        age_ranges: List[int], optional
            **EXAMPLE**: `[18, 64]`
        gender_source_values: Union[bool, str, Dict[str, str]], optional
            **EXAMPLE**: `{"All": ".*"}, {"women" : "f"}`
        """
        # One (index column, filter argument) pair per prunable grouping
        # variable. "care_site_id" is absent on purpose: it is never removed.
        index_filters = (
            ("note_type", note_types),
            ("care_site_level", care_site_levels),
            ("care_sites_set", care_sites_sets),
            ("care_site_specialty", care_site_specialties),
            ("specialties_set", specialties_sets),
            ("stay_type", stay_types),
            ("stay_source", stay_sources),
            ("length_of_stay", length_of_stays),
            ("provenance_source", provenance_sources),
            ("age_range", age_ranges),
            ("condition_type", condition_types),
            ("drg_source", drg_sources),
            ("gender_source_value", gender_source_values),
        )
        for column, selection in index_filters:
            if column == "condition_type":
                # Only ``None`` drops this column; any other value (even a
                # falsy one) keeps it.
                # NOTE(review): differs from the other filters — confirm intent.
                unused = selection is None
            else:
                unused = not selection
            if unused and column in self._index:
                self._index.remove(column)

        predictor = completeness_predictors.get(self._completeness_predictor)
        return predictor(
            self,
            data=data,
            start_date=start_date,
            end_date=end_date,
            care_site_levels=care_site_levels,
            stay_types=stay_types,
            care_site_ids=care_site_ids,
            extra_data=extra_data,
            care_site_short_names=care_site_short_names,
            care_site_specialties=care_site_specialties,
            care_sites_sets=care_sites_sets,
            specialties_sets=specialties_sets,
            note_types=note_types,
            length_of_stays=length_of_stays,
            provenance_sources=provenance_sources,
            stay_sources=stay_sources,
            age_ranges=age_ranges,
            drg_sources=drg_sources,
            condition_types=condition_types,
            gender_source_values=gender_source_values,
            **kwargs,
        )

    def get_viz_config(self, viz_type: str, **kwargs):
        """Build the visualization configuration registered for ``viz_type``.

        The configuration name is read from ``_viz_config[viz_type]``; when no entry is set, the name of the completeness predictor is used instead. The matching entry of ``viz_configs[viz_type]`` is then called with the probe and ``**kwargs``, and its result is returned.

        Raises
        ------
        ValueError
            If ``viz_type`` is not a key of ``viz_configs``.
        """
        if viz_type not in viz_configs.keys():
            raise ValueError(f"edsteva has no {viz_type} registry !")
        config_name = self._viz_config.get(viz_type)
        if config_name is None:
            # No explicit configuration: fall back to the predictor's name.
            config_name = self._completeness_predictor
        return viz_configs[viz_type].get(config_name)(self, **kwargs)

    def available_completeness_predictors(self):
        """Return the names of all registered completeness predictors as a list."""
        return list(completeness_predictors.get_all().keys())

compute_process

compute_process(
    data: Data,
    start_date: datetime,
    end_date: datetime,
    note_types: Union[bool, str, Dict[str, str]] = {
        "Urgence": "urge",
        "Ordonnance": "ordo",
        "CRH": "crh",
    },
    extra_data: Data = None,
    care_site_ids: List[int] = None,
    care_site_short_names: List[str] = None,
    care_site_levels: Union[bool, str, List[str]] = True,
    care_sites_sets: Union[str, Dict[str, str]] = None,
    care_site_specialties: Union[bool, List[str]] = None,
    specialties_sets: Union[str, Dict[str, str]] = None,
    stay_types: Union[bool, str, Dict[str, str]] = True,
    stay_sources: Union[bool, str, Dict[str, str]] = None,
    length_of_stays: List[float] = None,
    provenance_sources: Union[
        bool, str, Dict[str, str]
    ] = None,
    condition_types: Union[
        bool, str, Dict[str, str]
    ] = None,
    drg_sources: Union[bool, str, Dict[str, str]] = None,
    age_ranges: List[int] = None,
    gender_source_values: Union[
        bool, str, Dict[str, str]
    ] = None,
    **kwargs
)

Script to be used by compute()

PARAMETER DESCRIPTION
data

Instantiated HiveData, PostgresData or LocalData

TYPE: Data

start_date

EXAMPLE: "2019-05-01"

TYPE: datetime

end_date

EXAMPLE: "2021-07-01"

TYPE: datetime

note_types

EXAMPLE: {"All": ".*"} or {"CRH": "crh", "Urgence": "urge"}

TYPE: Union[bool, str, Dict[str, str]] DEFAULT: {'Urgence': 'urge', 'Ordonnance': 'ordo', 'CRH': 'crh'}

extra_data

Instantiated HiveData, PostgresData or LocalData. This is not OMOP-standardized data but data needed to associate notes with UF and Pole. If not provided, it will only compute the predictor for hospitals.

TYPE: Data DEFAULT: None

care_site_ids

EXAMPLE: [8312056386, 8312027648]

TYPE: List[int] DEFAULT: None

care_site_short_names

EXAMPLE: ["HOSPITAL 1", "HOSPITAL 2"]

TYPE: List[str] DEFAULT: None

care_site_levels

EXAMPLE: ["Hospital", "Pole", "UF", "UC", "UH"]

TYPE: Union[bool, str, List[str]] DEFAULT: True

care_sites_sets

EXAMPLE: {"All AP-HP": ".*"} or {"All AP-HP": ".*", "Pediatrics": r"debre|trousseau|necker"}

TYPE: Union[str, Dict[str, str]] DEFAULT: None

care_site_specialties

EXAMPLE: ["CARDIOLOGIE", "CHIRURGIE"]

TYPE: Union[bool, List[str]] DEFAULT: None

specialties_sets

EXAMPLE: {"All": ".*"} or {"All": ".*", "ICU": r"REA\s|USI\s|SC\s"}

TYPE: Union[str, Dict[str, str]] DEFAULT: None

stay_types

EXAMPLE: {"All": ".*"} or {"All": ".*", "Urg_and_consult": "urgences|consultation"} or "hospitalisés"

TYPE: Union[bool, str, Dict[str, str]] DEFAULT: True

stay_sources

EXAMPLE: {"All": ".*"}, {"MCO" : "MCO", "MCO_PSY_SSR" : "MCO|Psychiatrie|SSR"}

TYPE: Union[bool, str, Dict[str, str]] DEFAULT: None

length_of_stays

EXAMPLE: [1, 30]

TYPE: List[float] DEFAULT: None

provenance_sources

EXAMPLE: {"All": ".*"}, {"urgence" : "service d'urgence"}

TYPE: Union[bool, str, Dict[str, str]] DEFAULT: None

condition_types

EXAMPLE: {"Pulmonary_infection": "J22|J15|J13|J958|..."}

TYPE: Union[bool, str, Dict[str, str]] DEFAULT: None

drg_sources

EXAMPLE: {"All": ".*"}, {"medical" : ".{2}M"}

TYPE: Union[bool, str, Dict[str, str]] DEFAULT: None

age_ranges

EXAMPLE: [18, 64]

TYPE: List[int] DEFAULT: None

gender_source_values

EXAMPLE: {"All": ".*"}, {"women" : "f"}

TYPE: Union[bool, str, Dict[str, str]] DEFAULT: None

Source code in edsteva/probes/note/note.py
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
def compute_process(
    self,
    data: Data,
    start_date: datetime,
    end_date: datetime,
    note_types: Union[bool, str, Dict[str, str]] = {
        "Urgence": "urge",
        "Ordonnance": "ordo",
        "CRH": "crh",
    },
    extra_data: Data = None,
    care_site_ids: List[int] = None,
    care_site_short_names: List[str] = None,
    care_site_levels: Union[bool, str, List[str]] = True,
    care_sites_sets: Union[str, Dict[str, str]] = None,
    care_site_specialties: Union[bool, List[str]] = None,
    specialties_sets: Union[str, Dict[str, str]] = None,
    stay_types: Union[bool, str, Dict[str, str]] = True,
    stay_sources: Union[bool, str, Dict[str, str]] = None,
    length_of_stays: List[float] = None,
    provenance_sources: Union[bool, str, Dict[str, str]] = None,
    condition_types: Union[bool, str, Dict[str, str]] = None,
    drg_sources: Union[bool, str, Dict[str, str]] = None,
    age_ranges: List[int] = None,
    gender_source_values: Union[bool, str, Dict[str, str]] = None,
    **kwargs,
):
    r"""Script to be used by [``compute()``][edsteva.probes.base.BaseProbe.compute]

    The grouping variables whose filter is not used are first removed from ``_index``; the call is then delegated to the completeness predictor registered under ``_completeness_predictor``, and its result is returned as is.

    Parameters
    ----------
    data : Data
        Instantiated [``HiveData``][edsteva.io.hive.HiveData], [``PostgresData``][edsteva.io.postgres.PostgresData] or [``LocalData``][edsteva.io.files.LocalData]
    start_date : datetime
        **EXAMPLE**: `"2019-05-01"`
    end_date : datetime
        **EXAMPLE**: `"2021-07-01"`
    note_types: Union[bool, str, Dict[str, str]], optional
        **EXAMPLE**: `{"All": ".*"}` or `{"CRH": "crh", "Urgence": "urge"}`
    extra_data : Data, optional
        Instantiated [``HiveData``][edsteva.io.hive.HiveData], [``PostgresData``][edsteva.io.postgres.PostgresData] or [``LocalData``][edsteva.io.files.LocalData]. This is not OMOP-standardized data but data needed to associate notes with UF and Pole. If not provided, it will only compute the predictor for hospitals.
    care_site_ids : List[int], optional
        **EXAMPLE**: `[8312056386, 8312027648]`
    care_site_short_names : List[str], optional
        **EXAMPLE**: `["HOSPITAL 1", "HOSPITAL 2"]`
    care_site_levels : Union[bool, str, List[str]], optional
        **EXAMPLE**: `["Hospital", "Pole", "UF", "UC", "UH"]`
    care_sites_sets: Union[str, Dict[str, str]], optional
        **EXAMPLE**: `{"All AP-HP": ".*"}` or `{"All AP-HP": ".*", "Pediatrics": r"debre|trousseau|necker"}`
    care_site_specialties: Union[bool, List[str]], optional
        **EXAMPLE**: `["CARDIOLOGIE", "CHIRURGIE"]`
    specialties_sets: Union[str, Dict[str, str]], optional
        **EXAMPLE**: `{"All": ".*"}` or `{"All": ".*", "ICU": r"REA\s|USI\s|SC\s"}`
    stay_types: Union[bool, str, Dict[str, str]], optional
        **EXAMPLE**: `{"All": ".*"}` or `{"All": ".*", "Urg_and_consult": "urgences|consultation"}` or `"hospitalisés"`
    stay_sources: Union[bool, str, Dict[str, str]], optional
        **EXAMPLE**: `{"All": ".*"}, {"MCO" : "MCO", "MCO_PSY_SSR" : "MCO|Psychiatrie|SSR"}`
    length_of_stays: List[float], optional
        **EXAMPLE**: `[1, 30]`
    provenance_sources:  Union[bool, str, Dict[str, str]], optional
        **EXAMPLE**: `{"All": ".*"}, {"urgence" : "service d'urgence"}`
    condition_types :  Union[bool, str, Dict[str, str]], optional
        **EXAMPLE**: `{"Pulmonary_infection": "J22|J15|J13|J958|..."}`
    drg_sources :  Union[bool, str, Dict[str, str]], optional
        **EXAMPLE**: `{"All": ".*"}, {"medical" : ".{2}M"}`
    age_ranges: List[int], optional
        **EXAMPLE**: `[18, 64]`
    gender_source_values: Union[bool, str, Dict[str, str]], optional
        **EXAMPLE**: `{"All": ".*"}, {"women" : "f"}`
    """
    # One (index column, filter argument) pair per prunable grouping
    # variable. "care_site_id" is absent on purpose: it is never removed.
    index_filters = (
        ("note_type", note_types),
        ("care_site_level", care_site_levels),
        ("care_sites_set", care_sites_sets),
        ("care_site_specialty", care_site_specialties),
        ("specialties_set", specialties_sets),
        ("stay_type", stay_types),
        ("stay_source", stay_sources),
        ("length_of_stay", length_of_stays),
        ("provenance_source", provenance_sources),
        ("age_range", age_ranges),
        ("condition_type", condition_types),
        ("drg_source", drg_sources),
        ("gender_source_value", gender_source_values),
    )
    for column, selection in index_filters:
        if column == "condition_type":
            # Only ``None`` drops this column; any other value (even a
            # falsy one) keeps it.
            # NOTE(review): differs from the other filters — confirm intent.
            unused = selection is None
        else:
            unused = not selection
        if unused and column in self._index:
            self._index.remove(column)

    predictor = completeness_predictors.get(self._completeness_predictor)
    return predictor(
        self,
        data=data,
        start_date=start_date,
        end_date=end_date,
        care_site_levels=care_site_levels,
        stay_types=stay_types,
        care_site_ids=care_site_ids,
        extra_data=extra_data,
        care_site_short_names=care_site_short_names,
        care_site_specialties=care_site_specialties,
        care_sites_sets=care_sites_sets,
        specialties_sets=specialties_sets,
        note_types=note_types,
        length_of_stays=length_of_stays,
        provenance_sources=provenance_sources,
        stay_sources=stay_sources,
        age_ranges=age_ranges,
        drg_sources=drg_sources,
        condition_types=condition_types,
        gender_source_values=gender_source_values,
        **kwargs,
    )