Skip to content

eds_scikit.biology.utils.process_concepts

ConceptsSet

Class defining the concepts-sets with 2 attributes:

  • name: the name of the concepts-set
  • concept_codes: the list of concept codes included in the concepts-set
Source code in eds_scikit/biology/utils/process_concepts.py
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
class ConceptsSet:
    """Class defining the concepts-sets with 2 attributes:

    - ``name``: the name of the concepts-set
    - ``concept_codes`` : the list of concept codes included in the concepts-set

    Parameters
    ----------
    name : str
        Name of the concepts-set.
    concept_codes : List[str], optional
        Initial concept codes. When ``None``, the codes are obtained by calling
        ``fetch_concept_codes_from_name(name)``. A list given here is copied, so
        later additions/removals never modify the caller's own list.
    """

    def __init__(self, name: str, concept_codes: List[str] = None):
        self.name = name
        if concept_codes is None:
            self.concept_codes = fetch_concept_codes_from_name(name)
        else:
            # Defensive copy: add/remove mutate ``self.concept_codes`` in place
            # and must not leak into the list owned by the caller.
            self.concept_codes = list(concept_codes)

    @staticmethod
    def _as_code_list(concept_codes: Union[str, List[str]]) -> List[str]:
        """Return ``concept_codes`` as a fresh list, rejecting unsupported types.

        Raises
        ------
        TypeError
            If ``concept_codes`` is neither a string nor a list.
        """
        if isinstance(concept_codes, str):
            return [concept_codes]
        if isinstance(concept_codes, list):
            # Snapshot the input: it may be ``self.concept_codes`` itself, and
            # iterating a list while removing from it would skip elements.
            return list(concept_codes)
        logger.error("concept_codes must be string or list")
        raise TypeError(
            "concept_codes must be string or list, got {}".format(
                type(concept_codes).__name__
            )
        )

    def add_concept_codes(self, concept_codes: Union[str, List[str]]):
        """Append the given code(s) that are not already in the concepts-set.

        Parameters
        ----------
        concept_codes : Union[str, List[str]]
            A single concept code or a list of concept codes.

        Raises
        ------
        TypeError
            If ``concept_codes`` is neither a string nor a list.
        """
        for concept_code in self._as_code_list(concept_codes):
            if concept_code not in self.concept_codes:
                self.concept_codes.append(concept_code)

    def remove_concept_codes(self, concept_codes: Union[str, List[str]]):
        """Remove the given code(s) from the concepts-set when present.

        Each removal is logged at info level; codes that are not in the
        concepts-set are ignored.

        Parameters
        ----------
        concept_codes : Union[str, List[str]]
            A single concept code or a list of concept codes.

        Raises
        ------
        TypeError
            If ``concept_codes`` is neither a string nor a list.
        """
        for concept_code in self._as_code_list(concept_codes):
            if concept_code in self.concept_codes:
                self.concept_codes.remove(concept_code)
                logger.info("concept_code {} has been deleted", concept_code)

fetch_all_concepts_set

fetch_all_concepts_set(concepts_sets_table_name: str = 'default_concepts_sets') -> List[ConceptsSet]

Returns a list of all the concepts-sets of the chosen table. By default, the table is here.

PARAMETER DESCRIPTION
concepts_sets_table_name

Name of the table to extract concepts-sets from

TYPE: str, optional DEFAULT: 'default_concepts_sets'

RETURNS DESCRIPTION
List[ConceptsSet]

The list of all concepts-sets in the selected table

Source code in eds_scikit/biology/utils/process_concepts.py
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
def fetch_all_concepts_set(
    concepts_sets_table_name: str = "default_concepts_sets",
) -> List[ConceptsSet]:
    """Build a concepts-set for every name listed in the chosen table. By default, the table is [here][concepts-sets].

    The table is looked up by name as an attribute of ``datasets``; one
    ``ConceptsSet`` is created per entry of its ``concepts_set_name``.

    Parameters
    ----------
    concepts_sets_table_name : str, optional
        Name of the table to extract concepts-sets from

    Returns
    -------
    List[ConceptsSet]
        The list of all concepts-sets in the selected table
    """
    table = getattr(datasets, concepts_sets_table_name)
    all_concepts_sets = [
        ConceptsSet(set_name) for set_name in table.concepts_set_name
    ]
    logger.info("Fetch all concepts-sets from table {}", concepts_sets_table_name)
    return all_concepts_sets

get_concept_src_to_std

get_concept_src_to_std(concept: DataFrame, concept_relationship: DataFrame, concepts_sets: List[ConceptsSet], standard_concept_regex: dict = default_standard_concept_regex, standard_terminologies: List[str] = default_standard_terminologies) -> pd.DataFrame

Process the Concept and Concept Relationship tables to obtain a wide DataFrame that gives, for all concepts-sets, the source concept codes along with the standard concept codes.

PARAMETER DESCRIPTION
concept

Concept OMOP table

TYPE: DataFrame

concept_relationship

Concept Relationship OMOP table

TYPE: DataFrame

concepts_sets

List of concepts-sets to select

TYPE: List[ConceptsSet]

standard_concept_regex

EXAMPLE: {"LOINC": "[0-9]{2,5}[-][0-9]","AnaBio": "[A-Z][0-9]{4}"}

TYPE: dict, optional DEFAULT: default_standard_concept_regex

standard_terminologies

EXAMPLE: ["LOINC", "AnaBio"]

TYPE: List[str], optional DEFAULT: default_standard_terminologies

RETURNS DESCRIPTION
pd.DataFrame

DataFrame with a column for the source concepts codes and columns for the selected standard concepts codes

Source code in eds_scikit/biology/utils/process_concepts.py
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
def get_concept_src_to_std(
    concept: DataFrame,
    concept_relationship: DataFrame,
    concepts_sets: List[ConceptsSet],
    standard_concept_regex: dict = default_standard_concept_regex,
    standard_terminologies: List[str] = default_standard_terminologies,
) -> pd.DataFrame:
    """Build a wide source-to-standard mapping from the ``Concept`` and ``Concept Relationship`` tables.

    For the selected concepts-sets, the result gives each source concept id
    along with the code, name and vocabulary of its related concepts, one
    group of columns per standard terminology.

    Parameters
    ----------
    concept : DataFrame
        [Concept](https://www.ohdsi.org/web/wiki/doku.php?id=documentation:cdm:concept) OMOP table
    concept_relationship : DataFrame
        [Concept Relationship](https://www.ohdsi.org/web/wiki/doku.php?id=documentation:cdm:concept_relationship) OMOP table
    concepts_sets : List[ConceptsSet]
        List of concepts-sets to select
    standard_concept_regex : dict, optional
        Forwarded to the concept filtering step.
        **EXAMPLE**: `{"LOINC": "[0-9]{2,5}[-][0-9]","AnaBio": "[A-Z][0-9]{4}"}`
    standard_terminologies : List[str], optional
        Terminologies to produce columns for. Each one is matched
        case-insensitively as a plain substring of ``vocabulary_id``.
        **EXAMPLE**: `["LOINC", "AnaBio"]`


    Returns
    -------
    pd.DataFrame
        DataFrame with a column for the source concepts codes and columns for the selected standard concepts codes.
        Missing values are filled with ``"Non renseigné"``. When
        ``standard_terminologies`` is empty, only the ``source_concept_id``
        column is returned."""
    concept_required = ["concept_id", "concept_code", "concept_name", "vocabulary_id"]
    relationship_required = ["concept_id_1", "concept_id_2", "relationship_id"]
    check_columns(concept, required_columns=concept_required, df_name="concept")
    check_columns(
        concept_relationship,
        required_columns=relationship_required,
        df_name="concept_relationship",
    )

    # Concepts belonging to the requested concepts-sets
    selected_concepts = _filter_concepts(concept, concepts_sets, standard_concept_regex)

    # Keep only the "Maps to" / "Mapped from" relationships; the relationship
    # label itself is not needed afterwards.
    is_mapping = concept_relationship.relationship_id.isin(["Maps to", "Mapped from"])
    mappings = concept_relationship[is_mapping].drop(columns="relationship_id")

    # Source concept ids (concept_id_1) that map onto one of the selected concepts
    source_ids = selected_concepts.merge(
        mappings, left_on="concept_id", right_on="concept_id_2", how="inner"
    )[["concept_id_1", "concepts_set"]]
    source_ids.drop_duplicates("concept_id_1", inplace=True)

    # Every concept related to those source ids, enriched with its concept row
    related_ids = mappings.merge(source_ids, on="concept_id_1", how="inner")
    long_src_to_std = (
        concept.merge(
            related_ids, left_on="concept_id", right_on="concept_id_2", how="inner"
        )
        .drop(columns=["concept_id", "concept_id_2"])
        .rename(columns={"concept_id_1": "source_concept_id"})
    )
    long_src_to_std = to("pandas", long_src_to_std)

    # Long -> wide: one frame per terminology, with terminology-prefixed columns
    frames_by_terminology = []
    for terminology in standard_terminologies:
        code_column = f"{terminology}_concept_code"
        in_terminology = long_src_to_std.vocabulary_id.str.contains(
            terminology, case=False, regex=False
        )
        terminology_frame = long_src_to_std[in_terminology].copy()
        terminology_frame.rename(
            columns={
                "concept_code": code_column,
                "concept_name": f"{terminology}_concept_name",
                "vocabulary_id": f"{terminology}_vocabulary_id",
            },
            inplace=True,
        )
        terminology_frame.drop_duplicates(
            ["source_concept_id", code_column], inplace=True
        )
        frames_by_terminology.append(terminology_frame)

    # No terminology requested: nothing to pivot
    if not frames_by_terminology:
        return long_src_to_std[["source_concept_id"]]

    def _outer_merge(left, right):
        return left.merge(right, on=["source_concept_id", "concepts_set"], how="outer")

    # With a single frame ``reduce`` returns it untouched, without merging
    wide_src_to_std = reduce(_outer_merge, frames_by_terminology)

    # NOTE(review): the check is made against the keys of
    # ``default_standard_concept_regex``, not the ``standard_concept_regex``
    # argument -- confirm this is intended.
    has_all_default_terminologies = all(
        default_terminology in standard_terminologies
        for default_terminology in default_standard_concept_regex
    )
    if has_all_default_terminologies:
        # Get LOINC NAME and code from ITM
        wide_src_to_std = _override_name_code_with_itm(wide_src_to_std)

    wide_src_to_std = _rename_duplicate_code_with_different_names(wide_src_to_std)
    wide_src_to_std.fillna("Non renseigné", inplace=True)

    return wide_src_to_std