Skip to content

Cerebrovascular accident[source]

The eds.cerebrovascular_accident pipeline component extracts mentions of cerebrovascular accident. It will notably match:

  • Mentions of AVC/AIT
  • Mentions of bleeding, hemorrhage, thrombus, ischemia, etc., localized in the brain
Details of the used patterns
# fmt: off
import re

from edsnlp.utils.resources import get_AVC_care_site

from ..terms import BRAIN, HEART, PERIPHERAL

# Regexes consumed by the `exclude` rule of the `avc` pattern.
# The first entries are the names returned by `get_AVC_care_site(prefix=True)`,
# stripped, regex-escaped and wrapped in word boundaries; the remaining entries
# are hand-written fragments (e.g. `service`, `consult`, `[àa] avc`).
AVC_CARE_SITES_REGEX = [
    *(
        rf"\b{re.escape(site.strip())}\b"
        for site in get_AVC_care_site(prefix=True)
    ),
    r"h[oô]p",
    r"\brcp",
    r"service",
    r"\bsau",
    r"ap.?hp",
    r"\burg",
    r"finess",
    r"\bsiret",
    r"[àa] avc",
    r"consult",
]

# Pattern for the standalone "avc" acronym, matched on the NORM attribute.
avc = {
    "source": "avc",
    "regex": [r"\bavc\b"],
    "exclude": [
        # Discard the match when one of the care-site regexes is found in a
        # window of 5 on either side, not restricted to the current sentence.
        {
            "regex": AVC_CARE_SITES_REGEX,
            "window": (-5, 5),
            "regex_flags": re.S | re.I,
            "limit_to_sentence": False,
        },
        # Discard the match when a single letter followed by a dot appears
        # within a window of 2.
        {
            "regex": r"\b[a-z]\.",
            "window": 2,
            "limit_to_sentence": False,
        },
    ],
    "regex_attr": "NORM",
}

# Pattern for generic vascular terms (hemorrhage, thrombosis, embolism, ...)
# matched on the NORM attribute, with a nearby brain localization captured
# under the `brain_localized` key.
with_localization = {
    "source": "with_localization",
    "regex": [
        r"(hemorr?agie|hematome)",
        r"angiopath",
        r"angioplasti",
        r"infarctus",
        r"occlusion",
        r"saignement",
        r"embol",
        r"vascularite",
        r"\bhsd\b",
        r"thrombos",
        r"thrombol[^y]",
        r"thrombophi",
        r"thrombi[^n]",
        r"thrombus",
        r"thrombectomi",
        r"phleb",
    ],
    "regex_attr": "NORM",
    "exclude": [
        # Drop matches with a pulmonary term within a window of 4.
        {
            "regex": r"pulmo|poumon",
            "window": 4,
        },
    ],
    "assign": [
        # Look for any BRAIN term (joined as one alternation group) in a
        # window of 15 on either side, not restricted to the sentence.
        {
            "name": "brain_localized",
            "regex": f"({'|'.join(BRAIN)})",
            "window": (-15, 15),
            "limit_to_sentence": False,
            "include_assigned": False,
        },
    ],
}

# Pattern for expressions that need no extra localization (explicit stroke
# wording, named conditions, thrombolysis), matched on the NORM attribute.
general = {
    "source": "general",
    "regex": [
        r"accident.{1,5}vasculaires.{1,5}cereb",
        r"accident.{1,5}vasculaire.{1,5}ischemi",
        r"accident.{1,5}ischemi",
        r"moya.?moya",
        r"occlusion.{1,5}(artere|veine).{1,20}retine",
        r"vasculopathies?.cerebrales?.ischemique",
        r"maladies?.des.petites.arteres",
        r"maladies?.des.petits.vaisseaux",
        r"thrombolyse",
        r"\bsusac\b",
    ],
    "regex_attr": "NORM",
}

# Pattern for upper-case acronyms; matched on the TEXT attribute, so the
# regexes are applied to the raw (case-preserving) text.
acronym = {
    "source": "acronym",
    "regex": [
        r"\bAIC\b",
        r"\bOACR\b",
        r"\bOVCR\b",
    ],
    "regex_attr": "TEXT",
}

# Pattern for the "AIT" acronym, matched on the TEXT attribute.
# NOTE(review): the first three regexes are identical to those of `acronym`;
# confirm whether this overlap is intentional.
AIT = {
    "source": "AIT",
    "regex": [
        r"\bAIC\b",
        r"\bOACR\b",
        r"\bOVCR\b",
        r"\bAIT\b",
    ],
    "regex_attr": "TEXT",
}

# Pattern for "ischemi..." mentions, matched on the NORM attribute, with a
# nearby brain localization captured under the `brain` key.
ischemia = {
    "source": "ischemia",
    "regex": [r"ischemi"],
    "exclude": [
        # Drop matches with a PERIPHERAL or HEART term in a window of 7 on
        # either side.
        {
            "regex": PERIPHERAL + HEART,
            "window": (-7, 7),
        },
    ],
    "assign": [
        # Look for any BRAIN term (joined as one alternation group) from 10
        # before to 15 after the match.
        {
            "name": "brain",
            "regex": f"({'|'.join(BRAIN)})",
            "window": (-10, 15),
        },
    ],
    "regex_attr": "NORM",
}

# Patterns used by default, in the order they are declared in this module.
default_patterns = [avc, with_localization, general, acronym, AIT, ischemia]
# fmt: on

Extensions

On each matched span `span`, the following attributes are available:

  • span._.detailed_status: set to None

Usage

import edsnlp, edsnlp.pipes as eds

# Start from a blank "eds" pipeline and add sentence splitting.
nlp = edsnlp.blank("eds")
nlp.add_pipe(eds.sentences())
# Most patterns above are matched on the NORM attribute, so the normalizer is
# added before the component (presumably it is what populates NORM).
nlp.add_pipe(
    eds.normalizer(
        accents=True,
        lowercase=True,
        quotes=True,
        spaces=True,
        pollution=dict(
            information=True,
            bars=True,
            biology=True,
            doctors=True,
            web=True,
            coding=True,
            footer=True,
        ),
    ),
)
# Add the cerebrovascular accident component with its default settings.
nlp.add_pipe(eds.cerebrovascular_accident())

Below are a few examples:

# "à AVC" corresponds to the `[àa] avc` entry of the `avc` exclusion regexes:
# no span is returned.
text = "Patient hospitalisé à AVC."
doc = nlp(text)
spans = doc.spans["cerebrovascular_accident"]

spans
# Out: []
# A plain "AVC" mention with no exclusion term nearby is returned.
text = "Hospitalisation pour un AVC."
doc = nlp(text)
spans = doc.spans["cerebrovascular_accident"]

spans
# Out: [AVC]
# A `with_localization` term next to a brain localization: the localization is
# exposed in `span._.assigned` under the `brain_localized` key.
text = "Saignement intracranien"
doc = nlp(text)
spans = doc.spans["cerebrovascular_accident"]

spans
# Out: [Saignement]

span = spans[0]

span._.assigned
# Out: {'brain_localized': [intracranien]}
# No span is returned here, presumably because no brain localization is found
# around "Thrombose".
text = "Thrombose périphérique"
doc = nlp(text)
spans = doc.spans["cerebrovascular_accident"]

spans
# Out: []
# Same term with a brain localization: the span is returned.
text = "Thrombose sylvienne"
doc = nlp(text)
spans = doc.spans["cerebrovascular_accident"]

spans
# Out: [Thrombose]

span = spans[0]

span._.assigned
# Out: {'brain_localized': [sylvienne]}
# "Infarctus" with a brain localization.
text = "Infarctus cérébral"
doc = nlp(text)
spans = doc.spans["cerebrovascular_accident"]

spans
# Out: [Infarctus]

span = spans[0]

span._.assigned
# Out: {'brain_localized': [cérébral]}
# "thrombolyse" is listed in the `general` pattern, which has no `assign` rule.
text = "Soigné via un thrombolyse"
doc = nlp(text)
spans = doc.spans["cerebrovascular_accident"]

spans
# Out: [thrombolyse]

Parameters

PARAMETER DESCRIPTION
nlp

The pipeline

TYPE: Optional[PipelineProtocol]

name

The name of the component

TYPE: Optional[str]

patterns

The patterns to use for matching

DEFAULT: [{'source': 'avc', 'regex': ['\\bavc\\b'], 'exc...

label

The label to use for the Span object and the extension

TYPE: str DEFAULT: cerebrovascular_accident

span_setter

How to set matches on the doc

TYPE: SpanSetterArg DEFAULT: {'ents': True, 'cerebrovascular_accident': True}

Authors and citation

The eds.cerebrovascular_accident component was developed by AP-HP's Data Science team with a team of medical experts, following the insights of the algorithm proposed by Petit-Jean et al., 2024.


  1. Petit-Jean T., Gérardin C., Berthelot E., Chatellier G., Frank M., Tannier X., Kempf E. and Bey R., 2024. Collaborative and privacy-enhancing workflows on a clinical data warehouse: an example developing natural language processing pipelines to detect medical conditions. Journal of the American Medical Informatics Association. 31, pp.1280-1290. 10.1093/jamia/ocae069