Cerebrovascular accident

The eds.cerebrovascular_accident pipeline component extracts mentions of cerebrovascular accident. It will notably match:

Mentions of AVC/AIT
Mentions of bleeding, hemorrhage, thrombus, ischemia, etc., localized in the brain

Details of the used patterns

# fmt: off
import re

from edsnlp.utils.resources import get_AVC_care_site

from ..terms import BRAIN, HEART, PERIPHERAL

# Regexes used by the `avc` pattern to exclude "AVC" mentions found next to
# care-site or administrative vocabulary.
AVC_CARE_SITES_REGEX = [
    # Entries returned by the resource helper (presumably care-site names --
    # verify against `get_AVC_care_site`), escaped so they match literally
    # and wrapped in word boundaries.
    *(
        rf"\b{re.escape(site.strip())}\b"
        for site in get_AVC_care_site(prefix=True)
    ),
    # Hand-written cues.
    r"h[oô]p",
    r"\brcp",
    r"service",
    r"\bsau",
    r"ap.?hp",
    r"\burg",
    r"finess",
    r"\bsiret",
    r"[àa] avc",
    r"consult",
]

# Bare "avc" acronym, searched on the NORM attribute.
avc = {
    "source": "avc",
    "regex": [r"\bavc\b"],
    "exclude": [
        # Discard the match when any care-site regex is found in the
        # (-5, 5) window; the search is not limited to the sentence.
        {
            "regex": AVC_CARE_SITES_REGEX,
            "window": (-5, 5),
            "regex_flags": re.S | re.I,
            "limit_to_sentence": False,
        },
        # Discard the match when a single lowercase letter followed by a
        # period is found in the window of size 2.
        {
            "regex": r"\b[a-z]\.",
            "window": 2,
            "limit_to_sentence": False,
        },
    ],
    "regex_attr": "NORM",
}

# Vascular events (bleeding, thrombus, occlusion, ...) paired with a search
# for a brain localization around the match.
with_localization = {
    "source": "with_localization",
    "regex": [
        r"(hemorr?agie|hematome)",
        r"angiopath",
        r"angioplasti",
        r"infarctus",
        r"occlusion",
        r"saignement",
        r"embol",
        r"vascularite",
        r"\bhsd\b",
        r"thrombos",
        r"thrombol[^y]",  # "thrombol" not followed by "y"
        r"thrombophi",
        r"thrombi[^n]",  # "thrombi" not followed by "n"
        r"thrombus",
        r"thrombectomi",
        r"phleb",
    ],
    "regex_attr": "NORM",
    "exclude": [
        # Discard the match when a lung-related term is found in the window.
        {
            "regex": r"pulmo|poumon",
            "window": 4,
        },
    ],
    "assign": [
        # Look for any BRAIN term in the (-15, 15) window, without limiting
        # the search to the sentence, and store it as `brain_localized`.
        {
            "name": "brain_localized",
            "regex": f"({'|'.join(BRAIN)})",
            "window": (-15, 15),
            "limit_to_sentence": False,
            "include_assigned": False,
        },
    ],
}

# Self-sufficient expressions: a match needs no extra localization.
# Searched on the NORM attribute.
general = dict(
    source="general",
    regex=[
        # The trailing "s" is optional so that both the singular
        # ("accident vasculaire cerebral") and the plural
        # ("accidents vasculaires cerebraux") forms are matched.
        r"accident.{1,5}vasculaires?.{1,5}cereb",
        r"accident.{1,5}vasculaire.{1,5}ischemi",
        r"accident.{1,5}ischemi",
        r"moya.?moya",
        r"occlusion.{1,5}(artere|veine).{1,20}retine",
        r"vasculopathies?.cerebrales?.ischemique",
        r"maladies?.des.petites.arteres",
        r"maladies?.des.petits.vaisseaux",
        r"thrombolyse",
        r"\bsusac\b",
    ],
    regex_attr="NORM",
)

# Upper-case acronyms, searched on the raw TEXT attribute.
acronym = {
    "source": "acronym",
    "regex": [r"\bAIC\b", r"\bOACR\b", r"\bOVCR\b"],
    "regex_attr": "TEXT",
}

# "AIT" acronym, searched on the raw TEXT attribute.
# Only the AIT regex belongs here: AIC / OACR / OVCR are already handled by
# the `acronym` pattern, and repeating them would match the same mention
# twice under two different sources.
AIT = dict(
    source="AIT",
    regex=[
        r"\bAIT\b",
    ],
    regex_attr="TEXT",
)

# Generic "ischemi..." mentions, searched on the NORM attribute.
ischemia = {
    "source": "ischemia",
    "regex": [r"ischemi"],
    "exclude": [
        # Discard the match when a PERIPHERAL or HEART term is found in the
        # (-7, 7) window.
        {
            "regex": PERIPHERAL + HEART,
            "window": (-7, 7),
        },
    ],
    "assign": [
        # Look for any BRAIN term in the (-10, 15) window and store it as
        # `brain`.
        {
            "name": "brain",
            "regex": f"({'|'.join(BRAIN)})",
            "window": (-10, 15),
        },
    ],
    "regex_attr": "NORM",
}

# All pattern groups defined in this module, in declaration order.
default_patterns = [avc, with_localization, general, acronym, AIT, ischemia]
# fmt: on

Extensions

On each span that matches, the following attributes are available:

span._.detailed_status: set to "PRESENT"

Usage

import edsnlp

# Build a blank "eds" pipeline with sentence splitting and normalization,
# then add the cerebrovascular accident component.
nlp = edsnlp.blank("eds")
nlp.add_pipe("eds.sentences")
nlp.add_pipe(
    "eds.normalizer",
    config=dict(
        accents=True,
        lowercase=True,
        quotes=True,
        spaces=True,
        pollution=dict(
            information=True,
            bars=True,
            biology=True,
            doctors=True,
            web=True,
            coding=True,
            footer=True,
        ),
    ),
)
# Plain string literal: the name has no placeholder, so no f-string is needed.
nlp.add_pipe("eds.cerebrovascular_accident")

Below are a few examples:

1234567

# "à AVC" is one of the care-site exclusion regexes of the `avc` pattern
# (`[àa] avc`): nothing is extracted.
text = "Patient hospitalisé à AVC."
doc = nlp(text)
spans = doc.spans["cerebrovascular_accident"]

spans
# Out: []

# A plain "AVC" mention is extracted.
text = "Hospitalisation pour un AVC."
doc = nlp(text)
spans = doc.spans["cerebrovascular_accident"]

spans
# Out: [AVC]

# Bleeding term accompanied by a brain localization.
text = "Saignement intracranien"
doc = nlp(text)
spans = doc.spans["cerebrovascular_accident"]

spans
# Out: [Saignement]

span = spans[0]

# The localization is exposed under the `brain_localized` key and is not
# part of the extracted span itself.
span._.assigned
# Out: {'brain_localized': [intracranien]}

# Thrombosis term without a brain localization: nothing is extracted.
text = "Thrombose périphérique"
doc = nlp(text)
spans = doc.spans["cerebrovascular_accident"]

spans
# Out: []

# Same term, this time with a brain localization.
text = "Thrombose sylvienne"
doc = nlp(text)
spans = doc.spans["cerebrovascular_accident"]

spans
# Out: [Thrombose]

span = spans[0]

span._.assigned
# Out: {'brain_localized': [sylvienne]}

# "Infarctus" accompanied by a brain localization.
text = "Infarctus cérébral"
doc = nlp(text)
spans = doc.spans["cerebrovascular_accident"]

spans
# Out: [Infarctus]

span = spans[0]

span._.assigned
# Out: {'brain_localized': [cérébral]}

# "thrombolyse" is one of the `general` regexes: it is extracted on its own.
text = "Soigné via un thrombolyse"
doc = nlp(text)
spans = doc.spans["cerebrovascular_accident"]

spans
# Out: [thrombolyse]

Parameters

PARAMETER	DESCRIPTION
`nlp`	The pipeline TYPE: `Optional[PipelineProtocol]`
`name`	The name of the component TYPE: `Optional[str]` DEFAULT: `'eds.cerebrovascular_accident'`
`patterns`	The patterns to use for matching TYPE: `Union[Dict[str, Any], List[Dict[str, Any]]]` DEFAULT: `[{'source': 'avc', 'regex': ['\\bavc\\b'], 'exc...`
`label`	The label to use for the `Span` object and the extension TYPE: `str` DEFAULT: `cerebrovascular_accident`
`span_setter`	How to set matches on the doc TYPE: `SpanSetterArg` DEFAULT: `{'ents': True, 'cerebrovascular_accident': True}`

Authors and citation

The eds.cerebrovascular_accident component was developed by AP-HP's Data Science team with a team of medical experts. A paper describing in detail the development of these components is being drafted and will soon be available.