edsnlp.pipelines.core.normalizer.pollution.patterns
# Legal-notice boilerplate about patient data, matched as one span:
#   1. an optional separator made of at least five "=" plus trailing whitespace;
#   2. one of four known opening sentences (whitespace between words is
#      flexible, and "L e s" may be split by whitespace);
#   3. at most 2000 arbitrary characters -- "(?s)" lets "." cross newlines;
#   4. the opt-out URL, followed by any trailing whitespace or dots.
information = (
    r"(?s)(=====+\s*)?"
    r"("
    r"L\s*e\s*s\sdonnées\s*administratives,\s*sociales\s*"
    r"|I?nfo\s*rmation\s*aux?\s*patients?"
    r"|L[’']AP-HP\s*collecte\s*vos\s*données\s*administratives"
    r"|L[’']Assistance\s*Publique\s*-\s*Hôpitaux\s*de\s*Paris\s*\(?AP-HP\)?"
    r"\s*a\s*créé\s*une\s*base\s*de\s*données"
    r")"
    r".{,2000}"
    r"https?:\/\/recherche\.aphp\.fr\/eds\/droit-opposition[\s\.]*"
)
module-attribute
# Horizontal rules: five or more consecutive characters drawn from
# n / b / w (case-insensitive), "_", "-" or "=".
bars = r"(?i)([nbw]|_|-|=){5,}"
module-attribute
# One or more consecutive newline-terminated lines that each contain a
# "|" or "¦" column separator; every line must start at a word boundary.
biology = r"(\b.*[|¦].*\n)+"
module-attribute
# A line that begins with "dr" or "pr" followed by ".", whitespace or "of"
# (so "Dr.", "Pr ", "Prof..."), through the end of that line.
# "(?mi)": "^" anchors at every line start and matching ignores case.
doctors = r"(?mi)(^((dr)|(pr))(\.|\s|of).*)+"
module-attribute
# Web artefacts: either "www." followed by non-whitespace characters, or a
# whitespace-delimited token that contains an "@".
web = (
    r"(www\.\S*)"
    r"|(\S*@\S*)"
)
module-attribute
# A code made of one ASCII letter plus 2-4 digits, together with the
# (lazily matched) text before it and the text after it up to the next
# newline or the next such code, whichever comes first.
coding = (
    r".*?[a-zA-Z]\d{2,4}"
    r".*?(\n|[a-zA-Z]\d{2,4})"
)
module-attribute
# Page footer such as "1/2 Patient ... IPP ...": digit "/" digit, optional
# whitespace, "pat", anything, "ipp", then the rest of the line and an
# optional newline. Case-insensitive.
# NOTE(review): no multiline flag is set in the pattern itself, so "^" only
# anchors at the very start of the searched text unless the caller adds
# re.MULTILINE -- confirm against the call site.
footer = r"(?i)^\d\/\d\s?pat.*ipp.*\n?"
module-attribute
# Registry of the pollution patterns defined in this module, keyed by the
# name of the constant that holds each regex.
pollution = {
    "information": information,
    "bars": bars,
    "biology": biology,
    "doctors": doctors,
    "web": web,
    "coding": coding,
    "footer": footer,
}
module-attribute