Consultation dates - eds-scikit

def get_consultation_dates_nlp(
    note_nlp: DataFrame,
    dates_to_keep: str = "min",
) -> DataFrame:
    """
    Uses consultation dates extracted *a priori* in consultation reports to infer *true* consultation dates

    Parameters
    ----------
    note_nlp : DataFrame
        A DataFrame with (at least) the following columns:

        - `note_id`
        - `consultation_date`
        - `start` **if** using `dates_to_keep="first"`:
          rows are sorted by `start` to decide which extracted date comes
          first in a note (presumably the character offset of the
          extracted date -- confirm against the extraction pipeline).
    dates_to_keep : str, optional
        How to handle multiple consultation dates found in the document:

        - `min`: keep the oldest one
        - `first`: keep the occurrence that appeared first in the text
        - `all`: keep all dates

    Returns
    -------
    DataFrame
        Indexed by `note_id` (one row per note for `min` and `first`,
        one row per extracted date for `all`), with 2 columns:

        - `CONSULTATION_DATE`, containing the date
        - `CONSULTATION_DATE_EXTRACTION`, containing `"NLP"`

    Raises
    ------
    ValueError
        If `dates_to_keep` is not one of `"min"`, `"first"` or `"all"`.
    """

    # Fail early with an explicit message: without this guard an unknown
    # value falls through every branch and crashes later on an unbound
    # local variable.
    allowed = ("min", "first", "all")
    if dates_to_keep not in allowed:
        raise ValueError(
            f"`dates_to_keep` should be one of {allowed}, got {dates_to_keep!r}"
        )

    if dates_to_keep == "min":
        dates_per_note = note_nlp.groupby("note_id").agg(
            CONSULTATION_DATE=("consultation_date", "min"),
        )
    elif dates_to_keep == "first":
        # Sorting before grouping makes "first" pick the row with the
        # smallest `start` within each note.
        dates_per_note = (
            note_nlp.sort_values(by="start")
            .groupby("note_id")
            .agg(CONSULTATION_DATE=("consultation_date", "first"))
        )
    else:  # dates_to_keep == "all"
        dates_per_note = note_nlp[["consultation_date", "note_id"]].set_index("note_id")
        dates_per_note = dates_per_note.rename(
            columns={"consultation_date": "CONSULTATION_DATE"}
        )

    dates_per_note["CONSULTATION_DATE_EXTRACTION"] = "NLP"

    return dates_per_note