Inference individual splitters

In [ ]:

Copied!





from twinweaver import (
    DataSplitterForecasting,
    DataManager,
    DataSplitterEvents,
    ConverterInstruction,
    Config,
)
import pandas as pd
from twinweaver import (
    DataSplitterForecasting,
    DataManager,
    DataSplitterEvents,
    ConverterInstruction,
    Config,
)
import pandas as pd

In [ ]:

Copied!





class ConvertToText:
    def __init__(
        self,
    ):
        # Set basics
        self.config = Config()
        self.config.constant_columns_to_use = [
            "birthyear",
            "gender",
            "histology",
            "smoking_history",
        ]  # Manually set from constant
        self.config.constant_birthdate_column = "birthyear"

        # Load data
        df_events = pd.read_csv("./examples/example_data/events.csv")
        df_constant = pd.read_csv("./examples/example_data/constant.csv")
        df_constant_description = pd.read_csv("./examples/example_data/constant_description.csv")

        # Init data managers
        self.dm = DataManager(config=self.config)
        self.dm.load_indication_data(
            df_events=df_events, df_constant=df_constant, df_constant_description=df_constant_description
        )
        self.dm.process_indication_data()
        self.dm.setup_unique_mapping_of_events()
        self.dm.setup_dataset_splits()
        self.dm.infer_var_types()

        self.data_splitter_events = DataSplitterEvents(self.dm, config=self.config)
        self.data_splitter_events.setup_variables()
        self.data_splitter_forecasting = DataSplitterForecasting(data_manager=self.dm, config=self.config)
        self.data_splitter_forecasting.setup_statistics()
        self.converter = ConverterInstruction(
            nr_tokens_budget_total=8192,
            config=self.config,
            dm=self.dm,
        )

    def convert_full_to_string_for_one_patient(self, patientid, override_events_or_forecasting="forecasting"):
        patient_data = self.dm.get_patient_data(patientid)
        patient_data["events"] = patient_data["events"].sort_values("date")

        # To simulate that we only have input, half the events
        patient_data["events"] = patient_data["events"].iloc[: int(len(patient_data["events"]) / 2)]

        # Here then split date
        split_date = patient_data["events"]["date"].iloc[-1]

        #: generate event split - NOTE: this if statement is only to exemplify both cases!
        if override_events_or_forecasting == "events":
            ####### Example if we want to override for events

            events_splits = self.data_splitter_events.get_splits_from_patient(
                patient_data,
                max_nr_samples=1,
                override_split_dates=[split_date],
                override_category="death",
                override_end_week_delta=52,
            )
            # We just pick the first one
            events_split = events_splits[0][0]

            #: no forecasting split
            forecast_split = None
            forecasting_times_to_predict = None
        else:
            ####### Example if we want to override for forecasting

            #: generate forecasting split
            forecast_splits = self.data_splitter_forecasting.get_splits_from_patient(
                patient_data,
                nr_samples_per_split=1,
                filter_outliers=False,
                override_split_dates=[split_date],
                override_variables_to_predict=["lab_26499_4"],
            )
            # We just pick the first one
            forecast_split = forecast_splits[0][0]

            # We set which weeks to predict
            forecasting_times_to_predict = {
                "lab_26499_4": [1, 2, 8, 11],
            }

            #: no events split
            events_split = None

        # Convert to text
        converted = self.converter.forward_conversion_inference(
            forecasting_split=forecast_split,
            forecasting_future_weeks_per_variable=forecasting_times_to_predict,
            event_split=events_split,
            custom_tasks=None,
        )
        return converted
class ConvertToText:
    def __init__(
        self,
    ):
        # Set basics
        self.config = Config()
        self.config.constant_columns_to_use = [
            "birthyear",
            "gender",
            "histology",
            "smoking_history",
        ]  # Manually set from constant
        self.config.constant_birthdate_column = "birthyear"

        # Load data
        df_events = pd.read_csv("./examples/example_data/events.csv")
        df_constant = pd.read_csv("./examples/example_data/constant.csv")
        df_constant_description = pd.read_csv("./examples/example_data/constant_description.csv")

        # Init data managers
        self.dm = DataManager(config=self.config)
        self.dm.load_indication_data(
            df_events=df_events, df_constant=df_constant, df_constant_description=df_constant_description
        )
        self.dm.process_indication_data()
        self.dm.setup_unique_mapping_of_events()
        self.dm.setup_dataset_splits()
        self.dm.infer_var_types()

        self.data_splitter_events = DataSplitterEvents(self.dm, config=self.config)
        self.data_splitter_events.setup_variables()
        self.data_splitter_forecasting = DataSplitterForecasting(data_manager=self.dm, config=self.config)
        self.data_splitter_forecasting.setup_statistics()
        self.converter = ConverterInstruction(
            nr_tokens_budget_total=8192,
            config=self.config,
            dm=self.dm,
        )

    def convert_full_to_string_for_one_patient(self, patientid, override_events_or_forecasting="forecasting"):
        patient_data = self.dm.get_patient_data(patientid)
        patient_data["events"] = patient_data["events"].sort_values("date")

        # To simulate that we only have input, half the events
        patient_data["events"] = patient_data["events"].iloc[: int(len(patient_data["events"]) / 2)]

        # Here then split date
        split_date = patient_data["events"]["date"].iloc[-1]

        #: generate event split - NOTE: this if statement is only to exemplify both cases!
        if override_events_or_forecasting == "events":
            ####### Example if we want to override for events

            events_splits = self.data_splitter_events.get_splits_from_patient(
                patient_data,
                max_nr_samples=1,
                override_split_dates=[split_date],
                override_category="death",
                override_end_week_delta=52,
            )
            # We just pick the first one
            events_split = events_splits[0][0]

            #: no forecasting split
            forecast_split = None
            forecasting_times_to_predict = None
        else:
            ####### Example if we want to override for forecasting

            #: generate forecasting split
            forecast_splits = self.data_splitter_forecasting.get_splits_from_patient(
                patient_data,
                nr_samples_per_split=1,
                filter_outliers=False,
                override_split_dates=[split_date],
                override_variables_to_predict=["lab_26499_4"],
            )
            # We just pick the first one
            forecast_split = forecast_splits[0][0]

            # We set which weeks to predict
            forecasting_times_to_predict = {
                "lab_26499_4": [1, 2, 8, 11],
            }

            #: no events split
            events_split = None

        # Convert to text
        converted = self.converter.forward_conversion_inference(
            forecasting_split=forecast_split,
            forecasting_future_weeks_per_variable=forecasting_times_to_predict,
            event_split=events_split,
            custom_tasks=None,
        )
        return converted

In [ ]:

Copied!

################################### Running the example #######################################
converter = ConvertToText()
################################### Running the example #######################################
converter = ConvertToText()

Example on how to run conversion for inference (i.e. we do not have target) Here we predict 52 week survival (as an event), and no forecasting

NOTE: run this from the root folder of twinweaver

In [ ]:

Copied!

all_patientids = converter.dm.all_patientids.copy()
all_patientids = all_patientids[:10]
all_patientids = converter.dm.all_patientids.copy()
all_patientids = all_patientids[:10]

In [ ]:

Copied!





for idx, patientid in enumerate(all_patientids):
    print(idx)

    #: go through all patients and convert them
    patient_data = converter.convert_full_to_string_for_one_patient(
        patientid, override_events_or_forecasting="forecasting"
    )
    print(patient_data["instruction"])
for idx, patientid in enumerate(all_patientids):
    print(idx)

    #: go through all patients and convert them
    patient_data = converter.convert_full_to_string_for_one_patient(
        patientid, override_events_or_forecasting="forecasting"
    )
    print(patient_data["instruction"])

In [ ]:

Copied!

print("Finished")
print("Finished")