# fm4m-eval-demo / property_prediction_helpers.py
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR

# Limit OpenMP nested parallelism; set before importing fm4m so its native dependencies see it.
os.environ["OMP_MAX_ACTIVE_LEVELS"] = "1"

import models.fm4m as fm4m


# Build the requested downstream model and return a "<name> * <params>" description string.
def _create_model(
model_name, max_depth=None, n_estimators=None, alpha=None, degree=None, kernel=None
):
if model_name == "XGBClassifier":
model = xgb.XGBClassifier(
objective='binary:logistic',
eval_metric='auc',
max_depth=max_depth,
n_estimators=n_estimators,
alpha=alpha,
)
elif model_name == "SVR":
model = SVR(degree=degree, kernel=kernel)
elif model_name == "Kernel Ridge":
model = KernelRidge(alpha=alpha, degree=degree, kernel=kernel)
elif model_name == "Linear Regression":
model = LinearRegression()
elif model_name == "Default - Auto":
return "Default Settings"
else:
return "Model not supported."
    # The "<name> * <params dict>" string is split back apart later by display_eval.
    return f"{model_name} * {model.get_params()}"
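

# Illustrative example of the description string (hyperparameter values are made up):
#   _create_model("SVR", degree=3, kernel="rbf")
#   # -> "SVR * {'C': 1.0, ..., 'degree': 3, ..., 'kernel': 'rbf', ...}"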


# Build the downstream model description from the hyperparameters stored in the UI state dict.
def create_downstream_model(state):
model_name, max_depth, n_estimators, alpha, degree, kernel = (
state["model_name"],
state.get("max_depth"),
state.get("n_estimators"),
state.get("alpha"),
state.get("degree"),
state.get("kernel"),
)
if model_name == "XGBClassifier":
return _create_model(
model_name,
max_depth=max_depth,
n_estimators=n_estimators,
alpha=alpha,
)
elif model_name == "SVR":
return _create_model(model_name, degree=degree, kernel=kernel)
elif model_name == "Kernel Ridge":
return _create_model(model_name, alpha=alpha, degree=degree, kernel=kernel)
    elif model_name == "Linear Regression":
        return _create_model(model_name)
    elif model_name == "Default - Auto":
        return _create_model(model_name)
    else:
        # Unknown names fall through; _create_model answers "Model not supported."
        return _create_model(model_name)
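

# Illustrative call (the state keys mirror those read above; the values are placeholders):
#   state = {"model_name": "XGBClassifier", "max_depth": 6, "n_estimators": 200, "alpha": 0.1}
#   create_downstream_model(state)
#   # -> "XGBClassifier * {'objective': 'binary:logistic', ...}"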


# Run the evaluation for the selected embedding models and return the score text,
# storing plot data (ROC curve, parity data, t-SNE batches) in plot_state.
def display_eval(selected_models, dataset, task_type, state, plot_state):
    downstream = create_downstream_model(state)
    # From here on, "state" refers to the per-run plot-state dict that display_plot later reads.
    state = plot_state
    result = None
    try:
        # create_downstream_model returns "<name> * <params dict>"; split it back apart.
        downstream_model = downstream.split("*")[0].strip()
        hyp_param = downstream.split("*")[-1].strip()
        # get_params() may contain nan values; keep the string evaluable.
        hyp_param = hyp_param.replace("nan", "float('nan')")
        params = eval(hyp_param)
    except Exception:
        # "Default Settings" and "Model not supported." carry no hyperparameter dict.
        downstream_model = downstream.split("*")[0].strip()
        params = None
try:
if not selected_models:
return "Please select at least one enabled model."
        # Multiple selections are fused with fm4m.multi_modal; a single selection
        # goes through fm4m.single_modal.
        if len(selected_models) > 1:
if task_type == "Classification":
if downstream_model == "Default Settings":
downstream_model = "DefaultClassifier"
params = None
(
result,
state["roc_auc"],
state["fpr"],
state["tpr"],
state["x_batch"],
state["y_batch"],
) = fm4m.multi_modal(
model_list=selected_models,
downstream_model=downstream_model,
params=params,
dataset=dataset,
)
elif task_type == "Regression":
if downstream_model == "Default Settings":
downstream_model = "DefaultRegressor"
params = None
(
result,
state["RMSE"],
state["y_batch_test"],
state["y_prob"],
state["x_batch"],
state["y_batch"],
) = fm4m.multi_modal(
model_list=selected_models,
downstream_model=downstream_model,
params=params,
dataset=dataset,
)
else:
if task_type == "Classification":
if downstream_model == "Default Settings":
downstream_model = "DefaultClassifier"
params = None
(
result,
state["roc_auc"],
state["fpr"],
state["tpr"],
state["x_batch"],
state["y_batch"],
) = fm4m.single_modal(
model=selected_models[0],
downstream_model=downstream_model,
params=params,
dataset=dataset,
)
elif task_type == "Regression":
if downstream_model == "Default Settings":
downstream_model = "DefaultRegressor"
params = None
(
result,
state["RMSE"],
state["y_batch_test"],
state["y_prob"],
state["x_batch"],
state["y_batch"],
) = fm4m.single_modal(
model=selected_models[0],
downstream_model=downstream_model,
params=params,
dataset=dataset,
)
except Exception as e:
return f"An error occurred: {e}"
    return result or "Data & model settings are incorrect."
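

# Illustrative call (the model and dataset names are placeholders, not a fixed list):
#   plot_state = {}
#   msg = display_eval(["SELFIES-TED", "MHG-GED"], "BACE", "Classification",
#                      {"model_name": "Default - Auto"}, plot_state)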


# Render the requested plot from the data stored in the per-run plot state.
def display_plot(plot_type, state):
    fig, ax = plt.subplots()
    if plot_type == "Latent Space":
        x_batch = np.asarray(state.get("x_batch"))
        y_batch = np.asarray(state.get("y_batch"))
        # Split the 2-D embedding by class label so each class gets its own colour
        # (assumes binary classification labels).
        class_0 = x_batch[y_batch == 0]
        class_1 = x_batch[y_batch == 1]
        ax.scatter(class_1[:, 0], class_1[:, 1], c='red', label='Class 1')
        ax.scatter(class_0[:, 0], class_0[:, 1], c='blue', label='Class 0')
        ax.set_xlabel('Feature 1')
        ax.set_ylabel('Feature 2')
        ax.set_title('t-SNE Plot of the Dataset Distribution')
        ax.legend(loc='best')
    elif plot_type == "ROC-AUC":
        roc_auc, fpr, tpr = state.get("roc_auc"), state.get("fpr"), state.get("tpr")
        try:
            ax.plot(
                fpr,
                tpr,
                color='darkorange',
                lw=2,
                label=f'ROC curve (area = {roc_auc:.4f})',
            )
            ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
            ax.set_xlim([0.0, 1.0])
            ax.set_ylim([0.0, 1.05])
            ax.legend(loc='lower right')
        except Exception:
            # No classification results stored yet; leave the axes empty.
            pass
        ax.set_xlabel('False Positive Rate')
        ax.set_ylabel('True Positive Rate')
        ax.set_title('Receiver Operating Characteristic')
    elif plot_type == "Parity Plot":
        RMSE, y_batch_test, y_prob = (
            state.get("RMSE"),
            state.get("y_batch_test"),
            state.get("y_prob"),
        )
        ax.set_title("Parity Plot")
        try:
            # Cast to float arrays so plotting works regardless of the stored format.
            y_batch_test = np.array(y_batch_test, dtype=float)
            y_prob = np.array(y_prob, dtype=float)
            ax.scatter(
                y_batch_test,
                y_prob,
                color="blue",
                label=f"Predicted vs Actual (RMSE: {RMSE:.4f})",
            )
            # Identity line marking perfect predictions.
            min_val = min(y_batch_test.min(), y_prob.min())
            max_val = max(y_batch_test.max(), y_prob.max())
            ax.plot([min_val, max_val], [min_val, max_val], 'r-')
            ax.legend(loc='lower right')
        except Exception:
            # No regression results stored yet; leave the axes empty.
            pass
        ax.set_xlabel('Actual Values')
        ax.set_ylabel('Predicted Values')
return fig
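

# Illustrative use (assumes display_eval has already populated the plot state):
#   fig = display_plot("ROC-AUC", plot_state)
#   fig.savefig("roc_auc.png")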


# Run an evaluation, store its plot data under a fresh run id, and append the score to the log table.
def evaluate_and_log(selected_models, dataset, task_type, log_df, state):
    log_df = log_df[log_df['id'] != '']
    run_id = len(log_df) + 1  # sequential id for this evaluation run
    plot_state = {"roc_auc": None, "RMSE": None, "x_batch": None}
    state["results"][run_id] = plot_state
    eval_output = display_eval(selected_models, dataset, task_type, state, plot_state)
    new_entry_df = pd.DataFrame(
        [
            {
                "id": run_id,
                "Model": " + ".join(selected_models),
                "Score": eval_output.replace(" Score", ""),
            }
        ]
    )
    log_df = pd.concat([log_df, new_entry_df])
return log_df
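

# Illustrative wiring (column names and state layout mirror the code above; the dataset and
# model names are placeholders):
#   log_df = pd.DataFrame(columns=["id", "Model", "Score"])
#   state = {"model_name": "Default - Auto", "results": {}}
#   log_df = evaluate_and_log(["SELFIES-TED"], "BACE", "Classification", log_df, state)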