{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%pip install polars-lts-cpu" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import polars as pl\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "from sklearn.metrics import roc_auc_score, accuracy_score, precision_score, recall_score, f1_score" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def pfbeta(labels, predictions, beta=1):\n", " y_true_count = 0\n", " ctp = 0\n", " cfp = 0\n", "\n", " for idx in range(len(labels)):\n", " prediction = min(max(predictions[idx], 0), 1)\n", " if (labels[idx]):\n", " y_true_count += 1\n", " ctp += prediction\n", " else:\n", " cfp += prediction\n", "\n", " beta_squared = beta * beta\n", " c_precision = ctp / (ctp + cfp)\n", " c_recall = ctp / y_true_count\n", " if (c_precision > 0 and c_recall > 0):\n", " result = (1 + beta_squared) * (c_precision * c_recall) / (beta_squared * c_precision + c_recall)\n", " return result\n", " else:\n", " return 0\n", "\n", "def get_part_metrics(df: pl.DataFrame, threshold=0.3) -> dict:\n", " df = df.with_columns((df[\"preds\"] > threshold).alias(\"preds_bin\"))\n", " metrics = {}\n", " # binary metrics using the threshold\n", " metrics[\"accuracy\"] = accuracy_score(df[\"labels\"].to_numpy(), df[\"preds_bin\"].to_numpy())\n", " metrics[\"precision\"] = precision_score(df[\"labels\"].to_numpy(), df[\"preds_bin\"].to_numpy())\n", " metrics[\"recall\"] = recall_score(df[\"labels\"].to_numpy(), df[\"preds_bin\"].to_numpy())\n", " metrics[\"f1\"] = f1_score(df[\"labels\"].to_numpy(), df[\"preds_bin\"].to_numpy())\n", " # probabilistic F1 (doesn't depend on the threshold)\n", " metrics[\"pf1\"] = pfbeta(df[\"labels\"].to_numpy(), df[\"preds\"].to_numpy())\n", " # ROC AUC\n", " metrics[\"roc_auc\"] = roc_auc_score(df[\"labels\"].to_numpy(), df[\"preds\"].to_numpy())\n", " return metrics\n", "\n", "\n", "def get_all_metrics(df: pl.DataFrame, threshold=0.3) -> pd.DataFrame:\n", " groups = [list(range(5)), [0, 1], [0, 4], [0, 2], [0, 3]]\n", " group_names = [\"all\", \"StableDiffusion\", \"Midjourney\", \"Dalle2\", \"Dalle3\"]\n", " all_metrics = []\n", " for i, g in enumerate(groups):\n", " subset = df.filter(pl.col(\"domains\").is_in(g))\n", " metrics = get_part_metrics(subset, threshold=threshold)\n", " metrics[\"group\"] = group_names[i]\n", " all_metrics.append(metrics)\n", " \n", " return pd.DataFrame(all_metrics)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Load the data from the output files\n", "df1 = pl.read_csv('/Users/fionachow/Downloads/outputs/preds-image-classifier-1.csv')\n", "df14 = pl.read_csv('/Users/fionachow/Downloads/outputs/preds-image-classifier-14.csv')\n", "df142 = pl.read_csv('/Users/fionachow/Downloads/outputs/preds-image-classifier-142.csv')\n", "df1423 = pl.read_csv('/Users/fionachow/Downloads/outputs/preds-image-classifier-1423.csv')\n", "\n", "metrics_df1 = get_all_metrics(df1, threshold=0.5)\n", "metrics_df14 = get_all_metrics(df14, threshold=0.5)\n", "metrics_df142 = get_all_metrics(df142, threshold=0.5)\n", "metrics_df1423 = get_all_metrics(df1423, threshold=0.5)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "metrics_df1.info()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ 
"sns.set()\n", "\n", "models = ['StableDiffusion', 'Midjourney', 'Dalle2', 'Dalle3']\n", "metrics = ['accuracy', 'f1', 'pf1', 'roc_auc']\n", "\n", "file_map = {\n", " ('StableDiffusion',): metrics_df1,\n", " ('StableDiffusion', 'Midjourney'): metrics_df14,\n", " ('StableDiffusion', 'Midjourney', 'Dalle2'): metrics_df142,\n", " ('StableDiffusion', 'Midjourney', 'Dalle2', 'Dalle3'): metrics_df1423,\n", "}\n", "\n", "def create_heatmap_data(metric):\n", " data = pd.DataFrame(index=models[::-1], columns=models)\n", " for i, model_x in enumerate(models):\n", " for j, model_y in enumerate(models[::-1]):\n", " \n", " if i == 0:\n", " relevant_df = metrics_df1\n", " elif i == 1:\n", " relevant_df = metrics_df14\n", " elif i == 2:\n", " relevant_df = metrics_df142\n", " else:\n", " relevant_df = metrics_df1423\n", "\n", " # Debugging: print the DataFrame being used and the model_y\n", " #print(f\"Using DataFrame for {models[:i+1]}, model_y: {model_y}\")\n", "\n", " # Extract the metric value\n", " if model_y in relevant_df['group'].values:\n", " metric_value = relevant_df[relevant_df['group'] == model_y][metric].values[0]\n", " # Debugging: print the extracted metric value\n", " #print(f\"Metric value for {model_y}: {metric_value}\")\n", " else:\n", " metric_value = float('nan') # Handle non-existent cases\n", " # Debugging: print a message for non-existent cases\n", " #print(f\"No data for combination: {model_x}, {model_y}\")\n", "\n", " data.at[model_y, model_x] = metric_value\n", " \n", " for col in data.columns:\n", " data[col] = pd.to_numeric(data[col], errors='coerce')\n", "\n", " # Debugging: print the final DataFrame\n", " # print(f\"Final Data for metric {metric}:\")\n", " # print(data)\n", " # print(data.dtypes)\n", " return data\n", "\n", "for metric in metrics:\n", " plt.figure(figsize=(10, 8))\n", " sns.heatmap(create_heatmap_data(metric), annot=True, cmap='coolwarm', fmt='.3f')\n", " plt.title(f\"Heatmap for {metric}\")\n", " plt.xlabel(\"Trained On (x-axis)\")\n", " plt.ylabel(\"Tested On (y-axis)\")\n", " plt.show()" ] } ], "metadata": { "kernelspec": { "display_name": "bloom", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.16" } }, "nbformat": 4, "nbformat_minor": 2 }