{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import polars as pl\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "from sklearn.metrics import roc_auc_score, accuracy_score, precision_score, recall_score, f1_score, RocCurveDisplay\n", "\n", "sns.set()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def pfbeta(labels, predictions, beta=1):\n", " y_true_count = 0\n", " ctp = 0\n", " cfp = 0\n", "\n", " for idx in range(len(labels)):\n", " prediction = min(max(predictions[idx], 0), 1)\n", " if (labels[idx]):\n", " y_true_count += 1\n", " ctp += prediction\n", " else:\n", " cfp += prediction\n", "\n", " beta_squared = beta * beta\n", " c_precision = ctp / (ctp + cfp)\n", " c_recall = ctp / y_true_count\n", " if (c_precision > 0 and c_recall > 0):\n", " result = (1 + beta_squared) * (c_precision * c_recall) / (beta_squared * c_precision + c_recall)\n", " return result\n", " else:\n", " return 0" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def get_part_metrics(df: pl.DataFrame, threshold=0.3) -> dict:\n", " df = df.with_columns((df[\"preds\"] > threshold).alias(\"preds_bin\"))\n", " metrics = {}\n", " # binary metrics using the threshold\n", " metrics[\"accuracy\"] = accuracy_score(df[\"labels\"].to_numpy(), df[\"preds_bin\"].to_numpy())\n", " metrics[\"precision\"] = precision_score(df[\"labels\"].to_numpy(), df[\"preds_bin\"].to_numpy())\n", " metrics[\"recall\"] = recall_score(df[\"labels\"].to_numpy(), df[\"preds_bin\"].to_numpy())\n", " metrics[\"f1\"] = f1_score(df[\"labels\"].to_numpy(), df[\"preds_bin\"].to_numpy())\n", " # probabilistic F1 (doesn't depend on the threshold)\n", " metrics[\"pf1\"] = pfbeta(df[\"labels\"].to_numpy(), df[\"preds\"].to_numpy())\n", " # ROC AUC\n", " metrics[\"roc_auc\"] = roc_auc_score(df[\"labels\"].to_numpy(), df[\"preds\"].to_numpy())\n", " return metrics\n", "\n", "\n", "def get_all_metrics(df: pl.DataFrame, threshold=0.3) -> pd.DataFrame:\n", " groups = [list(range(5)), [0, 1], [0, 4], [0, 2], [0, 3]]\n", " group_names = [\"all\", \"StableDiffusion\", \"Midjourney\", \"Dalle2\", \"Dalle3\"]\n", " all_metrics = []\n", " for i, g in enumerate(groups):\n", " subset = df.filter(pl.col(\"domains\").is_in(g))\n", " metrics = get_part_metrics(subset, threshold=threshold)\n", " metrics[\"group\"] = group_names[i]\n", " all_metrics.append(metrics)\n", " \n", " return pd.DataFrame(all_metrics)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df1 = pl.read_csv(\"outputs/preds-image-classifier-1.csv\")\n", "metrics_df1 = get_all_metrics(df1, threshold=0.5)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "metrics_df1" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df14 = pl.read_csv(\"outputs/preds-image-classifier-14.csv\")\n", "metrics_df14 = get_all_metrics(df14, threshold=0.5)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "metrics_df14" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df142 = pl.read_csv(\"outputs/preds-image-classifier-142.csv\")\n", "metrics_df142 = get_all_metrics(df142, threshold=0.5)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "metrics_df142" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df1423 = pl.read_csv(\"outputs/preds-image-classifier-1423.csv\")\n", "metrics_df1423 = get_all_metrics(df1423, threshold=0.5)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "metrics_df1423" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "GenAI-image-detection-Z_9oJJe7", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.6" } }, "nbformat": 4, "nbformat_minor": 2 }