Spaces:

Timxjl
/

text2svg-demo-app

Paused

File size: 12,863 Bytes

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/user/miniconda3/envs/dwl/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div><style>\n",
       ".dataframe > thead > tr,\n",
       ".dataframe > tbody > tr {\n",
       "  text-align: right;\n",
       "  white-space: pre-wrap;\n",
       "}\n",
       "</style>\n",
       "<small>shape: (5, 2)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>id</th><th>description</th></tr><tr><td>str</td><td>str</td></tr></thead><tbody><tr><td>&quot;02d892&quot;</td><td>&quot;a purple forest at dusk&quot;</td></tr><tr><td>&quot;0dcd2e&quot;</td><td>&quot;gray wool coat with a faux fur…</td></tr><tr><td>&quot;1e9ac1&quot;</td><td>&quot;a lighthouse overlooking the o…</td></tr><tr><td>&quot;2b25db&quot;</td><td>&quot;burgundy corduroy pants with p…</td></tr><tr><td>&quot;4e6a54&quot;</td><td>&quot;orange corduroy overalls&quot;</td></tr></tbody></table></div>"
      ],
      "text/plain": [
       "shape: (5, 2)\n",
       "┌────────┬─────────────────────────────────┐\n",
       "│ id     ┆ description                     │\n",
       "│ ---    ┆ ---                             │\n",
       "│ str    ┆ str                             │\n",
       "╞════════╪═════════════════════════════════╡\n",
       "│ 02d892 ┆ a purple forest at dusk         │\n",
       "│ 0dcd2e ┆ gray wool coat with a faux fur… │\n",
       "│ 1e9ac1 ┆ a lighthouse overlooking the o… │\n",
       "│ 2b25db ┆ burgundy corduroy pants with p… │\n",
       "│ 4e6a54 ┆ orange corduroy overalls        │\n",
       "└────────┴─────────────────────────────────┘"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# We can load and explore the competition's train set to get a feel for the data.\n",
    "# We're not going to export this cell as it's not needed for our exported inferenceable model.\n",
    "\n",
    "import kagglehub\n",
    "import polars as pl\n",
    "\n",
    "train_path = kagglehub.competition_download('drawing-with-llms', 'train.csv')\n",
    "train = pl.read_csv(train_path)\n",
    "\n",
    "train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Model:\n",
    "    def __init__(self):\n",
    "        '''Optional constructor, performs any setup logic, model instantiation, etc.'''\n",
    "        pass\n",
    "      \n",
    "    def predict(self, prompt: str) -> str:\n",
    "        '''Generates SVG which produces an image described by the prompt.\n",
    "\n",
    "        Args:\n",
    "            prompt (str): A prompt describing an image\n",
    "        Returns:\n",
    "            String of valid SVG code.\n",
    "        '''\n",
    "        # Renders a simple circle regardless of input\n",
    "        return '<svg width=\"100\" height=\"100\" viewBox=\"0 0 100 100\"><circle cx=\"50\" cy=\"50\" r=\"40\" fill=\"red\" /></svg>'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<svg width=\"100\" height=\"100\" viewBox=\"0 0 100 100\"><circle cx=\"50\" cy=\"50\" r=\"40\" fill=\"red\" /></svg>\n"
     ]
    },
    {
     "data": {
      "image/svg+xml": [
       "<svg width=\"100\" height=\"100\" viewBox=\"0 0 100 100\"><circle cx=\"50\" cy=\"50\" r=\"40\" fill=\"red\"/></svg>"
      ],
      "text/plain": [
       "<IPython.core.display.SVG object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from IPython.display import SVG\n",
    "\n",
    "model = Model()\n",
    "svg = model.predict('a goose winning a gold medal')\n",
    "\n",
    "print(svg)\n",
    "display(SVG(svg))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['RN50',\n",
       " 'RN101',\n",
       " 'RN50x4',\n",
       " 'RN50x16',\n",
       " 'RN50x64',\n",
       " 'ViT-B/32',\n",
       " 'ViT-B/16',\n",
       " 'ViT-L/14',\n",
       " 'ViT-L/14@336px']"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import clip\n",
    "clip.available_models()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2025-04-20 13:55:34.589770: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
      "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
      "E0000 00:00:1745171734.600777   13214 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
      "E0000 00:00:1745171734.603957   13214 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
      "W0000 00:00:1745171734.615566   13214 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
      "W0000 00:00:1745171734.615584   13214 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
      "W0000 00:00:1745171734.615585   13214 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
      "W0000 00:00:1745171734.615586   13214 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
      "2025-04-20 13:55:34.618659: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
      "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
      "Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.\n",
      "Loading checkpoint shards: 100%|██████████| 4/4 [00:18<00:00,  4.68s/it]\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import importlib\n",
    "metric = importlib.import_module('metric')\n",
    "importlib.reload(metric)\n",
    "\n",
    "vqa_evaluator = metric.VQAEvaluator()\n",
    "aesthetic_evaluator = metric.AestheticEvaluator()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "VQA Score: 0.9996758976500401\n",
      "Aesthetic Score: 0.5749330520629883\n",
      "Final Fidelity Score: 0.8709845773271212\n"
     ]
    }
   ],
   "source": [
    "# score gpt4o generated images\n",
    "import ast\n",
    "import numpy as np\n",
    "from PIL import Image\n",
    "\n",
    "# Load the first sample from descriptions.csv\n",
    "descriptions_df = pd.read_csv('data/descriptions.csv')\n",
    "first_description = descriptions_df.iloc[1]\n",
    "\n",
    "eval_df = pd.read_csv('data/eval.csv')\n",
    "first_eval = eval_df.iloc[1]\n",
    "\n",
    "# Load the image\n",
    "image_path = 'data/gray_coat.png'  # Assuming the image is saved with this name\n",
    "image = Image.open(image_path)\n",
    "\n",
    "# Prepare the inputs for scoring - need to parse the string representations\n",
    "questions = ast.literal_eval(first_eval['question'])\n",
    "choices = ast.literal_eval(first_eval['choices'])\n",
    "answers = ast.literal_eval(first_eval['answer'])\n",
    "\n",
    "# Calculate VQA score - don't wrap in additional lists\n",
    "vqa_score = vqa_evaluator.score(questions, choices, answers, image)\n",
    "\n",
    "# Calculate aesthetic score\n",
    "aesthetic_score = aesthetic_evaluator.score(image)\n",
    "\n",
    "# Apply image processing as done in the metric.score function\n",
    "image_processor = metric.ImageProcessor(image=image, seed=0).apply()\n",
    "processed_image = image_processor.image.copy()\n",
    "\n",
    "# Calculate final fidelity score\n",
    "instance_score = metric.harmonic_mean(vqa_score, aesthetic_score, beta=0.5)\n",
    "\n",
    "print(f\"VQA Score: {vqa_score}\")\n",
    "print(f\"Aesthetic Score: {aesthetic_score}\")\n",
    "print(f\"Final Fidelity Score: {instance_score}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "No duplicate IDs found in data/descriptions.csv\n",
      "Sorted rows by ID\n",
      "Fixed and sorted CSV saved to data/descriptions.csv\n",
      "No duplicate IDs found in data/eval.csv\n",
      "Sorted data/eval.csv by ID\n"
     ]
    }
   ],
   "source": [
    "# Fix duplicate IDs in descriptions.csv and order rows by id\n",
    "def fix_duplicate_ids(csv_path):\n",
    "    \"\"\"\n",
    "    Fix duplicate IDs in a CSV file by assigning new unique IDs to duplicates.\n",
    "    Then order rows by ID.\n",
    "    \"\"\"\n",
    "    # Read the CSV file\n",
    "    df = pd.read_csv(csv_path)\n",
    "    \n",
    "    # Check for duplicate IDs\n",
    "    duplicate_mask = df['id'].duplicated(keep='first')\n",
    "    duplicate_count = duplicate_mask.sum()\n",
    "    \n",
    "    if duplicate_count > 0:\n",
    "        print(f\"Found {duplicate_count} duplicate IDs in {csv_path}\")\n",
    "        \n",
    "        # Get the maximum ID value\n",
    "        max_id = df['id'].max()\n",
    "        \n",
    "        # Assign new IDs to duplicates\n",
    "        new_ids = list(range(max_id + 1, max_id + 1 + duplicate_count))\n",
    "        df.loc[duplicate_mask, 'id'] = new_ids\n",
    "        \n",
    "        print(f\"Assigned new IDs to duplicates\")\n",
    "    else:\n",
    "        print(f\"No duplicate IDs found in {csv_path}\")\n",
    "    \n",
    "    # Sort the dataframe by ID\n",
    "    df = df.sort_values(by='id')\n",
    "    print(f\"Sorted rows by ID\")\n",
    "    \n",
    "    # Save the fixed and sorted CSV\n",
    "    df.to_csv(csv_path, index=False)\n",
    "    print(f\"Fixed and sorted CSV saved to {csv_path}\")\n",
    "    \n",
    "    # Return the fixed dataframe\n",
    "    return df\n",
    "\n",
    "# Fix descriptions.csv\n",
    "fixed_descriptions_df = fix_duplicate_ids('data/descriptions.csv')\n",
    "\n",
    "# Fix eval.csv if needed\n",
    "# First check if eval.csv has the same issue\n",
    "eval_df = pd.read_csv('data/eval.csv')\n",
    "duplicate_eval_ids = eval_df['id'].duplicated(keep='first').sum()\n",
    "\n",
    "if duplicate_eval_ids > 0:\n",
    "    fixed_eval_df = fix_duplicate_ids('data/eval.csv')\n",
    "else:\n",
    "    print(\"No duplicate IDs found in data/eval.csv\")\n",
    "    # Still sort by ID even if no duplicates\n",
    "    eval_df = eval_df.sort_values(by='id')\n",
    "    eval_df.to_csv('data/eval.csv', index=False)\n",
    "    print(\"Sorted data/eval.csv by ID\")\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "dwl",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}