File size: 12,863 Bytes
15369ca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9fdf27a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15369ca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/user/miniconda3/envs/dwl/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div><style>\n",
       ".dataframe > thead > tr,\n",
       ".dataframe > tbody > tr {\n",
       "  text-align: right;\n",
       "  white-space: pre-wrap;\n",
       "}\n",
       "</style>\n",
       "<small>shape: (5, 2)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>id</th><th>description</th></tr><tr><td>str</td><td>str</td></tr></thead><tbody><tr><td>&quot;02d892&quot;</td><td>&quot;a purple forest at dusk&quot;</td></tr><tr><td>&quot;0dcd2e&quot;</td><td>&quot;gray wool coat with a faux fur…</td></tr><tr><td>&quot;1e9ac1&quot;</td><td>&quot;a lighthouse overlooking the o…</td></tr><tr><td>&quot;2b25db&quot;</td><td>&quot;burgundy corduroy pants with p…</td></tr><tr><td>&quot;4e6a54&quot;</td><td>&quot;orange corduroy overalls&quot;</td></tr></tbody></table></div>"
      ],
      "text/plain": [
       "shape: (5, 2)\n",
       "┌────────┬─────────────────────────────────┐\n",
       "│ id     ┆ description                     │\n",
       "│ ---    ┆ ---                             │\n",
       "│ str    ┆ str                             │\n",
       "╞════════╪═════════════════════════════════╡\n",
       "│ 02d892 ┆ a purple forest at dusk         │\n",
       "│ 0dcd2e ┆ gray wool coat with a faux fur… │\n",
       "│ 1e9ac1 ┆ a lighthouse overlooking the o… │\n",
       "│ 2b25db ┆ burgundy corduroy pants with p… │\n",
       "│ 4e6a54 ┆ orange corduroy overalls        │\n",
       "└────────┴─────────────────────────────────┘"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Exploration only: load the competition's train set to get a feel for the data.\n",
    "# This cell is not exported, because the exported inference model does not need it.\n",
    "\n",
    "import kagglehub\n",
    "import polars as pl\n",
    "\n",
    "# Fetch train.csv of the 'drawing-with-llms' competition through kagglehub,\n",
    "# then read the downloaded file into a polars DataFrame.\n",
    "train_path = kagglehub.competition_download('drawing-with-llms', 'train.csv')\n",
    "train = pl.read_csv(train_path)\n",
    "\n",
    "# Preview the first rows as the cell's output.\n",
    "train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Model:\n",
    "    '''Baseline model that answers every prompt with the same fixed SVG.'''\n",
    "\n",
    "    def __init__(self):\n",
    "        '''Nothing to set up: the baseline keeps no state.'''\n",
    "\n",
    "    def predict(self, prompt: str) -> str:\n",
    "        '''Produce SVG markup for an image description.\n",
    "\n",
    "        The prompt is accepted but ignored; the result is always a red circle\n",
    "        (radius 40) centred on a 100x100 canvas.\n",
    "\n",
    "        Args:\n",
    "            prompt (str): Description of the image to draw (unused).\n",
    "        Returns:\n",
    "            str: The SVG document as a single string.\n",
    "        '''\n",
    "        placeholder_svg = '<svg width=\"100\" height=\"100\" viewBox=\"0 0 100 100\"><circle cx=\"50\" cy=\"50\" r=\"40\" fill=\"red\" /></svg>'\n",
    "        return placeholder_svg"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<svg width=\"100\" height=\"100\" viewBox=\"0 0 100 100\"><circle cx=\"50\" cy=\"50\" r=\"40\" fill=\"red\" /></svg>\n"
     ]
    },
    {
     "data": {
      "image/svg+xml": [
       "<svg width=\"100\" height=\"100\" viewBox=\"0 0 100 100\"><circle cx=\"50\" cy=\"50\" r=\"40\" fill=\"red\"/></svg>"
      ],
      "text/plain": [
       "<IPython.core.display.SVG object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from IPython.display import SVG\n",
    "\n",
    "# Smoke test: run the model on one prompt, then show both the raw markup\n",
    "# and the rendered image.\n",
    "model = Model()\n",
    "svg = model.predict('a goose winning a gold medal')\n",
    "\n",
    "print(svg)\n",
    "# `display` is not imported in this cell; it relies on the name the notebook\n",
    "# kernel already provides.\n",
    "display(SVG(svg))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['RN50',\n",
       " 'RN101',\n",
       " 'RN50x4',\n",
       " 'RN50x16',\n",
       " 'RN50x64',\n",
       " 'ViT-B/32',\n",
       " 'ViT-B/16',\n",
       " 'ViT-L/14',\n",
       " 'ViT-L/14@336px']"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import clip\n",
    "# Show the model names that the installed `clip` package reports as available.\n",
    "clip.available_models()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2025-04-20 13:55:34.589770: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
      "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
      "E0000 00:00:1745171734.600777   13214 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
      "E0000 00:00:1745171734.603957   13214 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
      "W0000 00:00:1745171734.615566   13214 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
      "W0000 00:00:1745171734.615584   13214 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
      "W0000 00:00:1745171734.615585   13214 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
      "W0000 00:00:1745171734.615586   13214 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
      "2025-04-20 13:55:34.618659: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
      "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
      "Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.\n",
      "Loading checkpoint shards: 100%|██████████| 4/4 [00:18<00:00,  4.68s/it]\n"
     ]
    }
   ],
   "source": [
    "# pandas is imported here and reused by the later cells as `pd`.\n",
    "import pandas as pd\n",
    "import importlib\n",
    "# Import the `metric` module by name, then reload it so that edits made to it\n",
    "# after a previous import are picked up without restarting the kernel.\n",
    "metric = importlib.import_module('metric')\n",
    "importlib.reload(metric)\n",
    "\n",
    "# Build both evaluators once; the scoring cell below reuses them.\n",
    "# NOTE(review): the saved stderr shows checkpoint shards being loaded in this\n",
    "# cell, so construction is presumably slow - confirm before re-running.\n",
    "vqa_evaluator = metric.VQAEvaluator()\n",
    "aesthetic_evaluator = metric.AestheticEvaluator()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "VQA Score: 0.9996758976500401\n",
      "Aesthetic Score: 0.5749330520629883\n",
      "Final Fidelity Score: 0.8709845773271212\n"
     ]
    }
   ],
   "source": [
    "# Score one GPT-4o generated image with the metric module's evaluators.\n",
    "import ast\n",
    "import numpy as np\n",
    "from PIL import Image\n",
    "\n",
    "# Position of the sample to score in both CSV files. Index 1 is the second\n",
    "# row, not the first.\n",
    "# NOTE(review): presumably the row that data/gray_coat.png was generated for - confirm.\n",
    "SAMPLE_INDEX = 1\n",
    "\n",
    "descriptions_df = pd.read_csv('data/descriptions.csv')\n",
    "sample_description = descriptions_df.iloc[SAMPLE_INDEX]  # not used below; kept for inspection\n",
    "\n",
    "eval_df = pd.read_csv('data/eval.csv')\n",
    "sample_eval = eval_df.iloc[SAMPLE_INDEX]\n",
    "\n",
    "# Load the image\n",
    "image_path = 'data/gray_coat.png'  # Assuming the image is saved with this name\n",
    "image = Image.open(image_path)\n",
    "\n",
    "# These columns hold Python-literal strings in the CSV; parse them back into objects.\n",
    "questions = ast.literal_eval(sample_eval['question'])\n",
    "choices = ast.literal_eval(sample_eval['choices'])\n",
    "answers = ast.literal_eval(sample_eval['answer'])\n",
    "\n",
    "# Run the image processing first so that the evaluators score the processed\n",
    "# image; otherwise the processed copy is built but never used.\n",
    "# NOTE(review): meant to mirror metric.score, which is not visible here - confirm.\n",
    "image_processor = metric.ImageProcessor(image=image, seed=0).apply()\n",
    "processed_image = image_processor.image.copy()\n",
    "\n",
    "# VQA score: the parsed values are passed as they are, without extra list wrapping.\n",
    "vqa_score = vqa_evaluator.score(questions, choices, answers, processed_image)\n",
    "\n",
    "# Aesthetic score of the same processed image.\n",
    "aesthetic_score = aesthetic_evaluator.score(processed_image)\n",
    "\n",
    "# Combine both scores with the metric's harmonic mean (beta=0.5).\n",
    "instance_score = metric.harmonic_mean(vqa_score, aesthetic_score, beta=0.5)\n",
    "\n",
    "print(f\"VQA Score: {vqa_score}\")\n",
    "print(f\"Aesthetic Score: {aesthetic_score}\")\n",
    "print(f\"Final Fidelity Score: {instance_score}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "No duplicate IDs found in data/descriptions.csv\n",
      "Sorted rows by ID\n",
      "Fixed and sorted CSV saved to data/descriptions.csv\n",
      "No duplicate IDs found in data/eval.csv\n",
      "Sorted data/eval.csv by ID\n"
     ]
    }
   ],
   "source": [
    "# Fix duplicate IDs in descriptions.csv and order rows by id\n",
    "def fix_duplicate_ids(csv_path):\n",
    "    \"\"\"\n",
    "    Make the ``id`` column of a CSV file unique, sort the rows by ``id`` and\n",
    "    write the result back to the same path.\n",
    "\n",
    "    The first occurrence of each ID is kept; every later occurrence gets a new\n",
    "    ID. For an integer ``id`` column the new IDs continue after the current\n",
    "    maximum. For any other column type (for example string IDs) the column is\n",
    "    treated as text and the new ID is the old value plus a ``_dupN`` suffix,\n",
    "    using the smallest N that does not collide with an ID already present.\n",
    "\n",
    "    Args:\n",
    "        csv_path: Path of the CSV file to fix. It must contain an ``id``\n",
    "            column. The file is overwritten in place.\n",
    "    Returns:\n",
    "        The fixed and sorted pandas DataFrame.\n",
    "    \"\"\"\n",
    "    df = pd.read_csv(csv_path)\n",
    "\n",
    "    # Every occurrence after the first one counts as a duplicate.\n",
    "    duplicate_mask = df['id'].duplicated(keep='first')\n",
    "    duplicate_count = int(duplicate_mask.sum())\n",
    "\n",
    "    if duplicate_count > 0:\n",
    "        print(f\"Found {duplicate_count} duplicate IDs in {csv_path}\")\n",
    "\n",
    "        if pd.api.types.is_integer_dtype(df['id']):\n",
    "            # Integer IDs: continue numbering after the current maximum.\n",
    "            max_id = int(df['id'].max())\n",
    "            new_ids = list(range(max_id + 1, max_id + 1 + duplicate_count))\n",
    "        else:\n",
    "            # Non-integer IDs cannot be incremented (`max_id + 1` raises\n",
    "            # TypeError for strings), so derive the new IDs from the old text.\n",
    "            df['id'] = df['id'].astype(str)\n",
    "            taken = set(df['id'])\n",
    "            new_ids = []\n",
    "            for old_id in df.loc[duplicate_mask, 'id']:\n",
    "                suffix = 1\n",
    "                candidate = f\"{old_id}_dup{suffix}\"\n",
    "                while candidate in taken:\n",
    "                    suffix += 1\n",
    "                    candidate = f\"{old_id}_dup{suffix}\"\n",
    "                taken.add(candidate)\n",
    "                new_ids.append(candidate)\n",
    "\n",
    "        df.loc[duplicate_mask, 'id'] = new_ids\n",
    "\n",
    "        print(f\"Assigned new IDs to duplicates\")\n",
    "    else:\n",
    "        print(f\"No duplicate IDs found in {csv_path}\")\n",
    "\n",
    "    # Order the rows by ID, with or without repairs.\n",
    "    df = df.sort_values(by='id')\n",
    "    print(f\"Sorted rows by ID\")\n",
    "\n",
    "    # Overwrite the input file with the repaired, sorted table.\n",
    "    df.to_csv(csv_path, index=False)\n",
    "    print(f\"Fixed and sorted CSV saved to {csv_path}\")\n",
    "\n",
    "    return df\n",
    "\n",
    "# De-duplicate and sort data/descriptions.csv in place.\n",
    "fixed_descriptions_df = fix_duplicate_ids('data/descriptions.csv')\n",
    "\n",
    "# data/eval.csv is checked first; the full fix only runs when the file\n",
    "# really contains repeated IDs.\n",
    "eval_df = pd.read_csv('data/eval.csv')\n",
    "duplicate_eval_ids = eval_df['id'].duplicated(keep='first').sum()\n",
    "\n",
    "if duplicate_eval_ids == 0:\n",
    "    # Nothing to repair: just rewrite the file with its rows ordered by ID.\n",
    "    print(\"No duplicate IDs found in data/eval.csv\")\n",
    "    eval_df = eval_df.sort_values(by='id')\n",
    "    eval_df.to_csv('data/eval.csv', index=False)\n",
    "    print(\"Sorted data/eval.csv by ID\")\n",
    "else:\n",
    "    fixed_eval_df = fix_duplicate_ids('data/eval.csv')\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "dwl",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}