cassandramussard
committed on
Commit 73ab90f • 1 Parent(s): d17f8cc
add notebook and weights
- blip.ipynb +493 -0
- config.json +27 -0
- generation_config.json +7 -0
- model.safetensors +3 -0
- preprocessor_config.json +24 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +62 -0
blip.ipynb
ADDED
@@ -0,0 +1,493 @@
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "markdown",
|
5 |
+
"metadata": {
|
6 |
+
"id": "p5S2GYrJe6lb"
|
7 |
+
},
|
8 |
+
"source": [
|
9 |
+
"# Image to text for Airbnb images"
|
10 |
+
]
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"cell_type": "code",
|
14 |
+
"execution_count": 1,
|
15 |
+
"metadata": {
|
16 |
+
"id": "lG3i-iiWe7l_"
|
17 |
+
},
|
18 |
+
"outputs": [
|
19 |
+
{
|
20 |
+
"name": "stderr",
|
21 |
+
"output_type": "stream",
|
22 |
+
"text": [
|
23 |
+
"/home/[email protected]/env/venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
24 |
+
" from .autonotebook import tqdm as notebook_tqdm\n"
|
25 |
+
]
|
26 |
+
}
|
27 |
+
],
|
28 |
+
"source": [
|
29 |
+
"import torch\n",
|
31 |
+
"from torch.utils.data import Dataset\n",
|
32 |
+
"from PIL import Image\n",
|
33 |
+
"import pandas as pd\n",
|
34 |
+
"from transformers import AutoProcessor\n",
|
35 |
+
"import numpy as np\n",
|
36 |
+
"from torchvision import transforms\n",
|
37 |
+
"from transformers import BlipForConditionalGeneration\n"
|
38 |
+
]
|
39 |
+
},
|
40 |
+
{
|
41 |
+
"cell_type": "markdown",
|
42 |
+
"metadata": {
|
43 |
+
"id": "FpRt69nWfFFv"
|
44 |
+
},
|
45 |
+
"source": [
|
46 |
+
"### Create dataset with images and text and process them with BLIP's processor"
|
47 |
+
]
|
48 |
+
},
|
49 |
+
{
|
50 |
+
"cell_type": "code",
|
51 |
+
"execution_count": 2,
|
52 |
+
"metadata": {
|
53 |
+
"id": "1i4BMba0ln91"
|
54 |
+
},
|
55 |
+
"outputs": [],
|
56 |
+
"source": [
|
57 |
+
"class Airbnb(Dataset):\n",
|
58 |
+
" def __init__(self, csv_file, data_augmentation):\n",
|
59 |
+
" self.df = pd.read_csv(csv_file)\n",
|
60 |
+
" self.processor = AutoProcessor.from_pretrained(\"Salesforce/blip-image-captioning-base\")\n",
|
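" # Note: data_augmentation is accepted but not applied here; image transforms could be hooked into __getitem__ if needed.\n",
"\n",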
61 |
+
" def __len__(self):\n",
|
62 |
+
" return self.df.shape[0]\n",
|
63 |
+
"\n",
|
64 |
+
" def __getitem__(self, index):\n",
|
65 |
+
" path_to_im = \"/home/[email protected]/image_to_text/blip/living_room/\" + str(self.df.listing_id_x[index])+ '_' + str(self.df.photo_number_x[index])\n",
|
66 |
+
" image = Image.open(path_to_im).convert(\"RGB\")\n",
|
67 |
+
" label = str(self.df.answers[index])\n",
|
68 |
+
" encoding = self.processor(images=image, text=label, padding=\"max_length\", return_tensors=\"pt\")\n",
|
69 |
+
" encoding = {k:v.squeeze() for k,v in encoding.items()}\n",
|
70 |
+
" return encoding"
|
71 |
+
]
|
72 |
+
},
|
73 |
+
{
|
74 |
+
"cell_type": "markdown",
|
75 |
+
"metadata": {
|
76 |
+
"id": "e2sr84dsfXt7"
|
77 |
+
},
|
78 |
+
"source": [
|
79 |
+
"### Import CSV file"
|
80 |
+
]
|
81 |
+
},
|
82 |
+
{
|
83 |
+
"cell_type": "code",
|
84 |
+
"execution_count": 3,
|
85 |
+
"metadata": {
|
86 |
+
"id": "Zl0asqIYpp4-"
|
87 |
+
},
|
88 |
+
"outputs": [],
|
89 |
+
"source": [
|
90 |
+
"csv_file = \"/home/[email protected]/image_to_text/blip/Picture_Descriptions_All-Copy.csv\""
|
91 |
+
]
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"cell_type": "code",
|
95 |
+
"execution_count": 4,
|
96 |
+
"metadata": {
|
97 |
+
"id": "8uUjuOj-qGsv"
|
98 |
+
},
|
99 |
+
"outputs": [],
|
100 |
+
"source": [
|
101 |
+
"dataset = Airbnb(csv_file, data_augmentation = None)"
|
102 |
+
]
|
103 |
+
},
|
104 |
+
{
|
105 |
+
"cell_type": "markdown",
|
106 |
+
"metadata": {
|
107 |
+
"id": "0IK-kRFxfd3H"
|
108 |
+
},
|
109 |
+
"source": [
|
110 |
+
"### Split train/test dataset"
|
111 |
+
]
|
112 |
+
},
|
113 |
+
{
|
114 |
+
"cell_type": "code",
|
115 |
+
"execution_count": 5,
|
116 |
+
"metadata": {
|
117 |
+
"id": "93wmNMwgqwgg"
|
118 |
+
},
|
119 |
+
"outputs": [],
|
120 |
+
"source": [
|
121 |
+
"train_size = int(0.8 * len(dataset))\n",
|
122 |
+
"test_size = len(dataset) - train_size\n",
|
123 |
+
"train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])"
|
124 |
+
]
|
125 |
+
},
|
126 |
+
{
|
127 |
+
"cell_type": "markdown",
|
128 |
+
"metadata": {
|
129 |
+
"id": "3VWdqSeWfhAN"
|
130 |
+
},
|
131 |
+
"source": [
|
132 |
+
"### Create dataloader"
|
133 |
+
]
|
134 |
+
},
|
135 |
+
{
|
136 |
+
"cell_type": "code",
|
137 |
+
"execution_count": 6,
|
138 |
+
"metadata": {
|
139 |
+
"id": "0pJdUuSTqy-5"
|
140 |
+
},
|
141 |
+
"outputs": [],
|
142 |
+
"source": [
|
143 |
+
"train_loader = torch.utils.data.DataLoader(\n",
|
144 |
+
" train_dataset,\n",
|
145 |
+
" batch_size=1,\n",
|
146 |
+
" shuffle=True\n",
|
147 |
+
" )\n",
|
148 |
+
"test_loader = torch.utils.data.DataLoader(\n",
|
149 |
+
" test_dataset,\n",
|
150 |
+
" batch_size=1,\n",
|
151 |
+
" shuffle=True\n",
|
152 |
+
" )"
|
153 |
+
]
|
154 |
+
},
|
155 |
+
{
|
156 |
+
"cell_type": "markdown",
|
157 |
+
"metadata": {
|
158 |
+
"id": "mnwwxvB_fjlx"
|
159 |
+
},
|
160 |
+
"source": [
|
161 |
+
"### Import model and create device"
|
162 |
+
]
|
163 |
+
},
|
164 |
+
{
|
165 |
+
"cell_type": "code",
|
166 |
+
"execution_count": 7,
|
167 |
+
"metadata": {
|
168 |
+
"id": "jY6h9kpgq0KX"
|
169 |
+
},
|
170 |
+
"outputs": [],
|
171 |
+
"source": [
|
172 |
+
"model = BlipForConditionalGeneration.from_pretrained(\"Salesforce/blip-image-captioning-base\")"
|
173 |
+
]
|
174 |
+
},
|
175 |
+
{
|
176 |
+
"cell_type": "code",
|
177 |
+
"execution_count": 8,
|
178 |
+
"metadata": {
|
179 |
+
"id": "9rk60pCKfUkV"
|
180 |
+
},
|
181 |
+
"outputs": [],
|
182 |
+
"source": [
|
183 |
+
"device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")"
|
184 |
+
]
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"cell_type": "markdown",
|
188 |
+
"metadata": {
|
189 |
+
"id": "HbiDQqzngCbn"
|
190 |
+
},
|
191 |
+
"source": [
|
192 |
+
"### Train loop"
|
193 |
+
]
|
194 |
+
},
|
195 |
+
{
|
196 |
+
"cell_type": "code",
|
197 |
+
"execution_count": 9,
|
198 |
+
"metadata": {
|
199 |
+
"colab": {
|
200 |
+
"base_uri": "https://localhost:8080/"
|
201 |
+
},
|
202 |
+
"id": "i39jlG5Aq1Yo",
|
203 |
+
"outputId": "a5292b17-f2b9-4a38-db0a-3f97d4923aa4"
|
204 |
+
},
|
205 |
+
"outputs": [
|
206 |
+
{
|
207 |
+
"name": "stdout",
|
208 |
+
"output_type": "stream",
|
209 |
+
"text": [
|
210 |
+
"Epoch: 0\n"
|
211 |
+
]
|
212 |
+
},
|
213 |
+
{
|
214 |
+
"name": "stderr",
|
215 |
+
"output_type": "stream",
|
216 |
+
"text": [
|
217 |
+
"We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.\n"
|
218 |
+
]
|
219 |
+
},
|
220 |
+
{
|
221 |
+
"ename": "KeyboardInterrupt",
|
222 |
+
"evalue": "",
|
223 |
+
"output_type": "error",
|
224 |
+
"traceback": [
|
225 |
+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
226 |
+
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
|
227 |
+
"Cell \u001b[0;32mIn[9], line 25\u001b[0m\n\u001b[1;32m 22\u001b[0m total_examples \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m labels\u001b[38;5;241m.\u001b[39mnumel()\n\u001b[1;32m 24\u001b[0m loss\u001b[38;5;241m.\u001b[39mbackward()\n\u001b[0;32m---> 25\u001b[0m \u001b[43moptimizer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstep\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 26\u001b[0m optimizer\u001b[38;5;241m.\u001b[39mzero_grad()\n\u001b[1;32m 28\u001b[0m average_loss \u001b[38;5;241m=\u001b[39m total_loss \u001b[38;5;241m/\u001b[39m \u001b[38;5;28mlen\u001b[39m(train_loader)\n",
|
228 |
+
"File \u001b[0;32m~/env/venv/lib/python3.10/site-packages/torch/optim/optimizer.py:385\u001b[0m, in \u001b[0;36mOptimizer.profile_hook_step.<locals>.wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 380\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 381\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\n\u001b[1;32m 382\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfunc\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m must return None or a tuple of (new_args, new_kwargs), but got \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresult\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 383\u001b[0m )\n\u001b[0;32m--> 385\u001b[0m out \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 386\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_optimizer_step_code()\n\u001b[1;32m 388\u001b[0m \u001b[38;5;66;03m# call optimizer step post hooks\u001b[39;00m\n",
|
229 |
+
"File \u001b[0;32m~/env/venv/lib/python3.10/site-packages/torch/optim/optimizer.py:76\u001b[0m, in \u001b[0;36m_use_grad_for_differentiable.<locals>._use_grad\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 74\u001b[0m torch\u001b[38;5;241m.\u001b[39mset_grad_enabled(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdefaults[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdifferentiable\u001b[39m\u001b[38;5;124m'\u001b[39m])\n\u001b[1;32m 75\u001b[0m torch\u001b[38;5;241m.\u001b[39m_dynamo\u001b[38;5;241m.\u001b[39mgraph_break()\n\u001b[0;32m---> 76\u001b[0m ret \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 77\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 78\u001b[0m torch\u001b[38;5;241m.\u001b[39m_dynamo\u001b[38;5;241m.\u001b[39mgraph_break()\n",
|
230 |
+
"File \u001b[0;32m~/env/venv/lib/python3.10/site-packages/torch/optim/adamw.py:187\u001b[0m, in \u001b[0;36mAdamW.step\u001b[0;34m(self, closure)\u001b[0m\n\u001b[1;32m 174\u001b[0m beta1, beta2 \u001b[38;5;241m=\u001b[39m group[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbetas\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 176\u001b[0m has_complex \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_init_group(\n\u001b[1;32m 177\u001b[0m group,\n\u001b[1;32m 178\u001b[0m params_with_grad,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 184\u001b[0m state_steps,\n\u001b[1;32m 185\u001b[0m )\n\u001b[0;32m--> 187\u001b[0m \u001b[43madamw\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 188\u001b[0m \u001b[43m \u001b[49m\u001b[43mparams_with_grad\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 189\u001b[0m \u001b[43m \u001b[49m\u001b[43mgrads\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 190\u001b[0m \u001b[43m \u001b[49m\u001b[43mexp_avgs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 191\u001b[0m \u001b[43m \u001b[49m\u001b[43mexp_avg_sqs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 192\u001b[0m \u001b[43m \u001b[49m\u001b[43mmax_exp_avg_sqs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 193\u001b[0m \u001b[43m \u001b[49m\u001b[43mstate_steps\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 194\u001b[0m \u001b[43m \u001b[49m\u001b[43mamsgrad\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mamsgrad\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 195\u001b[0m \u001b[43m \u001b[49m\u001b[43mbeta1\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbeta1\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 196\u001b[0m \u001b[43m \u001b[49m\u001b[43mbeta2\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbeta2\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 197\u001b[0m \u001b[43m \u001b[49m\u001b[43mlr\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgroup\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlr\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 198\u001b[0m \u001b[43m \u001b[49m\u001b[43mweight_decay\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgroup\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mweight_decay\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 199\u001b[0m \u001b[43m \u001b[49m\u001b[43meps\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgroup\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43meps\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 200\u001b[0m \u001b[43m \u001b[49m\u001b[43mmaximize\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgroup\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmaximize\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 201\u001b[0m \u001b[43m \u001b[49m\u001b[43mforeach\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgroup\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mforeach\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 202\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mcapturable\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgroup\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcapturable\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 203\u001b[0m \u001b[43m \u001b[49m\u001b[43mdifferentiable\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgroup\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdifferentiable\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 204\u001b[0m \u001b[43m \u001b[49m\u001b[43mfused\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgroup\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mfused\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 205\u001b[0m \u001b[43m \u001b[49m\u001b[43mgrad_scale\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mgrad_scale\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 206\u001b[0m \u001b[43m \u001b[49m\u001b[43mfound_inf\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mfound_inf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 207\u001b[0m \u001b[43m \u001b[49m\u001b[43mhas_complex\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhas_complex\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 208\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 210\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m loss\n",
|
231 |
+
"File \u001b[0;32m~/env/venv/lib/python3.10/site-packages/torch/optim/adamw.py:339\u001b[0m, in \u001b[0;36madamw\u001b[0;34m(params, grads, exp_avgs, exp_avg_sqs, max_exp_avg_sqs, state_steps, foreach, capturable, differentiable, fused, grad_scale, found_inf, has_complex, amsgrad, beta1, beta2, lr, weight_decay, eps, maximize)\u001b[0m\n\u001b[1;32m 336\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 337\u001b[0m func \u001b[38;5;241m=\u001b[39m _single_tensor_adamw\n\u001b[0;32m--> 339\u001b[0m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 340\u001b[0m \u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 341\u001b[0m \u001b[43m \u001b[49m\u001b[43mgrads\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 342\u001b[0m \u001b[43m \u001b[49m\u001b[43mexp_avgs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 343\u001b[0m \u001b[43m \u001b[49m\u001b[43mexp_avg_sqs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 344\u001b[0m \u001b[43m \u001b[49m\u001b[43mmax_exp_avg_sqs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 345\u001b[0m \u001b[43m \u001b[49m\u001b[43mstate_steps\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 346\u001b[0m \u001b[43m \u001b[49m\u001b[43mamsgrad\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mamsgrad\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 347\u001b[0m \u001b[43m \u001b[49m\u001b[43mbeta1\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbeta1\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 348\u001b[0m \u001b[43m \u001b[49m\u001b[43mbeta2\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbeta2\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 349\u001b[0m \u001b[43m \u001b[49m\u001b[43mlr\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlr\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 350\u001b[0m \u001b[43m \u001b[49m\u001b[43mweight_decay\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mweight_decay\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 351\u001b[0m \u001b[43m \u001b[49m\u001b[43meps\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43meps\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 352\u001b[0m \u001b[43m \u001b[49m\u001b[43mmaximize\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmaximize\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 353\u001b[0m \u001b[43m \u001b[49m\u001b[43mcapturable\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcapturable\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 354\u001b[0m \u001b[43m \u001b[49m\u001b[43mdifferentiable\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdifferentiable\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 355\u001b[0m \u001b[43m \u001b[49m\u001b[43mgrad_scale\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgrad_scale\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 356\u001b[0m \u001b[43m \u001b[49m\u001b[43mfound_inf\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfound_inf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 357\u001b[0m \u001b[43m \u001b[49m\u001b[43mhas_complex\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhas_complex\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 358\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
|
232 |
+
"File \u001b[0;32m~/env/venv/lib/python3.10/site-packages/torch/optim/adamw.py:552\u001b[0m, in \u001b[0;36m_multi_tensor_adamw\u001b[0;34m(params, grads, exp_avgs, exp_avg_sqs, max_exp_avg_sqs, state_steps, grad_scale, found_inf, amsgrad, beta1, beta2, lr, weight_decay, eps, maximize, capturable, differentiable, has_complex)\u001b[0m\n\u001b[1;32m 549\u001b[0m torch\u001b[38;5;241m.\u001b[39m_foreach_lerp_(device_exp_avgs, device_grads, \u001b[38;5;241m1\u001b[39m \u001b[38;5;241m-\u001b[39m beta1)\n\u001b[1;32m 551\u001b[0m torch\u001b[38;5;241m.\u001b[39m_foreach_mul_(device_exp_avg_sqs, beta2)\n\u001b[0;32m--> 552\u001b[0m \u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_foreach_addcmul_\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdevice_exp_avg_sqs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdevice_grads\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdevice_grads\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m-\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mbeta2\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 554\u001b[0m \u001b[38;5;66;03m# Delete the local intermediate since it won't be used anymore to save on peak memory\u001b[39;00m\n\u001b[1;32m 555\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m device_grads\n",
|
233 |
+
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
|
234 |
+
]
|
235 |
+
}
|
236 |
+
],
|
237 |
+
"source": [
|
238 |
+
"optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)\n",
|
239 |
+
"model.to(device)\n",
|
240 |
+
"model.train()\n",
|
241 |
+
"for epoch in range(5):\n",
|
242 |
+
" print(\"Epoch:\", epoch)\n",
|
243 |
+
" total_loss = 0.0\n",
|
244 |
+
" total_correct = 0\n",
|
245 |
+
" total_examples = 0\n",
|
246 |
+
"\n",
|
247 |
+
" for idx, batch in enumerate(train_loader):\n",
|
248 |
+
" input_ids = batch.pop(\"input_ids\").to(device)\n",
|
249 |
+
" pixel_values = batch.pop(\"pixel_values\").to(device)\n",
|
250 |
+
" labels = input_ids\n",
|
251 |
+
"\n",
|
252 |
+
" outputs = model(input_ids=input_ids, pixel_values=pixel_values, labels=labels)\n",
|
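" # The caption tokens are used as both inputs and labels; passing the attention_mask returned by the processor (if present in the batch) would avoid the padding warning shown above.\n",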
253 |
+
" loss = outputs.loss\n",
|
254 |
+
" total_loss += loss.item()\n",
|
255 |
+
"\n",
|
256 |
+
" predictions = torch.argmax(outputs.logits, dim=-1)\n",
|
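" # Token-level accuracy over every position, including [PAD] tokens, so this figure is optimistic.\n",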
257 |
+
" correct = (predictions == labels).sum().item()\n",
|
258 |
+
" total_correct += correct\n",
|
259 |
+
" total_examples += labels.numel()\n",
|
260 |
+
"\n",
|
261 |
+
" loss.backward()\n",
|
262 |
+
" optimizer.step()\n",
|
263 |
+
" optimizer.zero_grad()\n",
|
264 |
+
"\n",
|
265 |
+
" average_loss = total_loss / len(train_loader)\n",
|
266 |
+
" accuracy = total_correct / total_examples\n",
|
267 |
+
" print(f\"Average Loss for epoch {epoch}: {average_loss:.4f}\")\n",
|
268 |
+
" print(f\"Accuracy for epoch {epoch}: {accuracy:.2f}\")"
|
269 |
+
]
|
270 |
+
},
|
271 |
+
{
|
272 |
+
"cell_type": "markdown",
|
273 |
+
"metadata": {
|
274 |
+
"id": "Dc4j-hLrgE6r"
|
275 |
+
},
|
276 |
+
"source": [
|
277 |
+
"### Test loop"
|
278 |
+
]
|
279 |
+
},
|
280 |
+
{
|
281 |
+
"cell_type": "code",
|
282 |
+
"execution_count": null,
|
283 |
+
"metadata": {
|
284 |
+
"id": "sMEMW6MiO0sS"
|
285 |
+
},
|
286 |
+
"outputs": [],
|
287 |
+
"source": [
|
288 |
+
"model.eval()\n",
|
289 |
+
"with torch.no_grad():\n",
|
290 |
+
" total_loss = 0.0\n",
|
291 |
+
" total_correct = 0\n",
|
292 |
+
" total_examples = 0\n",
|
293 |
+
"\n",
|
294 |
+
" for idx, batch in enumerate(test_loader):\n",
|
295 |
+
" input_ids = batch.pop(\"input_ids\").to(device)\n",
|
296 |
+
" pixel_values = batch.pop(\"pixel_values\").to(device)\n",
|
297 |
+
" labels = input_ids\n",
|
298 |
+
"\n",
|
299 |
+
" outputs = model(input_ids=input_ids, pixel_values=pixel_values, labels=labels)\n",
|
300 |
+
" loss = outputs.loss\n",
|
301 |
+
" total_loss += loss.item()\n",
|
302 |
+
"\n",
|
303 |
+
" predictions = torch.argmax(outputs.logits, dim=-1)\n",
|
304 |
+
" correct = (predictions == labels).sum().item()\n",
|
305 |
+
" total_correct += correct\n",
|
306 |
+
" total_examples += labels.numel()\n",
|
307 |
+
"\n",
|
308 |
+
" average_loss = total_loss / len(test_loader)\n",
|
309 |
+
" accuracy = total_correct / total_examples\n",
|
310 |
+
" print(f\"Test Average Loss: {average_loss:.4f}\")\n",
|
311 |
+
" print(f\"Test Accuracy: {accuracy:.2f}\")"
|
312 |
+
]
|
313 |
+
},
|
314 |
+
{
|
315 |
+
"cell_type": "code",
|
316 |
+
"execution_count": null,
|
317 |
+
"metadata": {
|
318 |
+
"id": "qcKs5-3Jgz-M"
|
319 |
+
},
|
320 |
+
"outputs": [],
|
321 |
+
"source": []
|
322 |
+
},
|
323 |
+
{
|
324 |
+
"cell_type": "code",
|
325 |
+
"execution_count": null,
|
326 |
+
"metadata": {
|
327 |
+
"id": "ObYnoCzag0Aq"
|
328 |
+
},
|
329 |
+
"outputs": [],
|
330 |
+
"source": []
|
331 |
+
},
|
332 |
+
{
|
333 |
+
"cell_type": "code",
|
334 |
+
"execution_count": null,
|
335 |
+
"metadata": {
|
336 |
+
"id": "rY6u33avg0CM"
|
337 |
+
},
|
338 |
+
"outputs": [],
|
339 |
+
"source": []
|
340 |
+
},
|
341 |
+
{
|
342 |
+
"cell_type": "code",
|
343 |
+
"execution_count": null,
|
344 |
+
"metadata": {
|
345 |
+
"id": "8EZkrYFqg0E2"
|
346 |
+
},
|
347 |
+
"outputs": [],
|
348 |
+
"source": []
|
349 |
+
},
|
350 |
+
{
|
351 |
+
"cell_type": "code",
|
352 |
+
"execution_count": 10,
|
353 |
+
"metadata": {
|
354 |
+
"id": "qBmjfndHgzFj"
|
355 |
+
},
|
356 |
+
"outputs": [
|
357 |
+
{
|
358 |
+
"name": "stderr",
|
359 |
+
"output_type": "stream",
|
360 |
+
"text": [
|
361 |
+
"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
|
362 |
+
"To disable this warning, you can either:\n",
|
363 |
+
"\t- Avoid using `tokenizers` before the fork if possible\n",
|
364 |
+
"\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
|
365 |
+
]
|
366 |
+
},
|
367 |
+
{
|
368 |
+
"name": "stdout",
|
369 |
+
"output_type": "stream",
|
370 |
+
"text": [
|
371 |
+
"Requirement already satisfied: huggingface_hub in /home/[email protected]/env/venv/lib/python3.10/site-packages (0.22.2)\n",
|
372 |
+
"Requirement already satisfied: tqdm>=4.42.1 in /home/[email protected]/env/venv/lib/python3.10/site-packages (from huggingface_hub) (4.66.2)\n",
|
373 |
+
"Requirement already satisfied: requests in /home/[email protected]/env/venv/lib/python3.10/site-packages (from huggingface_hub) (2.31.0)\n",
|
374 |
+
"Requirement already satisfied: typing-extensions>=3.7.4.3 in /home/[email protected]/env/venv/lib/python3.10/site-packages (from huggingface_hub) (4.11.0)\n",
|
375 |
+
"Requirement already satisfied: filelock in /home/[email protected]/env/venv/lib/python3.10/site-packages (from huggingface_hub) (3.13.4)\n",
|
376 |
+
"Requirement already satisfied: fsspec>=2023.5.0 in /home/[email protected]/env/venv/lib/python3.10/site-packages (from huggingface_hub) (2024.3.1)\n",
|
377 |
+
"Requirement already satisfied: pyyaml>=5.1 in /home/[email protected]/env/venv/lib/python3.10/site-packages (from huggingface_hub) (6.0.1)\n",
|
378 |
+
"Requirement already satisfied: packaging>=20.9 in /home/[email protected]/env/venv/lib/python3.10/site-packages (from huggingface_hub) (24.0)\n",
|
379 |
+
"Requirement already satisfied: certifi>=2017.4.17 in /home/[email protected]/env/venv/lib/python3.10/site-packages (from requests->huggingface_hub) (2024.2.2)\n",
|
380 |
+
"Requirement already satisfied: urllib3<3,>=1.21.1 in /home/[email protected]/env/venv/lib/python3.10/site-packages (from requests->huggingface_hub) (2.2.1)\n",
|
381 |
+
"Requirement already satisfied: idna<4,>=2.5 in /home/[email protected]/env/venv/lib/python3.10/site-packages (from requests->huggingface_hub) (3.7)\n",
|
382 |
+
"Requirement already satisfied: charset-normalizer<4,>=2 in /home/[email protected]/env/venv/lib/python3.10/site-packages (from requests->huggingface_hub) (3.3.2)\n",
|
383 |
+
"Note: you may need to restart the kernel to use updated packages.\n"
|
384 |
+
]
|
385 |
+
}
|
386 |
+
],
|
387 |
+
"source": [
|
388 |
+
"pip install huggingface_hub"
|
389 |
+
]
|
390 |
+
},
|
391 |
+
{
|
392 |
+
"cell_type": "markdown",
|
393 |
+
"metadata": {
|
394 |
+
"id": "ISBzxw0Igout"
|
395 |
+
},
|
396 |
+
"source": [
|
397 |
+
"### Gradio webapp"
|
398 |
+
]
|
399 |
+
},
|
400 |
+
{
|
401 |
+
"cell_type": "code",
|
402 |
+
"execution_count": null,
|
403 |
+
"metadata": {
|
404 |
+
"colab": {
|
405 |
+
"base_uri": "https://localhost:8080/",
|
406 |
+
"height": 337
|
407 |
+
},
|
408 |
+
"id": "tHSnxN7AZw8a",
|
409 |
+
"outputId": "8fc49c5d-de24-4a57-e86d-2e63010b382d"
|
410 |
+
},
|
411 |
+
"outputs": [
|
412 |
+
{
|
413 |
+
"ename": "ModuleNotFoundError",
|
414 |
+
"errorDetails": {
|
415 |
+
"actions": [
|
416 |
+
{
|
417 |
+
"action": "open_url",
|
418 |
+
"actionText": "Open Examples",
|
419 |
+
"url": "/notebooks/snippets/importing_libraries.ipynb"
|
420 |
+
}
|
421 |
+
]
|
422 |
+
},
|
423 |
+
"evalue": "No module named 'gradio'",
|
424 |
+
"output_type": "error",
|
425 |
+
"traceback": [
|
426 |
+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
427 |
+
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
|
428 |
+
"\u001b[0;32m<ipython-input-38-c71c84f2e5e0>\u001b[0m in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mgradio\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mgr\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mgradio\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcomponents\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mLabel\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
429 |
+
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'gradio'",
|
430 |
+
"",
|
431 |
+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0;32m\nNOTE: If your import is failing due to a missing package, you can\nmanually install dependencies using either !pip or !apt.\n\nTo view examples of installing some common dependencies, click the\n\"Open Examples\" button below.\n\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n"
|
432 |
+
]
|
433 |
+
}
|
434 |
+
],
|
435 |
+
"source": [
|
436 |
+
"import gradio as gr\n",
|
437 |
+
"from gradio.components import Label"
|
438 |
+
]
|
439 |
+
},
|
440 |
+
{
|
441 |
+
"cell_type": "code",
|
442 |
+
"execution_count": null,
|
443 |
+
"metadata": {
|
444 |
+
"id": "eNDHwvGEad6n"
|
445 |
+
},
|
446 |
+
"outputs": [],
|
447 |
+
"source": [
|
448 |
+
"model.eval() # Mettez votre modèle en mode évaluation\n",
|
449 |
+
"\n",
|
450 |
+
"# Fonction d'inférence pour Gradio\n",
|
451 |
+
"def predict(image):\n",
|
452 |
+
" processor = AutoProcessor.from_pretrained(\"Salesforce/blip-image-captioning-base\")\n",
|
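" # Reloading the processor on every call is slow; it could be created once outside predict() and reused.\n",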
453 |
+
" inputs = processor(images=image, return_tensors=\"pt\").to(device)\n",
|
454 |
+
" pixel_values = inputs.pixel_values\n",
|
455 |
+
"\n",
|
456 |
+
" generated_ids = model.generate(pixel_values=pixel_values, max_length=50)\n",
|
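" # max_length=50 caps the caption length; num_beams could also be passed for beam search if desired.\n",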
457 |
+
" generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]\n",
|
458 |
+
"\n",
|
459 |
+
"# Création de l'interface Gradio\n",
|
460 |
+
"iface = gr.Interface(fn=predict,\n",
|
461 |
+
" inputs=gr.components.Textbox(placeholder=\"Enter your text here...\"),\n",
|
462 |
+
" outputs=gr.components.Label(num_top_classes=2))\n",
|
463 |
+
"iface.launch(share=True)"
|
464 |
+
]
|
465 |
+
}
|
466 |
+
],
|
467 |
+
"metadata": {
|
468 |
+
"accelerator": "GPU",
|
469 |
+
"colab": {
|
470 |
+
"gpuType": "T4",
|
471 |
+
"provenance": []
|
472 |
+
},
|
473 |
+
"kernelspec": {
|
474 |
+
"display_name": "venv",
|
475 |
+
"language": "python",
|
476 |
+
"name": "venv"
|
477 |
+
},
|
478 |
+
"language_info": {
|
479 |
+
"codemirror_mode": {
|
480 |
+
"name": "ipython",
|
481 |
+
"version": 3
|
482 |
+
},
|
483 |
+
"file_extension": ".py",
|
484 |
+
"mimetype": "text/x-python",
|
485 |
+
"name": "python",
|
486 |
+
"nbconvert_exporter": "python",
|
487 |
+
"pygments_lexer": "ipython3",
|
488 |
+
"version": "3.10.12"
|
489 |
+
}
|
490 |
+
},
|
491 |
+
"nbformat": 4,
|
492 |
+
"nbformat_minor": 4
|
493 |
+
}
|
config.json
ADDED
@@ -0,0 +1,27 @@
1 |
+
{
|
2 |
+
"_name_or_path": "Salesforce/blip-image-captioning-base",
|
3 |
+
"architectures": [
|
4 |
+
"BlipForConditionalGeneration"
|
5 |
+
],
|
6 |
+
"image_text_hidden_size": 256,
|
7 |
+
"initializer_factor": 1.0,
|
8 |
+
"initializer_range": 0.02,
|
9 |
+
"label_smoothing": 0.0,
|
10 |
+
"logit_scale_init_value": 2.6592,
|
11 |
+
"model_type": "blip",
|
12 |
+
"projection_dim": 512,
|
13 |
+
"text_config": {
|
14 |
+
"initializer_factor": 1.0,
|
15 |
+
"model_type": "blip_text_model",
|
16 |
+
"num_attention_heads": 12
|
17 |
+
},
|
18 |
+
"torch_dtype": "float32",
|
19 |
+
"transformers_version": "4.38.2",
|
20 |
+
"vision_config": {
|
21 |
+
"dropout": 0.0,
|
22 |
+
"initializer_factor": 1.0,
|
23 |
+
"initializer_range": 0.02,
|
24 |
+
"model_type": "blip_vision_model",
|
25 |
+
"num_channels": 3
|
26 |
+
}
|
27 |
+
}
|
generation_config.json
ADDED
@@ -0,0 +1,7 @@
1 |
+
{
|
2 |
+
"_from_model_config": true,
|
3 |
+
"bos_token_id": 30522,
|
4 |
+
"eos_token_id": 2,
|
5 |
+
"pad_token_id": 0,
|
6 |
+
"transformers_version": "4.38.2"
|
7 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:393dfdb97bb82ebafd725764c989a0fbf37086428ebdec3b5625c8bd4916e412
|
3 |
+
size 989717056
|
preprocessor_config.json
ADDED
@@ -0,0 +1,24 @@
1 |
+
{
|
2 |
+
"do_convert_rgb": true,
|
3 |
+
"do_normalize": true,
|
4 |
+
"do_rescale": true,
|
5 |
+
"do_resize": true,
|
6 |
+
"image_mean": [
|
7 |
+
0.48145466,
|
8 |
+
0.4578275,
|
9 |
+
0.40821073
|
10 |
+
],
|
11 |
+
"image_processor_type": "BlipImageProcessor",
|
12 |
+
"image_std": [
|
13 |
+
0.26862954,
|
14 |
+
0.26130258,
|
15 |
+
0.27577711
|
16 |
+
],
|
17 |
+
"processor_class": "BlipProcessor",
|
18 |
+
"resample": 3,
|
19 |
+
"rescale_factor": 0.00392156862745098,
|
20 |
+
"size": {
|
21 |
+
"height": 384,
|
22 |
+
"width": 384
|
23 |
+
}
|
24 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
1 |
+
{
|
2 |
+
"cls_token": {
|
3 |
+
"content": "[CLS]",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"mask_token": {
|
10 |
+
"content": "[MASK]",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": {
|
17 |
+
"content": "[PAD]",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"sep_token": {
|
24 |
+
"content": "[SEP]",
|
25 |
+
"lstrip": false,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
},
|
30 |
+
"unk_token": {
|
31 |
+
"content": "[UNK]",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": false,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false
|
36 |
+
}
|
37 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
tokenizer_config.json
ADDED
@@ -0,0 +1,62 @@
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_basic_tokenize": true,
|
47 |
+
"do_lower_case": true,
|
48 |
+
"mask_token": "[MASK]",
|
49 |
+
"model_input_names": [
|
50 |
+
"input_ids",
|
51 |
+
"attention_mask"
|
52 |
+
],
|
53 |
+
"model_max_length": 512,
|
54 |
+
"never_split": null,
|
55 |
+
"pad_token": "[PAD]",
|
56 |
+
"processor_class": "BlipProcessor",
|
57 |
+
"sep_token": "[SEP]",
|
58 |
+
"strip_accents": null,
|
59 |
+
"tokenize_chinese_chars": true,
|
60 |
+
"tokenizer_class": "BertTokenizer",
|
61 |
+
"unk_token": "[UNK]"
|
62 |
+
}
|