cassandramussard committed on
Commit
73ab90f
1 Parent(s): d17f8cc

add notebook and weights

blip.ipynb ADDED
@@ -0,0 +1,493 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {
6
+ "id": "p5S2GYrJe6lb"
7
+ },
8
+ "source": [
9
+ "# Image to text for Airbnb images"
10
+ ]
11
+ },
12
+ {
13
+ "cell_type": "code",
14
+ "execution_count": 1,
15
+ "metadata": {
16
+ "id": "lG3i-iiWe7l_"
17
+ },
18
+ "outputs": [
19
+ {
20
+ "name": "stderr",
21
+ "output_type": "stream",
22
+ "text": [
23
+ "/home/[email protected]/env/venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
24
+ " from .autonotebook import tqdm as notebook_tqdm\n"
25
+ ]
26
+ }
27
+ ],
28
+ "source": [
29
+ "import torch\n",
30
+ "import torch\n",
31
+ "from torch.utils.data import Dataset\n",
32
+ "from PIL import Image\n",
33
+ "import pandas as pd\n",
34
+ "from transformers import AutoProcessor\n",
35
+ "import numpy as np\n",
36
+ "from torchvision import transforms\n",
37
+ "from transformers import BlipForConditionalGeneration\n"
38
+ ]
39
+ },
40
+ {
41
+ "cell_type": "markdown",
42
+ "metadata": {
43
+ "id": "FpRt69nWfFFv"
44
+ },
45
+ "source": [
46
+ "### Create dataset with images and text and process them with BLIP's processor"
47
+ ]
48
+ },
49
+ {
50
+ "cell_type": "code",
51
+ "execution_count": 2,
52
+ "metadata": {
53
+ "id": "1i4BMba0ln91"
54
+ },
55
+ "outputs": [],
56
+ "source": [
57
+ "class Airbnb(Dataset):\n",
58
+ " def __init__(self, csv_file, data_augmentation):\n",
59
+ " self.df = pd.read_csv(csv_file)\n",
60
+ " self.processor = AutoProcessor.from_pretrained(\"Salesforce/blip-image-captioning-base\")\n",
61
+ " def __len__(self):\n",
62
+ " return self.df.shape[0]\n",
63
+ "\n",
64
+ " def __getitem__(self, index):\n",
65
+ " path_to_im = \"/home/[email protected]/image_to_text/blip/living_room/\" + str(self.df.listing_id_x[index])+ '_' + str(self.df.photo_number_x[index])\n",
66
+ " image = Image.open(path_to_im).convert(\"RGB\")\n",
67
+ " label = str(self.df.answers[index])\n",
68
+ " encoding = self.processor(images=image, text=label, padding=\"max_length\", return_tensors=\"pt\")\n",
69
+ " encoding = {k:v.squeeze() for k,v in encoding.items()}\n",
70
+ " return encoding"
71
+ ]
72
+ },
73
+ {
74
+ "cell_type": "markdown",
75
+ "metadata": {
76
+ "id": "e2sr84dsfXt7"
77
+ },
78
+ "source": [
79
+ "### Import CSV file"
80
+ ]
81
+ },
82
+ {
83
+ "cell_type": "code",
84
+ "execution_count": 3,
85
+ "metadata": {
86
+ "id": "Zl0asqIYpp4-"
87
+ },
88
+ "outputs": [],
89
+ "source": [
90
+ "csv_file = \"/home/[email protected]/image_to_text/blip/Picture_Descriptions_All-Copy.csv\""
91
+ ]
92
+ },
93
+ {
94
+ "cell_type": "code",
95
+ "execution_count": 4,
96
+ "metadata": {
97
+ "id": "8uUjuOj-qGsv"
98
+ },
99
+ "outputs": [],
100
+ "source": [
101
+ "dataset = Airbnb(csv_file, data_augmentation = None)"
102
+ ]
103
+ },
104
+ {
105
+ "cell_type": "markdown",
106
+ "metadata": {
107
+ "id": "0IK-kRFxfd3H"
108
+ },
109
+ "source": [
110
+ "### Split train/test dataset"
111
+ ]
112
+ },
113
+ {
114
+ "cell_type": "code",
115
+ "execution_count": 5,
116
+ "metadata": {
117
+ "id": "93wmNMwgqwgg"
118
+ },
119
+ "outputs": [],
120
+ "source": [
121
+ "train_size = int(0.8 * len(dataset))\n",
122
+ "test_size = len(dataset) - train_size\n",
123
+ "train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])"
124
+ ]
125
+ },
126
+ {
127
+ "cell_type": "markdown",
128
+ "metadata": {
129
+ "id": "3VWdqSeWfhAN"
130
+ },
131
+ "source": [
132
+ "### Create dataloader"
133
+ ]
134
+ },
135
+ {
136
+ "cell_type": "code",
137
+ "execution_count": 6,
138
+ "metadata": {
139
+ "id": "0pJdUuSTqy-5"
140
+ },
141
+ "outputs": [],
142
+ "source": [
143
+ "train_loader = torch.utils.data.DataLoader(\n",
144
+ " train_dataset,\n",
145
+ " batch_size=1,\n",
146
+ " shuffle=True\n",
147
+ " )\n",
148
+ "test_loader = torch.utils.data.DataLoader(\n",
149
+ " test_dataset,\n",
150
+ " batch_size=1,\n",
151
+ " shuffle=True\n",
152
+ " )"
153
+ ]
154
+ },
155
+ {
156
+ "cell_type": "markdown",
157
+ "metadata": {
158
+ "id": "mnwwxvB_fjlx"
159
+ },
160
+ "source": [
161
+ "### Import model and create device"
162
+ ]
163
+ },
164
+ {
165
+ "cell_type": "code",
166
+ "execution_count": 7,
167
+ "metadata": {
168
+ "id": "jY6h9kpgq0KX"
169
+ },
170
+ "outputs": [],
171
+ "source": [
172
+ "model = BlipForConditionalGeneration.from_pretrained(\"Salesforce/blip-image-captioning-base\")"
173
+ ]
174
+ },
175
+ {
176
+ "cell_type": "code",
177
+ "execution_count": 8,
178
+ "metadata": {
179
+ "id": "9rk60pCKfUkV"
180
+ },
181
+ "outputs": [],
182
+ "source": [
183
+ "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")"
184
+ ]
185
+ },
186
+ {
187
+ "cell_type": "markdown",
188
+ "metadata": {
189
+ "id": "HbiDQqzngCbn"
190
+ },
191
+ "source": [
192
+ "### Train loop"
193
+ ]
194
+ },
195
+ {
196
+ "cell_type": "code",
197
+ "execution_count": 9,
198
+ "metadata": {
199
+ "colab": {
200
+ "base_uri": "https://localhost:8080/"
201
+ },
202
+ "id": "i39jlG5Aq1Yo",
203
+ "outputId": "a5292b17-f2b9-4a38-db0a-3f97d4923aa4"
204
+ },
205
+ "outputs": [
206
+ {
207
+ "name": "stdout",
208
+ "output_type": "stream",
209
+ "text": [
210
+ "Epoch: 0\n"
211
+ ]
212
+ },
213
+ {
214
+ "name": "stderr",
215
+ "output_type": "stream",
216
+ "text": [
217
+ "We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.\n"
218
+ ]
219
+ },
220
+ {
221
+ "ename": "KeyboardInterrupt",
222
+ "evalue": "",
223
+ "output_type": "error",
224
+ "traceback": [
225
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
226
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
227
+ "Cell \u001b[0;32mIn[9], line 25\u001b[0m\n\u001b[1;32m 22\u001b[0m total_examples \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m labels\u001b[38;5;241m.\u001b[39mnumel()\n\u001b[1;32m 24\u001b[0m loss\u001b[38;5;241m.\u001b[39mbackward()\n\u001b[0;32m---> 25\u001b[0m \u001b[43moptimizer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstep\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 26\u001b[0m optimizer\u001b[38;5;241m.\u001b[39mzero_grad()\n\u001b[1;32m 28\u001b[0m average_loss \u001b[38;5;241m=\u001b[39m total_loss \u001b[38;5;241m/\u001b[39m \u001b[38;5;28mlen\u001b[39m(train_loader)\n",
228
+ "File \u001b[0;32m~/env/venv/lib/python3.10/site-packages/torch/optim/optimizer.py:385\u001b[0m, in \u001b[0;36mOptimizer.profile_hook_step.<locals>.wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 380\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 381\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\n\u001b[1;32m 382\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfunc\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m must return None or a tuple of (new_args, new_kwargs), but got \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresult\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 383\u001b[0m )\n\u001b[0;32m--> 385\u001b[0m out \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 386\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_optimizer_step_code()\n\u001b[1;32m 388\u001b[0m \u001b[38;5;66;03m# call optimizer step post hooks\u001b[39;00m\n",
229
+ "File \u001b[0;32m~/env/venv/lib/python3.10/site-packages/torch/optim/optimizer.py:76\u001b[0m, in \u001b[0;36m_use_grad_for_differentiable.<locals>._use_grad\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 74\u001b[0m torch\u001b[38;5;241m.\u001b[39mset_grad_enabled(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdefaults[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdifferentiable\u001b[39m\u001b[38;5;124m'\u001b[39m])\n\u001b[1;32m 75\u001b[0m torch\u001b[38;5;241m.\u001b[39m_dynamo\u001b[38;5;241m.\u001b[39mgraph_break()\n\u001b[0;32m---> 76\u001b[0m ret \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 77\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 78\u001b[0m torch\u001b[38;5;241m.\u001b[39m_dynamo\u001b[38;5;241m.\u001b[39mgraph_break()\n",
230
+ "File \u001b[0;32m~/env/venv/lib/python3.10/site-packages/torch/optim/adamw.py:187\u001b[0m, in \u001b[0;36mAdamW.step\u001b[0;34m(self, closure)\u001b[0m\n\u001b[1;32m 174\u001b[0m beta1, beta2 \u001b[38;5;241m=\u001b[39m group[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbetas\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 176\u001b[0m has_complex \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_init_group(\n\u001b[1;32m 177\u001b[0m group,\n\u001b[1;32m 178\u001b[0m params_with_grad,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 184\u001b[0m state_steps,\n\u001b[1;32m 185\u001b[0m )\n\u001b[0;32m--> 187\u001b[0m \u001b[43madamw\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 188\u001b[0m \u001b[43m \u001b[49m\u001b[43mparams_with_grad\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 189\u001b[0m \u001b[43m \u001b[49m\u001b[43mgrads\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 190\u001b[0m \u001b[43m \u001b[49m\u001b[43mexp_avgs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 191\u001b[0m \u001b[43m \u001b[49m\u001b[43mexp_avg_sqs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 192\u001b[0m \u001b[43m \u001b[49m\u001b[43mmax_exp_avg_sqs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 193\u001b[0m \u001b[43m \u001b[49m\u001b[43mstate_steps\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 194\u001b[0m \u001b[43m \u001b[49m\u001b[43mamsgrad\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mamsgrad\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 195\u001b[0m \u001b[43m \u001b[49m\u001b[43mbeta1\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbeta1\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 196\u001b[0m \u001b[43m \u001b[49m\u001b[43mbeta2\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbeta2\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 197\u001b[0m \u001b[43m \u001b[49m\u001b[43mlr\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgroup\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlr\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 198\u001b[0m \u001b[43m \u001b[49m\u001b[43mweight_decay\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgroup\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mweight_decay\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 199\u001b[0m \u001b[43m \u001b[49m\u001b[43meps\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgroup\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43meps\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 200\u001b[0m \u001b[43m \u001b[49m\u001b[43mmaximize\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgroup\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmaximize\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 201\u001b[0m \u001b[43m \u001b[49m\u001b[43mforeach\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgroup\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mforeach\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 202\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mcapturable\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgroup\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcapturable\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 203\u001b[0m \u001b[43m \u001b[49m\u001b[43mdifferentiable\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgroup\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdifferentiable\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 204\u001b[0m \u001b[43m \u001b[49m\u001b[43mfused\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgroup\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mfused\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 205\u001b[0m \u001b[43m \u001b[49m\u001b[43mgrad_scale\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mgrad_scale\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 206\u001b[0m \u001b[43m \u001b[49m\u001b[43mfound_inf\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mfound_inf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 207\u001b[0m \u001b[43m \u001b[49m\u001b[43mhas_complex\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhas_complex\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 208\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 210\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m loss\n",
231
+ "File \u001b[0;32m~/env/venv/lib/python3.10/site-packages/torch/optim/adamw.py:339\u001b[0m, in \u001b[0;36madamw\u001b[0;34m(params, grads, exp_avgs, exp_avg_sqs, max_exp_avg_sqs, state_steps, foreach, capturable, differentiable, fused, grad_scale, found_inf, has_complex, amsgrad, beta1, beta2, lr, weight_decay, eps, maximize)\u001b[0m\n\u001b[1;32m 336\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 337\u001b[0m func \u001b[38;5;241m=\u001b[39m _single_tensor_adamw\n\u001b[0;32m--> 339\u001b[0m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 340\u001b[0m \u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 341\u001b[0m \u001b[43m \u001b[49m\u001b[43mgrads\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 342\u001b[0m \u001b[43m \u001b[49m\u001b[43mexp_avgs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 343\u001b[0m \u001b[43m \u001b[49m\u001b[43mexp_avg_sqs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 344\u001b[0m \u001b[43m \u001b[49m\u001b[43mmax_exp_avg_sqs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 345\u001b[0m \u001b[43m \u001b[49m\u001b[43mstate_steps\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 346\u001b[0m \u001b[43m \u001b[49m\u001b[43mamsgrad\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mamsgrad\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 347\u001b[0m \u001b[43m \u001b[49m\u001b[43mbeta1\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbeta1\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 348\u001b[0m \u001b[43m \u001b[49m\u001b[43mbeta2\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbeta2\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 349\u001b[0m \u001b[43m \u001b[49m\u001b[43mlr\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlr\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 350\u001b[0m \u001b[43m \u001b[49m\u001b[43mweight_decay\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mweight_decay\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 351\u001b[0m \u001b[43m \u001b[49m\u001b[43meps\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43meps\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 352\u001b[0m \u001b[43m \u001b[49m\u001b[43mmaximize\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmaximize\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 353\u001b[0m \u001b[43m \u001b[49m\u001b[43mcapturable\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcapturable\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 354\u001b[0m \u001b[43m \u001b[49m\u001b[43mdifferentiable\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdifferentiable\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 355\u001b[0m \u001b[43m \u001b[49m\u001b[43mgrad_scale\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgrad_scale\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 356\u001b[0m \u001b[43m \u001b[49m\u001b[43mfound_inf\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfound_inf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 357\u001b[0m \u001b[43m \u001b[49m\u001b[43mhas_complex\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhas_complex\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 358\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
232
+ "File \u001b[0;32m~/env/venv/lib/python3.10/site-packages/torch/optim/adamw.py:552\u001b[0m, in \u001b[0;36m_multi_tensor_adamw\u001b[0;34m(params, grads, exp_avgs, exp_avg_sqs, max_exp_avg_sqs, state_steps, grad_scale, found_inf, amsgrad, beta1, beta2, lr, weight_decay, eps, maximize, capturable, differentiable, has_complex)\u001b[0m\n\u001b[1;32m 549\u001b[0m torch\u001b[38;5;241m.\u001b[39m_foreach_lerp_(device_exp_avgs, device_grads, \u001b[38;5;241m1\u001b[39m \u001b[38;5;241m-\u001b[39m beta1)\n\u001b[1;32m 551\u001b[0m torch\u001b[38;5;241m.\u001b[39m_foreach_mul_(device_exp_avg_sqs, beta2)\n\u001b[0;32m--> 552\u001b[0m \u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_foreach_addcmul_\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdevice_exp_avg_sqs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdevice_grads\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdevice_grads\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m-\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mbeta2\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 554\u001b[0m \u001b[38;5;66;03m# Delete the local intermediate since it won't be used anymore to save on peak memory\u001b[39;00m\n\u001b[1;32m 555\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m device_grads\n",
233
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
234
+ ]
235
+ }
236
+ ],
237
+ "source": [
238
+ "optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)\n",
239
+ "model.to(device)\n",
240
+ "model.train()\n",
241
+ "for epoch in range(5):\n",
242
+ " print(\"Epoch:\", epoch)\n",
243
+ " total_loss = 0.0\n",
244
+ " total_correct = 0\n",
245
+ " total_examples = 0\n",
246
+ "\n",
247
+ " for idx, batch in enumerate(train_loader):\n",
248
+ " input_ids = batch.pop(\"input_ids\").to(device)\n",
249
+ " pixel_values = batch.pop(\"pixel_values\").to(device)\n",
250
+ " labels = input_ids\n",
251
+ "\n",
252
+ " outputs = model(input_ids=input_ids, pixel_values=pixel_values, labels=labels)\n",
253
+ " loss = outputs.loss\n",
254
+ " total_loss += loss.item()\n",
255
+ "\n",
256
+ " predictions = torch.argmax(outputs.logits, dim=-1)\n",
257
+ " correct = (predictions == labels).sum().item()\n",
258
+ " total_correct += correct\n",
259
+ " total_examples += labels.numel()\n",
260
+ "\n",
261
+ " loss.backward()\n",
262
+ " optimizer.step()\n",
263
+ " optimizer.zero_grad()\n",
264
+ "\n",
265
+ " average_loss = total_loss / len(train_loader)\n",
266
+ " accuracy = total_correct / total_examples\n",
267
+ " print(f\"Average Loss for epoch {epoch}: {average_loss:.4f}\")\n",
268
+ " print(f\"Accuracy for epoch {epoch}: {accuracy:.2f}\")"
269
+ ]
270
+ },
271
+ {
272
+ "cell_type": "markdown",
273
+ "metadata": {
274
+ "id": "Dc4j-hLrgE6r"
275
+ },
276
+ "source": [
277
+ "### Test loop"
278
+ ]
279
+ },
280
+ {
281
+ "cell_type": "code",
282
+ "execution_count": null,
283
+ "metadata": {
284
+ "id": "sMEMW6MiO0sS"
285
+ },
286
+ "outputs": [],
287
+ "source": [
288
+ "model.eval()\n",
289
+ "with torch.no_grad():\n",
290
+ " total_loss = 0.0\n",
291
+ " total_correct = 0\n",
292
+ " total_examples = 0\n",
293
+ "\n",
294
+ " for idx, batch in enumerate(test_loader):\n",
295
+ " input_ids = batch.pop(\"input_ids\").to(device)\n",
296
+ " pixel_values = batch.pop(\"pixel_values\").to(device)\n",
297
+ " labels = input_ids\n",
298
+ "\n",
299
+ " outputs = model(input_ids=input_ids, pixel_values=pixel_values, labels=labels)\n",
300
+ " loss = outputs.loss\n",
301
+ " total_loss += loss.item()\n",
302
+ "\n",
303
+ " predictions = torch.argmax(outputs.logits, dim=-1)\n",
304
+ " correct = (predictions == labels).sum().item()\n",
305
+ " total_correct += correct\n",
306
+ " total_examples += labels.numel()\n",
307
+ "\n",
308
+ " average_loss = total_loss / len(test_loader)\n",
309
+ " accuracy = total_correct / total_examples\n",
310
+ " print(f\"Test Average Loss: {average_loss:.4f}\")\n",
311
+ " print(f\"Test Accuracy: {accuracy:.2f}\")"
312
+ ]
313
+ },
314
+ {
315
+ "cell_type": "code",
316
+ "execution_count": null,
317
+ "metadata": {
318
+ "id": "qcKs5-3Jgz-M"
319
+ },
320
+ "outputs": [],
321
+ "source": []
322
+ },
323
+ {
324
+ "cell_type": "code",
325
+ "execution_count": null,
326
+ "metadata": {
327
+ "id": "ObYnoCzag0Aq"
328
+ },
329
+ "outputs": [],
330
+ "source": []
331
+ },
332
+ {
333
+ "cell_type": "code",
334
+ "execution_count": null,
335
+ "metadata": {
336
+ "id": "rY6u33avg0CM"
337
+ },
338
+ "outputs": [],
339
+ "source": []
340
+ },
341
+ {
342
+ "cell_type": "code",
343
+ "execution_count": null,
344
+ "metadata": {
345
+ "id": "8EZkrYFqg0E2"
346
+ },
347
+ "outputs": [],
348
+ "source": []
349
+ },
350
+ {
351
+ "cell_type": "code",
352
+ "execution_count": 10,
353
+ "metadata": {
354
+ "id": "qBmjfndHgzFj"
355
+ },
356
+ "outputs": [
357
+ {
358
+ "name": "stderr",
359
+ "output_type": "stream",
360
+ "text": [
361
+ "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
362
+ "To disable this warning, you can either:\n",
363
+ "\t- Avoid using `tokenizers` before the fork if possible\n",
364
+ "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
365
+ ]
366
+ },
367
+ {
368
+ "name": "stdout",
369
+ "output_type": "stream",
370
+ "text": [
371
+ "Requirement already satisfied: huggingface_hub in /home/[email protected]/env/venv/lib/python3.10/site-packages (0.22.2)\n",
372
+ "Requirement already satisfied: tqdm>=4.42.1 in /home/[email protected]/env/venv/lib/python3.10/site-packages (from huggingface_hub) (4.66.2)\n",
373
+ "Requirement already satisfied: requests in /home/[email protected]/env/venv/lib/python3.10/site-packages (from huggingface_hub) (2.31.0)\n",
374
+ "Requirement already satisfied: typing-extensions>=3.7.4.3 in /home/[email protected]/env/venv/lib/python3.10/site-packages (from huggingface_hub) (4.11.0)\n",
375
+ "Requirement already satisfied: filelock in /home/[email protected]/env/venv/lib/python3.10/site-packages (from huggingface_hub) (3.13.4)\n",
376
+ "Requirement already satisfied: fsspec>=2023.5.0 in /home/[email protected]/env/venv/lib/python3.10/site-packages (from huggingface_hub) (2024.3.1)\n",
377
+ "Requirement already satisfied: pyyaml>=5.1 in /home/[email protected]/env/venv/lib/python3.10/site-packages (from huggingface_hub) (6.0.1)\n",
378
+ "Requirement already satisfied: packaging>=20.9 in /home/[email protected]/env/venv/lib/python3.10/site-packages (from huggingface_hub) (24.0)\n",
379
+ "Requirement already satisfied: certifi>=2017.4.17 in /home/[email protected]/env/venv/lib/python3.10/site-packages (from requests->huggingface_hub) (2024.2.2)\n",
380
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /home/[email protected]/env/venv/lib/python3.10/site-packages (from requests->huggingface_hub) (2.2.1)\n",
381
+ "Requirement already satisfied: idna<4,>=2.5 in /home/[email protected]/env/venv/lib/python3.10/site-packages (from requests->huggingface_hub) (3.7)\n",
382
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /home/[email protected]/env/venv/lib/python3.10/site-packages (from requests->huggingface_hub) (3.3.2)\n",
383
+ "Note: you may need to restart the kernel to use updated packages.\n"
384
+ ]
385
+ }
386
+ ],
387
+ "source": [
388
+ "pip install huggingface_hub"
389
+ ]
390
+ },
391
+ {
392
+ "cell_type": "markdown",
393
+ "metadata": {
394
+ "id": "ISBzxw0Igout"
395
+ },
396
+ "source": [
397
+ "### Gradio webapp"
398
+ ]
399
+ },
400
+ {
401
+ "cell_type": "code",
402
+ "execution_count": null,
403
+ "metadata": {
404
+ "colab": {
405
+ "base_uri": "https://localhost:8080/",
406
+ "height": 337
407
+ },
408
+ "id": "tHSnxN7AZw8a",
409
+ "outputId": "8fc49c5d-de24-4a57-e86d-2e63010b382d"
410
+ },
411
+ "outputs": [
412
+ {
413
+ "ename": "ModuleNotFoundError",
414
+ "errorDetails": {
415
+ "actions": [
416
+ {
417
+ "action": "open_url",
418
+ "actionText": "Open Examples",
419
+ "url": "/notebooks/snippets/importing_libraries.ipynb"
420
+ }
421
+ ]
422
+ },
423
+ "evalue": "No module named 'gradio'",
424
+ "output_type": "error",
425
+ "traceback": [
426
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
427
+ "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
428
+ "\u001b[0;32m<ipython-input-38-c71c84f2e5e0>\u001b[0m in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mgradio\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mgr\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mgradio\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcomponents\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mLabel\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
429
+ "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'gradio'",
430
+ "",
431
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0;32m\nNOTE: If your import is failing due to a missing package, you can\nmanually install dependencies using either !pip or !apt.\n\nTo view examples of installing some common dependencies, click the\n\"Open Examples\" button below.\n\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n"
432
+ ]
433
+ }
434
+ ],
435
+ "source": [
436
+ "import gradio as gr\n",
437
+ "from gradio.components import Label"
438
+ ]
439
+ },
440
+ {
441
+ "cell_type": "code",
442
+ "execution_count": null,
443
+ "metadata": {
444
+ "id": "eNDHwvGEad6n"
445
+ },
446
+ "outputs": [],
447
+ "source": [
448
+ "model.eval() # Mettez votre modèle en mode évaluation\n",
449
+ "\n",
450
+ "# Fonction d'inférence pour Gradio\n",
451
+ "def predict(image):\n",
452
+ " processor = AutoProcessor.from_pretrained(\"Salesforce/blip-image-captioning-base\")\n",
453
+ " inputs = processor(images=image, return_tensors=\"pt\").to(device)\n",
454
+ " pixel_values = inputs.pixel_values\n",
455
+ "\n",
456
+ " generated_ids = model.generate(pixel_values=pixel_values, max_length=50)\n",
457
+ " generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]\n",
458
+ "\n",
459
+ "# Création de l'interface Gradio\n",
460
+ "iface = gr.Interface(fn=predict,\n",
461
+ " inputs=gr.components.Textbox(placeholder=\"Enter your text here...\"),\n",
462
+ " outputs=gr.components.Label(num_top_classes=2))\n",
463
+ "iface.launch(share=True)"
464
+ ]
465
+ }
466
+ ],
467
+ "metadata": {
468
+ "accelerator": "GPU",
469
+ "colab": {
470
+ "gpuType": "T4",
471
+ "provenance": []
472
+ },
473
+ "kernelspec": {
474
+ "display_name": "venv",
475
+ "language": "python",
476
+ "name": "venv"
477
+ },
478
+ "language_info": {
479
+ "codemirror_mode": {
480
+ "name": "ipython",
481
+ "version": 3
482
+ },
483
+ "file_extension": ".py",
484
+ "mimetype": "text/x-python",
485
+ "name": "python",
486
+ "nbconvert_exporter": "python",
487
+ "pygments_lexer": "ipython3",
488
+ "version": "3.10.12"
489
+ }
490
+ },
491
+ "nbformat": 4,
492
+ "nbformat_minor": 4
493
+ }
config.json ADDED
@@ -0,0 +1,27 @@
1
+ {
2
+ "_name_or_path": "Salesforce/blip-image-captioning-base",
3
+ "architectures": [
4
+ "BlipForConditionalGeneration"
5
+ ],
6
+ "image_text_hidden_size": 256,
7
+ "initializer_factor": 1.0,
8
+ "initializer_range": 0.02,
9
+ "label_smoothing": 0.0,
10
+ "logit_scale_init_value": 2.6592,
11
+ "model_type": "blip",
12
+ "projection_dim": 512,
13
+ "text_config": {
14
+ "initializer_factor": 1.0,
15
+ "model_type": "blip_text_model",
16
+ "num_attention_heads": 12
17
+ },
18
+ "torch_dtype": "float32",
19
+ "transformers_version": "4.38.2",
20
+ "vision_config": {
21
+ "dropout": 0.0,
22
+ "initializer_factor": 1.0,
23
+ "initializer_range": 0.02,
24
+ "model_type": "blip_vision_model",
25
+ "num_channels": 3
26
+ }
27
+ }
generation_config.json ADDED
@@ -0,0 +1,7 @@
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 30522,
4
+ "eos_token_id": 2,
5
+ "pad_token_id": 0,
6
+ "transformers_version": "4.38.2"
7
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:393dfdb97bb82ebafd725764c989a0fbf37086428ebdec3b5625c8bd4916e412
3
+ size 989717056
preprocessor_config.json ADDED
@@ -0,0 +1,24 @@
1
+ {
2
+ "do_convert_rgb": true,
3
+ "do_normalize": true,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "image_mean": [
7
+ 0.48145466,
8
+ 0.4578275,
9
+ 0.40821073
10
+ ],
11
+ "image_processor_type": "BlipImageProcessor",
12
+ "image_std": [
13
+ 0.26862954,
14
+ 0.26130258,
15
+ 0.27577711
16
+ ],
17
+ "processor_class": "BlipProcessor",
18
+ "resample": 3,
19
+ "rescale_factor": 0.00392156862745098,
20
+ "size": {
21
+ "height": 384,
22
+ "width": 384
23
+ }
24
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,62 @@
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "mask_token": "[MASK]",
49
+ "model_input_names": [
50
+ "input_ids",
51
+ "attention_mask"
52
+ ],
53
+ "model_max_length": 512,
54
+ "never_split": null,
55
+ "pad_token": "[PAD]",
56
+ "processor_class": "BlipProcessor",
57
+ "sep_token": "[SEP]",
58
+ "strip_accents": null,
59
+ "tokenize_chinese_chars": true,
60
+ "tokenizer_class": "BertTokenizer",
61
+ "unk_token": "[UNK]"
62
+ }