Shriharshan commited on
Commit
d3debb8
·
1 Parent(s): 5df03ae

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -255
app.py DELETED
@@ -1,255 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "markdown",
5
- "metadata": {},
6
- "source": [
7
- "# Image captioning with ViT+GPT2"
8
- ]
9
- },
10
- {
11
- "cell_type": "code",
12
- "execution_count": 1,
13
- "metadata": {},
14
- "outputs": [
15
- {
16
- "name": "stderr",
17
- "output_type": "stream",
18
- "text": [
19
- "f:\\Image caption genrerator\\image_caption\\lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
20
- " from .autonotebook import tqdm as notebook_tqdm\n"
21
- ]
22
- }
23
- ],
24
- "source": [
25
- "from PIL import Image\n",
26
- "from transformers import VisionEncoderDecoderModel, ViTFeatureExtractor, PreTrainedTokenizerFast\n",
27
- "import requests"
28
- ]
29
- },
30
- {
31
- "cell_type": "code",
32
- "execution_count": 2,
33
- "metadata": {},
34
- "outputs": [],
35
- "source": [
36
- "model = VisionEncoderDecoderModel.from_pretrained(\"nlpconnect/vit-gpt2-image-captioning\")"
37
- ]
38
- },
39
- {
40
- "cell_type": "code",
41
- "execution_count": 3,
42
- "metadata": {},
43
- "outputs": [
44
- {
45
- "name": "stderr",
46
- "output_type": "stream",
47
- "text": [
48
- "f:\\Image caption genrerator\\image_caption\\lib\\site-packages\\transformers\\models\\vit\\feature_extraction_vit.py:28: FutureWarning: The class ViTFeatureExtractor is deprecated and will be removed in version 5 of Transformers. Please use ViTImageProcessor instead.\n",
49
- " warnings.warn(\n"
50
- ]
51
- }
52
- ],
53
- "source": [
54
- "vit_feature_extactor = ViTFeatureExtractor.from_pretrained(\"google/vit-base-patch16-224-in21k\")"
55
- ]
56
- },
57
- {
58
- "cell_type": "code",
59
- "execution_count": 4,
60
- "metadata": {},
61
- "outputs": [
62
- {
63
- "name": "stderr",
64
- "output_type": "stream",
65
- "text": [
66
- "The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. \n",
67
- "The tokenizer class you load from this checkpoint is 'GPT2Tokenizer'. \n",
68
- "The class this function is called from is 'PreTrainedTokenizerFast'.\n"
69
- ]
70
- }
71
- ],
72
- "source": [
73
- "tokenizer = PreTrainedTokenizerFast.from_pretrained(\"distilgpt2\")"
74
- ]
75
- },
76
- {
77
- "cell_type": "code",
78
- "execution_count": 5,
79
- "metadata": {},
80
- "outputs": [],
81
- "source": [
82
- "#url = 'https://d2gp644kobdlm6.cloudfront.net/wp-content/uploads/2016/06/bigstock-Shocked-and-surprised-boy-on-t-113798588-300x212.jpg'"
83
- ]
84
- },
85
- {
86
- "cell_type": "code",
87
- "execution_count": 6,
88
- "metadata": {},
89
- "outputs": [],
90
- "source": [
91
- "# with Image.open(requests.get(url, stream=True).raw) as img:\n",
92
- "# pixel_values = vit_feature_extactor(images=img, return_tensors=\"pt\").pixel_values"
93
- ]
94
- },
95
- {
96
- "cell_type": "code",
97
- "execution_count": 7,
98
- "metadata": {},
99
- "outputs": [
100
- {
101
- "name": "stderr",
102
- "output_type": "stream",
103
- "text": [
104
- "f:\\Image caption genrerator\\image_caption\\lib\\site-packages\\transformers\\generation\\utils.py:1346: UserWarning: Using `max_length`'s default (20) to control the generation length. This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we recommend using `max_new_tokens` to control the maximum length of the generation.\n",
105
- " warnings.warn(\n"
106
- ]
107
- }
108
- ],
109
- "source": [
110
- "# encoder_outputs = model.generate(pixel_values.to('cpu'),num_beams = 5)"
111
- ]
112
- },
113
- {
114
- "cell_type": "code",
115
- "execution_count": 8,
116
- "metadata": {},
117
- "outputs": [],
118
- "source": [
119
- "# generated_senetences = tokenizer.batch_decode(encoder_outputs, skip_special_tokens=True,)"
120
- ]
121
- },
122
- {
123
- "cell_type": "code",
124
- "execution_count": 9,
125
- "metadata": {},
126
- "outputs": [
127
- {
128
- "data": {
129
- "text/plain": [
130
- "['a young boy sitting in front of a laptop computer ']"
131
- ]
132
- },
133
- "execution_count": 9,
134
- "metadata": {},
135
- "output_type": "execute_result"
136
- }
137
- ],
138
- "source": [
139
- "# generated_senetences"
140
- ]
141
- },
142
- {
143
- "cell_type": "code",
144
- "execution_count": 11,
145
- "metadata": {},
146
- "outputs": [
147
- {
148
- "data": {
149
- "text/plain": [
150
- "'a young boy sitting in front of a laptop computer '"
151
- ]
152
- },
153
- "execution_count": 11,
154
- "metadata": {},
155
- "output_type": "execute_result"
156
- }
157
- ],
158
- "source": [
159
- "# generated_senetences[0].split(\".\")[0]"
160
- ]
161
- },
162
- {
163
- "cell_type": "code",
164
- "execution_count": 13,
165
- "metadata": {},
166
- "outputs": [],
167
- "source": [
168
- "def vit2distilgpt2(img):\n",
169
- " pixel_values = vit_feature_extactor(images=img, return_tensors=\"pt\").pixel_values\n",
170
- " encoder_outputs = generated_ids = model.generate(pixel_values.to('cpu'),num_beams=5)\n",
171
- " generated_senetences = tokenizer.batch_decode(encoder_outputs, skip_special_tokens=True)\n",
172
- "\n",
173
- " return(generated_senetences[0].split('.')[0])"
174
- ]
175
- },
176
- {
177
- "cell_type": "code",
178
- "execution_count": 14,
179
- "metadata": {},
180
- "outputs": [],
181
- "source": [
182
- "import gradio as gr"
183
- ]
184
- },
185
- {
186
- "cell_type": "code",
187
- "execution_count": 2,
188
- "metadata": {},
189
- "outputs": [
190
- {
191
- "ename": "NameError",
192
- "evalue": "name 'gr' is not defined",
193
- "output_type": "error",
194
- "traceback": [
195
- "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
196
- "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
197
- "Cell \u001b[1;32mIn[2], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m inputs \u001b[39m=\u001b[39m [\n\u001b[1;32m----> 2\u001b[0m gr\u001b[39m.\u001b[39minputs\u001b[39m.\u001b[39mImage(\u001b[39mtype\u001b[39m\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mpil\u001b[39m\u001b[39m\"\u001b[39m,label\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mOriginal Images\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m 3\u001b[0m ]\n\u001b[0;32m 5\u001b[0m outputs \u001b[39m=\u001b[39m [\n\u001b[0;32m 6\u001b[0m gr\u001b[39m.\u001b[39moutputs\u001b[39m.\u001b[39mTextbox(label \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mCaption\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m 7\u001b[0m ]\n\u001b[0;32m 9\u001b[0m title \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mImage Captioning using ViT + GPT2\u001b[39m\u001b[39m\"\u001b[39m\n",
198
- "\u001b[1;31mNameError\u001b[0m: name 'gr' is not defined"
199
- ]
200
- }
201
- ],
202
- "source": [
203
- "inputs = [\n",
204
- " gr.inputs.Image(type=\"pil\",label=\"Original Images\")\n",
205
- "]\n",
206
- "\n",
207
- "outputs = [\n",
208
- " gr.outputs.Textbox(label = \"Caption\")\n",
209
- "]\n",
210
- "\n",
211
- "title = \"Image Captioning using ViT + GPT2\"\n",
212
- "description = \"ViT and GPT2 are used to generate Image Caption for the uploaded image.COCO DataSet is used for Training\"\n",
213
- "examples = [\n",
214
- " [\".Image1.png\"],\n",
215
- " [\".Image2.png\"],\n",
216
- " [\".Image3.png\"]\n",
217
- "]\n",
218
- "\n",
219
- "\n",
220
- "\n",
221
- "\n",
222
- "gr.Interface(\n",
223
- " vit2distilgpt2,\n",
224
- " inputs,\n",
225
- " outputs,\n",
226
- " title=title,\n",
227
- " description=description,\n",
228
- " examples=examples,\n",
229
- " theme=\"huggingface\",\n",
230
- ").launch(debug=True, enable_queue=True, share=True)"
231
- ]
232
- }
233
- ],
234
- "metadata": {
235
- "kernelspec": {
236
- "display_name": "Python 3 (ipykernel)",
237
- "language": "python",
238
- "name": "python3"
239
- },
240
- "language_info": {
241
- "codemirror_mode": {
242
- "name": "ipython",
243
- "version": 3
244
- },
245
- "file_extension": ".py",
246
- "mimetype": "text/x-python",
247
- "name": "python",
248
- "nbconvert_exporter": "python",
249
- "pygments_lexer": "ipython3",
250
- "version": "3.10.9"
251
- }
252
- },
253
- "nbformat": 4,
254
- "nbformat_minor": 2
255
- }