Spaces:
Commit 7295a68 · 1 Parent(s): fbee9c4
chats-bug committed: More testing
app.py
CHANGED
@@ -2,6 +2,8 @@ import gradio as gr
 from transformers import AutoProcessor, AutoTokenizer, AutoImageProcessor, AutoModelForCausalLM, BlipForConditionalGeneration, VisionEncoderDecoderModel, BitsAndBytesConfig
 import torch
 import open_clip
+from PIL import Image
+import requests
 
 from huggingface_hub import hf_hub_download
 
@@ -60,13 +62,13 @@ def generate_caption(
         The generated caption.
     """
     inputs = preprocessor(image, return_tensors="pt").to(device)
+    pixel_values = preprocessor(images=image, return_tensors="pt").pixel_values
 
     if use_float_16:
         inputs = inputs.to(torch.float16)
 
     generated_ids = model.generate(
-        pixel_values=
-        # attention_mask=inputs.attention_mask,
+        pixel_values=pixel_values,
         max_length=64,
     )
 
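For context on this hunk: BLIP-style captioners expect the preprocessed pixel_values tensor to be passed to model.generate, which the removed lines never supplied. A minimal standalone sketch of the same flow, assuming the Salesforce/blip-image-captioning-base checkpoint (the checkpoints actually loaded elsewhere in app.py are not shown in this diff):

import torch
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

device = "cuda" if torch.cuda.is_available() else "cpu"

# Assumed checkpoint for illustration; the Space may load different models.
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device)

def caption(image: Image.Image) -> str:
    # The processor returns a BatchEncoding; generate() consumes its pixel_values tensor.
    pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(device)
    generated_ids = model.generate(pixel_values=pixel_values, max_length=64)
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()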
@@ -113,7 +115,8 @@ def generate_captions_clip(
 def generate_captions(
     image,
     max_length,
-    temperature
+    temperature,
+    use_sample_image,
 ):
     """
     Generate captions for the given image.
@@ -133,6 +136,10 @@ def generate_captions(
     caption_git_large_coco = ""
     caption_oc_coca = ""
 
+    if use_sample_image:
+        url = "http://images.cocodataset.org/val2017/000000039769.jpg"
+        image = Image.open(requests.get(url, stream=True).raw)
+
     # Generate captions for the image using the Blip base model
     try:
         caption_blip_base = generate_caption(preprocessor_blip_base, model_blip_base, image).strip()
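The sample-image branch added above follows the usual transformers-docs pattern of streaming a COCO validation image. A slightly more defensive variant of the same idea, with error handling that is not part of the commit:

from io import BytesIO

import requests
from PIL import Image

COCO_SAMPLE_URL = "http://images.cocodataset.org/val2017/000000039769.jpg"

def load_sample_image(url: str = COCO_SAMPLE_URL) -> Image.Image:
    # Download the bytes and fail loudly on HTTP errors instead of handing
    # a half-read stream to PIL.
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    return Image.open(BytesIO(response.content)).convert("RGB")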
@@ -168,6 +175,7 @@ iface = gr.Interface(
         gr.inputs.Image(label="Image"),
         gr.inputs.Slider(minimum=16, maximum=64, step=2, default=32, label="Max Length"),
         gr.inputs.Slider(minimum=0.5, maximum=1.5, step=0.1, default=1.0, label="Temperature"),
+        gr.inputs.Checkbox(default=False, type="bool", label="Use example image")
     ],
     # Define the outputs
     outputs=[
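A caveat on this hunk: the gr.inputs.* namespace and its default=/type= arguments are the legacy Gradio 2.x API, deprecated in Gradio 3.x in favor of top-level components; the interface still relies on these inputs being passed to generate_captions positionally, in the order listed, so the new checkbox lines up with the new use_sample_image parameter. If the Space upgrades Gradio, the equivalent input list would look roughly like this sketch, which is not part of the commit:

import gradio as gr

inputs = [
    gr.Image(label="Image"),
    gr.Slider(minimum=16, maximum=64, step=2, value=32, label="Max Length"),
    gr.Slider(minimum=0.5, maximum=1.5, step=0.1, value=1.0, label="Temperature"),
    gr.Checkbox(value=False, label="Use example image"),  # value= replaces default=
]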
@@ -182,4 +190,4 @@ iface = gr.Interface(
 )
 
 # Launch the interface
-iface.launch()
+iface.launch(debug=True)
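On the final hunk: as far as I know, debug=True keeps launch() blocking and prints worker tracebacks to the console, which can help when a Space sits in the "Runtime error" state this one shows. A minimal sketch, independent of app.py:

import gradio as gr

def echo(text: str) -> str:
    return text

demo = gr.Interface(fn=echo, inputs="text", outputs="text")
# debug=True keeps the process in the foreground and surfaces tracebacks in the logs.
demo.launch(debug=True)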