# Spaces: Running
import tempfile
from io import BytesIO

import gradio as gr
import numpy as np
import requests
from PIL import Image
from transformers import AutoProcessor, BlipForConditionalGeneration
# Load the pretrained BLIP captioning processor and model once, at import
# time; both are created from the same checkpoint identifier.
BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"

processor = AutoProcessor.from_pretrained(BLIP_CHECKPOINT)
model = BlipForConditionalGeneration.from_pretrained(BLIP_CHECKPOINT)
def fetch_image(url: str, timeout: float = 10.0) -> np.ndarray:
    """Fetch an image from a URL and return it as an RGB numpy array.

    Args:
        url: HTTP(S) address of the image. Surrounding whitespace is ignored.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled server would block the request indefinitely.

    Returns:
        The decoded image, converted to RGB, as a numpy array.

    Raises:
        ValueError: If the URL is empty or not HTTP(S), or if downloading or
            decoding the image fails for any reason. The original exception
            is attached as ``__cause__``.
    """
    if not isinstance(url, str) or not url.strip():
        raise ValueError("Failed to fetch image: URL is empty.")
    url = url.strip()
    if not url.lower().startswith(("http://", "https://")):
        raise ValueError(
            "Failed to fetch image: URL must start with http:// or https://."
        )

    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        # Use the fully downloaded, transfer-decoded body instead of
        # ``response.raw``: the raw stream is not gzip/deflate-decoded and
        # keeps the connection checked out until it is explicitly closed.
        image = Image.open(BytesIO(response.content)).convert("RGB")
        return np.array(image)
    except Exception as e:
        # Callers rely on a single exception type for every failure mode
        # (network error, HTTP error status, undecodable image data).
        raise ValueError(f"Failed to fetch image: {str(e)}") from e
def caption_image(input_image=None, image_url=None):
    """Generate a caption for an uploaded image or an image fetched from a URL.

    A non-blank ``image_url`` takes precedence over ``input_image``.

    Args:
        input_image: Image data accepted by ``Image.fromarray`` (the UI passes
            a numpy array), or None when nothing was uploaded.
        image_url: URL of an image to download, or None / blank to use
            ``input_image`` instead.

    Returns:
        A ``(caption, caption_path)`` tuple, where ``caption_path`` is the
        path of a text file containing the caption. On any failure the tuple
        is ``("Error: <message>", None)``; no exception propagates.
    """
    try:
        # A blank or whitespace-only textbox means "no URL": fall through to
        # the uploaded image instead of attempting to fetch an empty address.
        url = image_url.strip() if isinstance(image_url, str) else image_url
        if url:
            image_array = fetch_image(url)
        elif input_image is not None:
            image_array = input_image
        else:
            raise ValueError("Please provide either an image or an image URL.")

        # Ensure the image is in RGB format
        pil_image = Image.fromarray(image_array).convert("RGB")

        # Process the image and generate caption
        inputs = processor(pil_image, return_tensors="pt")
        out = model.generate(**inputs, max_length=50)
        caption = processor.decode(out[0], skip_special_tokens=True)

        # Save the caption to a unique temporary file so that concurrent
        # requests cannot overwrite each other's download; delete=False keeps
        # the file on disk after the handle is closed so it can be served.
        with tempfile.NamedTemporaryFile(
            mode="w",
            encoding="utf-8",
            prefix="caption_",
            suffix=".txt",
            delete=False,
        ) as f:
            f.write(caption)
            caption_path = f.name
        return caption, caption_path
    except Exception as e:
        # Deliberately broad: every failure is reported in the caption box
        # rather than raised to the caller.
        return f"Error: {str(e)}", None
# Input widgets: an uploaded image (delivered to the callback as a numpy
# array) and an optional URL textbox.
image_input = gr.Image(type="numpy", label="Upload Image")
url_input = gr.Textbox(
    label="Image URL (Optional)",
    placeholder="Enter an image URL here",
)

# Output widgets: the caption text and the caption file for download.
caption_output = gr.Textbox(label="Generated Caption")
caption_file_output = gr.File(label="Download Caption")

# Example rows supply only the image input; the file names are relative paths.
example_rows = [
    ["model.jpg"],
    ["horse.jpeg"],
    ["panda.jpg"],
]

# Build the UI around caption_image and start serving it.
iface = gr.Interface(
    fn=caption_image,
    inputs=[image_input, url_input],
    outputs=[caption_output, caption_file_output],
    examples=example_rows,
    title="Advanced Image Captioning with the BLIP model",
    description="Upload an image or provide a URL to an image to generate a caption. You can also drag and drop the example images. Download the generated caption as a .txt file if needed.",
    live=True,
    theme="compact",
)
iface.launch()