Spaces:
Running
Running
File size: 2,478 Bytes
073fe88 b3a5b20 073fe88 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
import tempfile
from io import BytesIO

import gradio as gr
import numpy as np
import requests
from PIL import Image
from transformers import AutoProcessor, BlipForConditionalGeneration
# The processor and the captioning model are loaded from the same pretrained
# checkpoint, so the identifier is named once and reused.
BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"

processor = AutoProcessor.from_pretrained(BLIP_CHECKPOINT)
model = BlipForConditionalGeneration.from_pretrained(BLIP_CHECKPOINT)
def fetch_image(url: str, timeout: float = 10.0) -> np.ndarray:
    """Download the image at *url* and return it as an RGB numpy array.

    Args:
        url: Address of the image to download.
        timeout: Seconds to wait on the server before giving up, so a
            stalled host cannot hang the caller indefinitely.

    Returns:
        The decoded image, converted to RGB, as a numpy array.

    Raises:
        ValueError: If the download or the decoding fails for any reason.
            The underlying exception is chained as the cause.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        # Decode from the fully-read body instead of ``response.raw``: the
        # raw stream is not content-decoded (e.g. gzip), which can make a
        # perfectly valid image unreadable.
        with Image.open(BytesIO(response.content)) as image:
            return np.array(image.convert("RGB"))
    except Exception as exc:
        # Callers rely on a single exception type for every fetch failure.
        raise ValueError(f"Failed to fetch image: {exc}") from exc
def caption_image(input_image=None, image_url=None):
    """Generate a caption for an uploaded image or one fetched from a URL.

    A non-blank ``image_url`` takes precedence over ``input_image``.

    Args:
        input_image: Image as a numpy array, or ``None`` when nothing was
            uploaded.
        image_url: URL of an image to download. ``None``, an empty string,
            or a whitespace-only string means "no URL given".

    Returns:
        A ``(caption, caption_path)`` tuple, where ``caption_path`` points to
        a text file containing the caption. On any failure the function does
        not raise; it returns ``("Error: <message>", None)`` instead.
    """
    try:
        # A URL box containing only whitespace should not trigger a fetch.
        url = image_url.strip() if isinstance(image_url, str) else image_url

        if url:
            image_array = fetch_image(url)
        elif input_image is not None:
            image_array = input_image
        else:
            raise ValueError("Please provide either an image or an image URL.")

        # Ensure the image is in RGB format
        pil_image = Image.fromarray(image_array).convert("RGB")

        # Process the image and generate caption
        inputs = processor(pil_image, return_tensors="pt")
        out = model.generate(**inputs, max_length=50)
        caption = processor.decode(out[0], skip_special_tokens=True)

        # Write the caption to a uniquely named file. A fixed "caption.txt"
        # would be overwritten by concurrent requests, handing one user
        # another user's caption.
        with tempfile.NamedTemporaryFile(
            "w",
            suffix=".txt",
            prefix="caption_",
            delete=False,
            encoding="utf-8",
        ) as caption_file:
            caption_file.write(caption)
            caption_path = caption_file.name

        return caption, caption_path
    except Exception as e:
        # Report the failure in the caption slot rather than raising.
        return f"Error: {e}", None
# Input widgets, in the order caption_image receives them:
# an uploaded image (delivered as a numpy array) and an optional URL.
input_widgets = [
    gr.Image(type="numpy", label="Upload Image"),
    gr.Textbox(label="Image URL (Optional)", placeholder="Enter an image URL here"),
]

# Output widgets, matching caption_image's (caption, caption_path) return.
output_widgets = [
    gr.Textbox(label="Generated Caption"),
    gr.File(label="Download Caption"),
]

# One example image per row.
example_rows = [[filename] for filename in ("model.jpg", "horse.jpeg", "panda.jpg")]

app_description = (
    "Upload an image or provide a URL to an image to generate a caption. "
    "You can also drag and drop the example images. "
    "Download the generated caption as a .txt file if needed."
)

iface = gr.Interface(
    fn=caption_image,
    inputs=input_widgets,
    outputs=output_widgets,
    examples=example_rows,
    title="Advanced Image Captioning with the BLIP model",
    description=app_description,
    live=True,
    theme="compact",
)

iface.launch()
|