Spaces:

Kelex83
/

Caption_images

Running

App Files Files Community

Caption_images / app.py

Kelex83

Update app.py

b3a5b20 verified 6 months ago

raw

history blame contribute delete

2.48 kB

	import tempfile
	from io import BytesIO

	import gradio as gr
	import numpy as np
	import requests
	from PIL import Image
	from transformers import AutoProcessor, BlipForConditionalGeneration

	# Checkpoint identifier handed to both from_pretrained() calls below, so the
	# processor and the model always come from the same release.
	BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"

	# Instantiate the input processor and the captioning model once, at import time
	processor = AutoProcessor.from_pretrained(BLIP_CHECKPOINT)
	model = BlipForConditionalGeneration.from_pretrained(BLIP_CHECKPOINT)

	def fetch_image(url: str, timeout: float = 10.0) -> np.ndarray:
	    """Download an image over HTTP(S) and return it as an RGB numpy array.

	    Args:
	        url: Address of the image. Surrounding whitespace is ignored and
	            only ``http://`` / ``https://`` URLs are accepted.
	        timeout: Seconds to wait for the server before giving up.

	    Returns:
	        The decoded image, converted to RGB and wrapped with ``np.array``.

	    Raises:
	        ValueError: If the URL is blank or not HTTP(S), or if downloading
	            or decoding fails for any reason (the underlying error is
	            chained as ``__cause__``).
	    """
	    cleaned_url = (url or "").strip()
	    if not cleaned_url.lower().startswith(("http://", "https://")):
	        raise ValueError(
	            "Failed to fetch image: URL must start with http:// or https://"
	        )

	    try:
	        # A timeout keeps a stalled server from blocking the request forever;
	        # the context manager releases the connection once the body is read.
	        with requests.get(cleaned_url, timeout=timeout) as response:
	            response.raise_for_status()
	            # response.content is content-decoded (gzip/deflate), unlike
	            # response.raw, so PIL always receives the real image bytes.
	            payload = BytesIO(response.content)
	        image = Image.open(payload).convert('RGB')
	        return np.array(image)
	    except Exception as e:
	        # Callers only need a single failure type; keep the cause attached.
	        raise ValueError(f"Failed to fetch image: {str(e)}") from e

	def caption_image(input_image=None, image_url=None):
	    """Generate a caption for an uploaded image or for an image behind a URL.

	    A non-blank ``image_url`` takes precedence over ``input_image``.

	    Args:
	        input_image: Image array to caption, or ``None`` when nothing was
	            uploaded.
	        image_url: Optional URL of an image to download with ``fetch_image``.
	            Blank or whitespace-only strings count as "no URL".

	    Returns:
	        A ``(caption, caption_path)`` tuple where ``caption_path`` is a text
	        file containing the caption. On any failure the function does not
	        raise; it returns ``("Error: <message>", None)`` so the UI can show
	        the problem.
	    """
	    try:
	        # Text inputs can arrive as whitespace only; normalise so that case
	        # falls through to the uploaded image instead of a doomed download.
	        url = image_url.strip() if isinstance(image_url, str) else image_url

	        if url:
	            image_array = fetch_image(url)
	        elif input_image is not None:
	            image_array = input_image
	        else:
	            raise ValueError("Please provide either an image or an image URL.")

	        # Ensure the image is in RGB format
	        pil_image = Image.fromarray(image_array).convert('RGB')

	        # Process the image and generate caption
	        inputs = processor(pil_image, return_tensors="pt")
	        out = model.generate(**inputs, max_length=50)
	        caption = processor.decode(out[0], skip_special_tokens=True)

	        # Write each caption to its own temp file: a fixed "caption.txt" would
	        # be overwritten whenever two requests overlap. delete=False keeps the
	        # file on disk after the handle closes so it can still be downloaded.
	        with tempfile.NamedTemporaryFile(
	            mode="w",
	            suffix=".txt",
	            prefix="caption_",
	            delete=False,
	            encoding="utf-8",
	        ) as caption_file:
	            caption_file.write(caption)

	        return caption, caption_file.name
	    except Exception as e:
	        # UI boundary: report the failure as text rather than crashing.
	        return f"Error: {str(e)}", None

	# Input widgets, in the positional order caption_image expects them:
	# the uploaded image first, then the optional URL.
	input_components = [
	    gr.Image(type="numpy", label="Upload Image"),
	    gr.Textbox(label="Image URL (Optional)", placeholder="Enter an image URL here"),
	]

	# Output widgets matching caption_image's (caption, caption_path) return value.
	output_components = [
	    gr.Textbox(label="Generated Caption"),
	    gr.File(label="Download Caption"),
	]

	# Each example row supplies a value for the image input only.
	example_inputs = [["model.jpg"], ["horse.jpeg"], ["panda.jpg"]]

	iface = gr.Interface(
	    fn=caption_image,
	    inputs=input_components,
	    outputs=output_components,
	    examples=example_inputs,
	    title="Advanced Image Captioning with the BLIP model",
	    description="Upload an image or provide a URL to an image to generate a caption. You can also drag and drop the example images. Download the generated caption as a .txt file if needed.",
	    live=True,
	    theme="compact",
	)

	# Start serving the interface.
	iface.launch()