import torch
import gradio as gr
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

# Load the BLIP captioning model and its processor once at startup.
model_name = "Salesforce/blip-image-captioning-base"
caption_processor = BlipProcessor.from_pretrained(model_name)
model = BlipForConditionalGeneration.from_pretrained(model_name)

# Use a GPU when available; fall back to CPU otherwise.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
def generate_captions(image, num_captions=5, size=(512, 512)):
    # Normalize the input: resize for consistent preprocessing and make
    # sure the image is RGB, which the BLIP processor expects.
    image = image.resize(size)
    if image.mode != "RGB":
        image = image.convert("RGB")

    inputs = caption_processor(image, return_tensors="pt").to(device)

    # num_return_sequences must not exceed num_beams, and the Gradio
    # slider may deliver a float, so clamp and cast first.
    num_captions = min(int(num_captions), 5)

    # Beam search returns num_captions candidate captions for the image.
    with torch.no_grad():
        caption_ids = model.generate(
            **inputs,
            max_length=30,
            num_beams=5,
            num_return_sequences=num_captions,
        )

    captions = [
        caption_processor.decode(ids, skip_special_tokens=True)
        for ids in caption_ids
    ]
    # The Textbox output expects a single string, so join the candidates.
    return "\n".join(captions)
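
# Quick local sanity check (a hypothetical example, not part of the app:
# "example.jpg" is an assumed local file). Uncomment to verify the model
# produces captions before wiring up the Gradio UI:
# print(generate_captions(Image.open("example.jpg"), num_captions=3))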
# Build the UI with the gr.* component classes; importing Image from
# gradio.components would shadow PIL's Image imported above.
interface = gr.Interface(
    fn=generate_captions,
    inputs=[
        gr.Image(type="pil", label="Input Image"),
        gr.Slider(minimum=1, maximum=5, step=1, label="Number of Captions"),
    ],
    outputs=gr.Textbox(label="Captions"),
    title="Image Caption Generator",
    description="An AI tool that generates captions for the image provided by the user.",
)

interface.launch()
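
# Note: a plain launch() is sufficient on Hugging Face Spaces. When running
# locally, interface.launch(share=True) additionally creates a temporary
# public URL (a standard option of Gradio's launch(), not used above).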