Spaces: Build error
import torch
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image
import gradio as gr
# Set device to GPU if available, otherwise use CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the BLIP model and processor
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device)
# Generate a caption for a single PIL image with BLIP
def generate_caption(image):
    inputs = processor(images=image, return_tensors="pt").to(device)
    with torch.no_grad():  # inference only, no gradients needed
        output_ids = model.generate(**inputs)
    return processor.decode(output_ids[0], skip_special_tokens=True)
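# Optional sanity check (not part of the app): uncomment to caption one local
# test image before wiring up Gradio. "test.jpg" is only a placeholder path.
# test_image = Image.open("test.jpg").convert('RGB')
# print(generate_caption(test_image))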
# Caption each uploaded image and return all captions as one text block
def process_images(image_files):
    captions = []
    for image_file in image_files:
        # gr.Files passes each upload as a path to a temporary file
        image = Image.open(image_file).convert('RGB')
        caption = generate_caption(image)
        captions.append(caption)
    # Join into a single string so the Textbox output renders cleanly
    return "\n".join(captions)
# Set up the Gradio interface
iface = gr.Interface(
    fn=process_images,
    inputs=[gr.Files(label="Upload Image Files")],
    outputs=[gr.Textbox(label="Image Captions")],
    title="Image Captioning with BLIP",
    description="Upload images and generate captions using the BLIP model from Hugging Face."
)

iface.launch(debug=True)
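On Spaces, a build error normally happens while dependencies are being installed, before app.py ever runs, so the first thing to check is that a requirements.txt sits next to the app file and lists every package the script imports. A minimal sketch matching the code above (package names only, unpinned; pin versions if the build needs them):

torch
transformers
Pillow

gradio itself is installed by the Gradio SDK on Spaces (at the sdk_version in the README), so it does not need to be listed.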