import torch
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image
import gradio as gr
# Set device to GPU if available, otherwise use CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Load the BLIP model and processor
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device)
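model.eval()  # inference only: disable dropout and other training-time behavior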

# Generate a caption for a single image using BLIP
def generate_caption(image):
    inputs = processor(images=image, return_tensors="pt").to(device)
    with torch.no_grad():  # no gradients needed at inference time
        output_ids = model.generate(**inputs)
    return processor.decode(output_ids[0], skip_special_tokens=True)
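
# Example usage (hypothetical file name): generate_caption(Image.open("dog.jpg").convert("RGB"))
# returns a short caption string such as "a dog sitting in the grass".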

# Process each uploaded image and collect its caption
def process_images(image_files):
    captions = []
    for image_file in image_files:
        image = Image.open(image_file).convert('RGB')
        captions.append(generate_caption(image))
    # The single output Textbox expects a string, so join the captions
    return "\n".join(captions)
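
# gr.Files passes process_images a list of uploaded files (file paths or
# file-like objects, depending on the Gradio version); Image.open accepts both.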

# Set up the Gradio interface
iface = gr.Interface(
    fn=process_images,
    inputs=gr.Files(label="Upload Image Files", file_types=["image"]),
    outputs=gr.Textbox(label="Image Captions"),
    title="Image Captioning with BLIP",
    description="Upload one or more images and generate captions using the BLIP model from Hugging Face."
)

iface.launch(debug=True)