import torch
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image
import gradio as gr

# Set device to GPU if available, otherwise use CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the BLIP model and processor
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device)

# Function to generate caption for the image using BLIP
def generate_caption(image):
    inputs = processor(images=image, return_tensors="pt").to(device)
    output_ids = model.generate(**inputs)
    return processor.decode(output_ids[0], skip_special_tokens=True)

# Function to process a batch of uploaded images and generate a caption for each
def process_images(image_files):
    captions = []
    for image_file in image_files:
        image = Image.open(image_file).convert('RGB')
        captions.append(generate_caption(image))
    # Join captions into a single string so they display in the single Textbox output
    return "\n".join(captions)
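
# Quick sanity check (a minimal sketch): caption one local image without the Gradio UI.
# The filename "example.jpg" is illustrative only — substitute any image on disk.
#
#     image = Image.open("example.jpg").convert("RGB")
#     print(generate_caption(image))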

# Setup Gradio interface
iface = gr.Interface(
    fn=process_images,
    inputs=[gr.Files(label="Upload Image Files", file_types=["image"])],
    outputs=[gr.Textbox(label="Image Captions", lines=5)],
    title="Image Captioning with BLIP",
    description="Upload images and generate captions using the BLIP model from Hugging Face."
)

iface.launch(debug=True)
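
# Note: when running on a remote host or in a hosted notebook, launching with
# iface.launch(share=True) creates a temporary public URL instead of serving
# only on localhost.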