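"""Gradio demo: caption an uploaded image with the LAVIS BLIP captioning model."""
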
import gradio as gr
import torch
from lavis.models import load_model_and_preprocess
from PIL import Image

# Load the BLIP captioning model, on GPU if one is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model, vis_processors, _ = load_model_and_preprocess(
    name="blip_caption", model_type="base_coco", is_eval=True, device=device
)

# Define the inference function for Gradio
def generate_caption(image_file):
    # Gradio passes the uploaded image as a numpy array
    image = Image.fromarray(image_file).convert("RGB")

    # Preprocess the image with the BLIP model's visual processors
    image = vis_processors["eval"](image).unsqueeze(0).to(device)

    # Generate five candidate captions with nucleus sampling
    captions = model.generate({"image": image}, use_nucleus_sampling=True, num_captions=5)

    # Capitalize each caption and return them one per line
    beautified_captions = [caption.capitalize() for caption in captions]
    return "\n".join(beautified_captions)

# Set up the Gradio interface with labeled components
interface = gr.Interface(
    fn=generate_caption,
    inputs=gr.Image(label="Image"),
    outputs=gr.Textbox(label="Captions"),
    title="Blip-Caption",
)

# Launch the interface
interface.launch()
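# Note: launch() serves the app locally by default; passing share=True
# (i.e. interface.launch(share=True)) creates a temporary public URL,
# which can be handy when running on a remote machine.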