# BLIP image-captioning demo served through a Gradio interface.
import gradio as gr
import torch
from PIL import Image

from lavis.models import load_model_and_preprocess

# Pick a device so the model runs on GPU when one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the pretrained BLIP captioning model and its matching image preprocessors.
model, vis_processors, _ = load_model_and_preprocess(
    name="blip_caption", model_type="base_coco", is_eval=True, device=device
)

def generate_caption(image_file):
    # Gradio passes the uploaded image as a file path (type="filepath" below).
    raw_image = Image.open(image_file).convert("RGB")

    # Preprocess the image, add a batch dimension, and move it to the model's device.
    image = vis_processors["eval"](raw_image).unsqueeze(0).to(device)

    # Sample five candidate captions with nucleus sampling.
    captions = model.generate({"image": image}, use_nucleus_sampling=True, num_captions=5)

    # Join the candidates into a single comma-separated string for the output textbox.
    return ", ".join(captions)

# Gradio UI: an image upload component in, a textbox with the generated captions out.
# gr.Image replaces the deprecated gr.inputs.Image; "filepath" matches Image.open above.
inputs = gr.Image(type="filepath", label="Image")
outputs = gr.Textbox(label="Captions")
interface = gr.Interface(fn=generate_caption, inputs=inputs, outputs=outputs, title="Blip-Caption")

# Launch the app; share=True also exposes a temporary public Gradio link.
interface.launch(share=True)