"""Image-captioning demo: the Salesforce BLIP base model behind a Gradio UI."""

from PIL import Image
import gradio as gr
from transformers import BlipProcessor, BlipForConditionalGeneration


class ImageCaption:
    """Thin wrapper around the BLIP image-captioning model.

    Loads the pretrained processor and model once at construction time
    (this downloads weights on first use) and exposes a single
    ``generate`` method suitable for direct use as a Gradio callback.
    """

    # Single source of truth for the checkpoint id (was duplicated inline).
    MODEL_NAME = "Salesforce/blip-image-captioning-base"

    def __init__(self):
        self.processor = BlipProcessor.from_pretrained(self.MODEL_NAME)
        self.model = BlipForConditionalGeneration.from_pretrained(self.MODEL_NAME)

    def generate(self, img):
        """Return a caption string for *img*.

        Parameters
        ----------
        img : PIL.Image.Image | str
            An already-opened PIL image, or a filesystem path to one.

        Returns
        -------
        str
            The model's caption with special tokens stripped.
        """
        if isinstance(img, str):
            img = Image.open(img)
        # Renamed from `input`, which shadowed the builtin of the same name.
        inputs = self.processor(img, return_tensors='pt')
        output = self.model.generate(**inputs)
        return self.processor.decode(output[0], skip_special_tokens=True)


def main():
    """Build the Gradio interface and start the local web server (blocks)."""
    ic = ImageCaption()
    app = gr.Interface(
        fn=ic.generate,
        inputs=gr.Image(type='pil'),
        outputs="text",
        description="upload image to generate caption",
    )
    app.launch()


# Guarded so importing this module no longer launches the server as a
# side effect; running it as a script behaves exactly as before.
if __name__ == "__main__":
    main()