Image-to_text / app.py
TekamBrice's picture
Update app.py
cff69c1 verified
import gradio as gr
import transformers
import torch
import re
import gradio as gr
from PIL import Image
from transformers import pipeline
from transformers import AutoTokenizer, ViTFeatureExtractor, VisionEncoderDecoderModel
import os
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
device='cpu'
model_id = "nttdataspain/vit-gpt2-stablediffusion2-lora"
model = VisionEncoderDecoderModel.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)
feature_extractor = ViTFeatureExtractor.from_pretrained(model_id)
def predict(image):
img = image.convert('RGB')
model.eval()
pixel_values = feature_extractor(images=[img], return_tensors="pt").pixel_values
with torch.no_grad():
output_ids = model.generate(pixel_values, max_length=16, num_beams=4, return_dict_in_generate=True).sequences
preds = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
preds = [pred.strip() for pred in preds]
return preds[0]
# def get_image(img):
# image=pipe(img)
# return image[0]['generated_text']
image=gr.Interface(predict,title='Image to text',inputs= gr.Image(label="Upload any Image", type = 'pil'),outputs='text').launch(share=True)