Spaces:

Raj086
/

image-captioning

Sleeping

image-captioning / app.py

Upload 2 files

fe52fb1 verified 12 months ago

1.45 kB

	import io

	import streamlit as st
	import torch
	from gtts import gTTS
	from PIL import Image
	from transformers import VisionEncoderDecoderModel, ViTImageProcessor, GPT2TokenizerFast

	# Run on the GPU when one is available, otherwise fall back to the CPU.
	device = 'cuda' if torch.cuda.is_available() else 'cpu'

	# Single checkpoint that provides the model, tokenizer and image processor.
	_CHECKPOINT = 'nlpconnect/vit-gpt2-image-captioning'


	@st.cache_resource
	def _load_captioning_components(checkpoint, target_device):
	    """Load the captioning model, tokenizer and image processor once.

	    Streamlit re-executes this script from top to bottom on every user
	    interaction. Caching the loaded objects as a shared resource avoids
	    re-reading the weights on every rerun.

	    Args:
	        checkpoint: Hugging Face model id (or local path) to load from.
	        target_device: Device string the model is moved to ('cuda' or 'cpu').

	    Returns:
	        A ``(model, tokenizer, image_processor)`` tuple.
	    """
	    loaded_model = VisionEncoderDecoderModel.from_pretrained(checkpoint).to(target_device)
	    loaded_tokenizer = GPT2TokenizerFast.from_pretrained(checkpoint)
	    loaded_processor = ViTImageProcessor.from_pretrained(checkpoint)
	    return loaded_model, loaded_tokenizer, loaded_processor


	model, tokenizer, image_processor = _load_captioning_components(_CHECKPOINT, device)


	def get_caption(model,image_processor,tokenizer,image_path):
	    """Generate a text caption for an image.

	    Args:
	        model: Captioning model exposing ``generate`` and ``device``.
	        image_processor: Callable that turns a PIL image into model inputs
	            (called with ``return_tensors='pt'``).
	        tokenizer: Tokenizer used to decode the generated token ids.
	        image_path: Path or file-like object accepted by ``Image.open``.

	    Returns:
	        The decoded caption string for the image.
	    """
	    # Uploaded PNGs may be RGBA, palette or greyscale; the processor expects
	    # three channels, so normalise to RGB. ``convert`` returns a fully loaded
	    # copy, so the source can be closed as soon as the block exits.
	    with Image.open(image_path) as source_image:
	        image = source_image.convert('RGB')

	    # Preprocess the image and place the tensors on the same device as the
	    # model that was passed in.
	    img = image_processor(image,return_tensors='pt').to(model.device)

	    # Generate the caption token ids.
	    output = model.generate(**img)

	    # Decode the first (only) sequence in the batch.
	    caption = tokenizer.batch_decode(output,skip_special_tokens=True)[0]

	    return caption




	# Page flow: upload an image, show it, caption it, then read the caption aloud.
	st.title('Vision Transformers (ViT) in Image Captioning Using Pretrained ViT Models')


	uploaded_image = st.file_uploader('Upload an Image',type=['png','jpg','jpeg'])

	if uploaded_image is not None:
	    st.image(uploaded_image)
	    caption = get_caption(model,image_processor,tokenizer,uploaded_image)
	    st.header(caption)
	    # gTTS rejects empty text, so only synthesise speech for a non-blank caption.
	    if caption.strip():
	        # Synthesise into memory instead of a fixed 'caption.mp3' on disk:
	        # all sessions share the working directory, so a shared filename
	        # lets concurrent users overwrite each other's audio.
	        audio_buffer = io.BytesIO()
	        gTTS(caption,lang='en',slow=True).write_to_fp(audio_buffer)
	        st.audio(audio_buffer.getvalue(),format='audio/mpeg',autoplay=True)
	else:
	    st.error('No Image Uploaded !')