Spaces:

Raj086
/

image-captioning

Sleeping

Raj086 committed on Jul 17, 2024

Commit

fe52fb1

verified ·

1 Parent(s): 48c0cd4

Upload 2 files

Files changed (2) hide show

app.py ADDED Viewed

+import io
+
+import streamlit as st
+from PIL import Image
+import torch
+from transformers import VisionEncoderDecoderModel, ViTImageProcessor, GPT2TokenizerFast
+from gtts import gTTS
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
+model = VisionEncoderDecoderModel.from_pretrained('nlpconnect/vit-gpt2-image-captioning').to(device)
+tokenizer = GPT2TokenizerFast.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
+image_processor = ViTImageProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
+def get_caption(model,image_processor,tokenizer,image_path):
+  image = Image.open(image_path)
+  #processing the image
+  img = image_processor(image,return_tensors='pt').to(device)
+  # gteneratimg caption
+  output = model.generate(**img)
+  # decode the output
+  caption = tokenizer.batch_decode(output,skip_special_tokens=True)[0]
+  return caption
+st.title('Vision Transformers (ViT) in Image Captioning Using Pretrained ViT Models')
+uploaded_image = st.file_uploader('Upload an Image',type=['png','jpg','jpeg'])
+if uploaded_image is not None:
+#   image = Image.open(uploaded_image)
+  st.image(uploaded_image)
+  caption = get_caption(model,image_processor,tokenizer,uploaded_image)
+  st.header(caption)
+  read_caption = gTTS(caption,lang='en',slow=True)
+  read_caption.save('caption.mp3')
+  st.audio('caption.mp3',autoplay=True)
+else:
+    st.error('No Image Uploaded !')

requirements.txt ADDED Viewed

+# The "+cpu" builds of torch/torchaudio/torchvision are published on the
+# PyTorch wheel index, not on PyPI, so pip needs the extra index to resolve them.
+--extra-index-url https://download.pytorch.org/whl/cpu
+altair==5.3.0
+attrs==23.2.0
+blinker==1.8.2
+cachetools==5.4.0
+certifi==2024.7.4
+charset-normalizer==3.3.2
+click==8.1.7
+colorama==0.4.6
+# Windows-only package: restricted with a marker so installs on other platforms skip it.
+comtypes==1.4.5; sys_platform == "win32"
+filelock==3.15.4
+fsspec==2024.6.1
+gitdb==4.0.11
+GitPython==3.1.43
+gTTS==2.5.1
+huggingface-hub==0.23.5
+idna==3.7
+intel-openmp==2021.4.0
+Jinja2==3.1.4
+jsonschema==4.23.0
+jsonschema-specifications==2023.12.1
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+mdurl==0.1.2
+mkl==2021.4.0
+mpmath==1.3.0
+networkx==3.3
+numpy==1.26.4
+packaging==24.1
+pandas==2.2.2
+pillow==10.4.0
+protobuf==5.27.2
+pyarrow==17.0.0
+pydeck==0.9.1
+Pygments==2.18.0
+# Windows-only package: restricted with a marker so installs on other platforms skip it.
+pypiwin32==223; sys_platform == "win32"
+python-dateutil==2.9.0.post0
+pytz==2024.1
+# Windows-only package: restricted with a marker so installs on other platforms skip it.
+pywin32==306; sys_platform == "win32"
+PyYAML==6.0.1
+referencing==0.35.1
+regex==2024.5.15
+requests==2.32.3
+rich==13.7.1
+rpds-py==0.19.0
+safetensors==0.4.3
+six==1.16.0
+smmap==5.0.1
+streamlit==1.36.0
+sympy==1.13.0
+tbb==2021.13.0
+tenacity==8.5.0
+tokenizers==0.19.1
+toml==0.10.2
+toolz==0.12.1
+torch==2.2.2+cpu
+torchaudio==2.2.2+cpu
+torchvision==0.17.2+cpu
+tornado==6.4.1
+tqdm==4.66.4
+transformers==4.42.4
+typing_extensions==4.12.2
+tzdata==2024.1
+urllib3==2.2.2
+watchdog==4.0.1