kusumakar committed
Commit 525bca4 · 1 Parent(s): ec2ceea

Update app.py

Files changed (1):
  app.py +22 -3
app.py CHANGED
@@ -1,3 +1,5 @@
+import cv2
+import numpy as np
 from PIL import Image
 import streamlit as st
 from transformers import GPT2Tokenizer, GPT2LMHeadModel
@@ -10,6 +12,20 @@ model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-cap
 extractor = ViTFeatureExtractor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
 tokeniser = AutoTokenizer.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
 
+
+def preprocess_image(image_path):
+    # Load the image using OpenCV
+    image = cv2.imread(image_path)
+
+    # Resize the image while maintaining the aspect ratio
+    resized_image = cv2.resize(image, (224, 224))
+
+    # Add an extra dimension to represent the batch size (assuming a single image)
+    preprocessed_image = np.expand_dims(resized_image, axis=0)
+
+    return preprocessed_image
+
+
 def generate_captions(image):
     generated_caption = tokeniser.decode(model.generate(extractor(image, return_tensors="pt").pixel_values.to("cpu"))[0])
     sentence = generated_caption
@@ -45,15 +61,18 @@ def main():
 
     # check if file has been uploaded
     if uploaded_file is not None:
+
+        img = preprocess_image(uploaded_file)
+
         # load the image
-        image = Image.open(uploaded_file).convert("RGB")
+        #image = Image.open(uploaded_file).convert("RGB")
 
         # context as prompt
-        prompt = generate_captions(image)
+        prompt = generate_captions(img)
         st.write("The Context is:", prompt)
 
         # display the image
-        st.image(image)
+        st.image(img)
 
         # Generate button
         if st.button("Generate"):
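
Note: cv2.imread() takes a filesystem path and returns None when handed a Streamlit UploadedFile, so preprocess_image(uploaded_file) as committed would fail downstream. A minimal sketch of an in-memory alternative, assuming the upload is a standard image format (load_uploaded_image and its variable names are illustrative, not part of the commit):

import cv2
import numpy as np

def load_uploaded_image(uploaded_file):
    # Decode the uploaded bytes in memory instead of reading from disk
    # (cv2.imread cannot consume a file-like object)
    data = np.frombuffer(uploaded_file.read(), dtype=np.uint8)
    bgr = cv2.imdecode(data, cv2.IMREAD_COLOR)

    # OpenCV decodes to BGR; convert to RGB for display and for the extractor
    return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

The commented-out Image.open(uploaded_file).convert("RGB") already handles file-like objects, so restoring that PIL line would be an equally valid fix.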
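
Also worth noting: ViTFeatureExtractor resizes and normalizes its input itself, so the manual (224, 224) resize (which, despite its comment, does not preserve aspect ratio) and the added batch dimension are likely redundant, and a batched 4-D array may not display as intended via st.image. A hedged sketch of the captioning call on a plain RGB image, using only the objects the commit already defines:

# Sketch only: pass the RGB image straight to the extractor and strip
# special tokens (e.g. "<|endoftext|>") from the decoded caption.
pixel_values = extractor(image, return_tensors="pt").pixel_values
output_ids = model.generate(pixel_values)
caption = tokeniser.decode(output_ids[0], skip_special_tokens=True)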