Soumen committed on
Commit
d23083e
1 Parent(s): 3dbef8c

Update app.py

Files changed (1)
  1. app.py +37 -36
app.py CHANGED
@@ -2,54 +2,55 @@ import streamlit as st
 import torch
 from PIL import Image
 from transformers import VisionEncoderDecoderModel, ViTFeatureExtractor, AutoTokenizer
-#pickle.load(open('energy_model.pkl', 'rb'))
-#vocab = np.load('w2i.p', allow_pickle=True)
-st.set_page_config(
-    page_title="Image_Captioning_App",
-    page_icon="🧊",
-    layout="wide",
-    initial_sidebar_state="expanded"
-)
+st.title("Image_Captioning_App")
 model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
 feature_extractor = ViTFeatureExtractor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
 tokenizer = AutoTokenizer.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
+#pickle.load(open('energy_model.pkl', 'rb'))
+#vocab = np.load('w2i.p', allow_pickle=True)
 #st.text("Build with Streamlit and OpenCV")
 if "photo" not in st.session_state:
     st.session_state["photo"] = "not done"
+
 c2, c3 = st.columns([2, 1])
 def change_photo_state():
     st.session_state["photo"] = "done"
+print("=" * 150)
+print("VIT-GPT2 MODEL LOADED")
+
 @st.cache
 def load_image(img):
     im = Image.open(img)
     return im
+activities = ["Generate", "About"]
+choice = st.sidebar.selectbox("Select Activity", activities)
 uploaded_photo = c2.file_uploader("Upload Image", type=['jpg', 'png', 'jpeg'], on_change=change_photo_state)
 camera_photo = c2.camera_input("Take a photo", on_change=change_photo_state)
-#st.subheader("Detection")
-if st.checkbox("Generate_Caption"):
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    model.to(device)
-    max_length = 16
-    num_beams = 4
-    gen_kwargs = {"max_length": max_length, "num_beams": num_beams}
-    def predict_step(our_image):
-        if our_image.mode != "RGB":
-            our_image = our_image.convert(mode="RGB")
-        pixel_values = feature_extractor(images=our_image, return_tensors="pt").pixel_values
-        pixel_values = pixel_values.to(device)
-        output_ids = model.generate(pixel_values, **gen_kwargs)
-        preds = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
-        preds = [pred.strip() for pred in preds]
-        return preds
-    if st.session_state["photo"] == "done":
-        if uploaded_photo:
-            our_image = load_image(uploaded_photo)
-        elif camera_photo:
-            our_image = load_image(camera_photo)
-        elif uploaded_photo is None and camera_photo is None:
-            our_image = load_image('image.jpg')
-        st.success(predict_step(our_image))
-elif st.checkbox("About"):
-    st.subheader("About Image Captioning App")
-    st.markdown("Built with Streamlit by [Soumen Sarker](https://soumen-sarker-personal-website.streamlit.app/)")
-    st.markdown("Demo application of the following model [credit](https://huggingface.co/nlpconnect/vit-gpt2-image-captioning/)")
+if choice == 'Generate':
+    st.subheader("Generate")
+    if st.session_state["photo"] == "done":
+        if uploaded_photo:
+            our_image = load_image(uploaded_photo)
+        elif camera_photo:
+            our_image = load_image(camera_photo)
+        elif uploaded_photo is None and camera_photo is None:
+            our_image = load_image('image.jpg')
+        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        model.to(device)
+        max_length = 16
+        num_beams = 4
+        gen_kwargs = {"max_length": max_length, "num_beams": num_beams}
+        def predict_step(our_image):
+            if our_image.mode != "RGB":
+                our_image = our_image.convert(mode="RGB")
+            pixel_values = feature_extractor(images=our_image, return_tensors="pt").pixel_values
+            pixel_values = pixel_values.to(device)
+            output_ids = model.generate(pixel_values, **gen_kwargs)
+            preds = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
+            preds = [pred.strip() for pred in preds]
+            return preds
+        st.success(predict_step(our_image))
+elif choice == 'About':
+    st.subheader("About Image Captioning App")
+    st.markdown("Built with Streamlit by [Soumen Sarker](https://soumen-sarker-personal-site.streamlit.app/)")
+    st.markdown("Demo application of the following model [credit](https://huggingface.co/nlpconnect/vit-gpt2-image-captioning/)")
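
For a quick sanity check of this commit, the caption path can be exercised outside Streamlit. Below is a minimal sketch of the same ViT-GPT2 pipeline that app.py runs (feature extraction, beam-search generate, decode); the helper name caption_image and the test path test.jpg are illustrative assumptions, not part of the commit.

import torch
from PIL import Image
from transformers import VisionEncoderDecoderModel, ViTFeatureExtractor, AutoTokenizer

# Same checkpoint that app.py loads.
model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
feature_extractor = ViTFeatureExtractor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
tokenizer = AutoTokenizer.from_pretrained("nlpconnect/vit-gpt2-image-captioning")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

def caption_image(path, max_length=16, num_beams=4):  # hypothetical helper mirroring predict_step
    image = Image.open(path)
    if image.mode != "RGB":  # camera input and PNGs may be RGBA or grayscale
        image = image.convert(mode="RGB")
    pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values.to(device)
    # Beam-search decode, then strip special tokens and surrounding whitespace.
    output_ids = model.generate(pixel_values, max_length=max_length, num_beams=num_beams)
    preds = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
    return [pred.strip() for pred in preds]

if __name__ == "__main__":
    print(caption_image("test.jpg"))  # placeholder image path

The app itself starts with streamlit run app.py; because the sidebar selectbox replaces the two checkboxes, exactly one of the Generate/About branches renders on each rerun.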