NourFakih committed on
Commit
5db375c
·
verified ·
1 Parent(s): 7f5d162

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -12
app.py CHANGED
@@ -11,7 +11,7 @@ from spacy.cli import download
11
  import base64
12
  import numpy as np
13
  import datetime
14
- from streamlit_option_menu import option_menu
15
 
16
  # Download necessary NLP models
17
  nltk.download('wordnet')
@@ -20,7 +20,7 @@ download("en_core_web_sm")
20
  nlp = spacy.load("en_core_web_sm")
21
 
22
  # Load the pre-trained models for image captioning and summarization
23
- model_name = "NourFakih/Vit-GPT2-COCO2017Flickr-115k-12"
24
  model = VisionEncoderDecoderModel.from_pretrained(model_name)
25
  feature_extractor = ViTImageProcessor.from_pretrained(model_name)
26
  tokenizer = AutoTokenizer.from_pretrained(model_name)
@@ -76,6 +76,20 @@ def add_image_to_state(image, caption, capture_time):
76
  if len(st.session_state.captured_images) < 20:
77
  st.session_state.captured_images.append((img_str, caption, capture_time))
78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  def page_image_captioning():
80
  st.title("Image Captioning")
81
  st.write("Your image captioning code here")
@@ -87,15 +101,14 @@ def page_video_captioning():
87
  def page_webcam_capture():
88
  st.title("Live Captioning with Webcam")
89
 
90
- img_file = st.camera_input("Capture an image")
91
-
92
- if img_file:
93
- img = Image.open(img_file)
94
- img_array = np.array(img)
95
- caption = generate_caption(img)
96
- capture_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
97
- add_image_to_state(img_array, caption, capture_time)
98
- st.image(img, caption=f"Caption: {caption}")
99
 
100
  if st.button('Stop'):
101
  st.write("Camera stopped.")
@@ -162,7 +175,7 @@ def main():
162
  selected = option_menu(
163
  menu_title="Main Menu",
164
  options=["Image Captioning", "Video Captioning", "Webcam Captioning"],
165
- icons=["image", "play-fill", "camera"],
166
  menu_icon="cast",
167
  default_index=0,
168
  )
 
11
  import base64
12
  import numpy as np
13
  import datetime
14
+ import time
15
 
16
  # Download necessary NLP models
17
  nltk.download('wordnet')
 
20
  nlp = spacy.load("en_core_web_sm")
21
 
22
  # Load the pre-trained models for image captioning and summarization
23
+ model_name = "NourFakih/Vit-GPT2-COCO2017Flickr-85k-09"
24
  model = VisionEncoderDecoderModel.from_pretrained(model_name)
25
  feature_extractor = ViTImageProcessor.from_pretrained(model_name)
26
  tokenizer = AutoTokenizer.from_pretrained(model_name)
 
76
  if len(st.session_state.captured_images) < 20:
77
  st.session_state.captured_images.append((img_str, caption, capture_time))
78
 
79
def capture_images_from_webcam(num_images=5, interval=0.5):
    """Grab a short burst of frames from the webcam as PIL images.

    Opens video device index 0 through OpenCV, reads up to ``num_images``
    frames, and converts each one from OpenCV's BGR channel order to RGB
    before wrapping it in a PIL image.

    Args:
        num_images: Maximum number of frames to capture.
        interval: Seconds to wait between two consecutive captures.

    Returns:
        A list of PIL images in capture order. The list is shorter than
        ``num_images`` (possibly empty) if a frame read fails.
    """
    captured_images = []
    cap = cv2.VideoCapture(0)
    try:
        for index in range(num_images):
            ret, frame = cap.read()
            if not ret:
                # The device produced no frame; return what we have so far.
                break
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            captured_images.append(Image.fromarray(frame_rgb))
            # Only pause between captures, not after the final one.
            if index < num_images - 1:
                time.sleep(interval)
    finally:
        # Always free the device, even if conversion or the sleep raises,
        # so the webcam is not left locked for the next capture.
        cap.release()
    return captured_images
92
+
93
  def page_image_captioning():
94
  st.title("Image Captioning")
95
  st.write("Your image captioning code here")
 
101
  def page_webcam_capture():
102
  st.title("Live Captioning with Webcam")
103
 
104
+ if st.button("Capture 5 Images"):
105
+ captured_images = capture_images_from_webcam()
106
+ for img in captured_images:
107
+ img_array = np.array(img)
108
+ caption = generate_caption(img)
109
+ capture_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
110
+ add_image_to_state(img_array, caption, capture_time)
111
+ st.image(img, caption=f"Caption: {caption}")
 
112
 
113
  if st.button('Stop'):
114
  st.write("Camera stopped.")
 
175
  selected = option_menu(
176
  menu_title="Main Menu",
177
  options=["Image Captioning", "Video Captioning", "Webcam Captioning"],
178
+ icons=["image", "Caret-right-square", "camera"],
179
  menu_icon="cast",
180
  default_index=0,
181
  )