Spaces:

NourFakih
/

3-page-app

Sleeping

App Files Community

NourFakih committed on Jul 15, 2024

Commit

5db375c

verified ·

1 Parent(s): 7f5d162

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -12

app.py CHANGED Viewed

@@ -11,7 +11,7 @@ from spacy.cli import download
 import base64
 import numpy as np
 import datetime
-from streamlit_option_menu import option_menu
 # Download necessary NLP models
 nltk.download('wordnet')
@@ -20,7 +20,7 @@ download("en_core_web_sm")
 nlp = spacy.load("en_core_web_sm")
 # Load the pre-trained models for image captioning and summarization
-model_name = "NourFakih/Vit-GPT2-COCO2017Flickr-115k-12"
 model = VisionEncoderDecoderModel.from_pretrained(model_name)
 feature_extractor = ViTImageProcessor.from_pretrained(model_name)
 tokenizer = AutoTokenizer.from_pretrained(model_name)
@@ -76,6 +76,20 @@ def add_image_to_state(image, caption, capture_time):
     if len(st.session_state.captured_images) < 20:
         st.session_state.captured_images.append((img_str, caption, capture_time))
 def page_image_captioning():
     st.title("Image Captioning")
     st.write("Your image captioning code here")
@@ -87,15 +101,14 @@ def page_video_captioning():
 def page_webcam_capture():
     st.title("Live Captioning with Webcam")
-    img_file = st.camera_input("Capture an image")
-    if img_file:
-        img = Image.open(img_file)
-        img_array = np.array(img)
-        caption = generate_caption(img)
-        capture_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-        add_image_to_state(img_array, caption, capture_time)
-        st.image(img, caption=f"Caption: {caption}")
     if st.button('Stop'):
         st.write("Camera stopped.")
@@ -162,7 +175,7 @@ def main():
         selected = option_menu(
             menu_title="Main Menu",
             options=["Image Captioning", "Video Captioning", "Webcam Captioning"],
-            icons=["image", "play-fill", "camera"],
             menu_icon="cast",
             default_index=0,
         )

 import base64
 import numpy as np
 import datetime
+import time
 # Download necessary NLP models
 nltk.download('wordnet')
 nlp = spacy.load("en_core_web_sm")
 # Load the pre-trained models for image captioning and summarization
+model_name = "NourFakih/Vit-GPT2-COCO2017Flickr-85k-09"
 model = VisionEncoderDecoderModel.from_pretrained(model_name)
 feature_extractor = ViTImageProcessor.from_pretrained(model_name)
 tokenizer = AutoTokenizer.from_pretrained(model_name)
     if len(st.session_state.captured_images) < 20:
         st.session_state.captured_images.append((img_str, caption, capture_time))
+def capture_images_from_webcam(num_images=5, interval=0.5):
+    """Grab up to ``num_images`` frames from camera device 0 as PIL images.
+
+    Frames are read with OpenCV, converted from BGR to RGB, and wrapped
+    with ``Image.fromarray``. Capture stops at the first failed read, so
+    the returned list may hold fewer than ``num_images`` items (or none).
+
+    Args:
+        num_images: Maximum number of frames to capture.
+        interval: Seconds to wait between consecutive captures.
+
+    Returns:
+        The captured frames as a list, in capture order.
+    """
+    captured_images = []
+    cap = cv2.VideoCapture(0)
+    try:
+        for index in range(num_images):
+            ret, frame = cap.read()
+            if not ret:
+                break
+            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+            captured_images.append(Image.fromarray(frame_rgb))
+            # Pause only between frames; sleeping after the last one would
+            # just delay the return.
+            if index < num_images - 1:
+                time.sleep(interval)
+    finally:
+        # Release the capture handle even if a conversion step raises.
+        cap.release()
+    return captured_images
 def page_image_captioning():
     st.title("Image Captioning")
     st.write("Your image captioning code here")
 def page_webcam_capture():
     st.title("Live Captioning with Webcam")
+    if st.button("Capture 5 Images"):
+        captured_images = capture_images_from_webcam()
+        for img in captured_images:
+            img_array = np.array(img)
+            caption = generate_caption(img)
+            capture_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+            add_image_to_state(img_array, caption, capture_time)
+            st.image(img, caption=f"Caption: {caption}")
     if st.button('Stop'):
         st.write("Camera stopped.")
         selected = option_menu(
             menu_title="Main Menu",
             options=["Image Captioning", "Video Captioning", "Webcam Captioning"],
+            icons=["image", "Caret-right-square", "camera"],
             menu_icon="cast",
             default_index=0,
         )