Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -11,7 +11,7 @@ from spacy.cli import download
|
|
11 |
import base64
|
12 |
import numpy as np
|
13 |
import datetime
|
14 |
-
|
15 |
|
16 |
# Download necessary NLP models
|
17 |
nltk.download('wordnet')
|
@@ -20,7 +20,7 @@ download("en_core_web_sm")
|
|
20 |
nlp = spacy.load("en_core_web_sm")
|
21 |
|
22 |
# Load the pre-trained models for image captioning and summarization
|
23 |
-
model_name = "NourFakih/Vit-GPT2-COCO2017Flickr-
|
24 |
model = VisionEncoderDecoderModel.from_pretrained(model_name)
|
25 |
feature_extractor = ViTImageProcessor.from_pretrained(model_name)
|
26 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
@@ -76,6 +76,20 @@ def add_image_to_state(image, caption, capture_time):
|
|
76 |
if len(st.session_state.captured_images) < 20:
|
77 |
st.session_state.captured_images.append((img_str, caption, capture_time))
|
78 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
def page_image_captioning():
|
80 |
st.title("Image Captioning")
|
81 |
st.write("Your image captioning code here")
|
@@ -87,15 +101,14 @@ def page_video_captioning():
|
|
87 |
def page_webcam_capture():
|
88 |
st.title("Live Captioning with Webcam")
|
89 |
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
st.image(img, caption=f"Caption: {caption}")
|
99 |
|
100 |
if st.button('Stop'):
|
101 |
st.write("Camera stopped.")
|
@@ -162,7 +175,7 @@ def main():
|
|
162 |
selected = option_menu(
|
163 |
menu_title="Main Menu",
|
164 |
options=["Image Captioning", "Video Captioning", "Webcam Captioning"],
|
165 |
-
icons=["image", "
|
166 |
menu_icon="cast",
|
167 |
default_index=0,
|
168 |
)
|
|
|
11 |
import base64
|
12 |
import numpy as np
|
13 |
import datetime
|
14 |
+
import time
|
15 |
|
16 |
# Download necessary NLP models
|
17 |
nltk.download('wordnet')
|
|
|
20 |
nlp = spacy.load("en_core_web_sm")
|
21 |
|
22 |
# Load the pre-trained models for image captioning and summarization
|
23 |
+
model_name = "NourFakih/Vit-GPT2-COCO2017Flickr-85k-09"
|
24 |
model = VisionEncoderDecoderModel.from_pretrained(model_name)
|
25 |
feature_extractor = ViTImageProcessor.from_pretrained(model_name)
|
26 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
|
|
76 |
if len(st.session_state.captured_images) < 20:
|
77 |
st.session_state.captured_images.append((img_str, caption, capture_time))
|
78 |
|
79 |
+
def capture_images_from_webcam(num_images=5, interval=0.5):
    """Grab up to ``num_images`` frames from webcam device 0 as PIL images.

    Args:
        num_images: Maximum number of frames to capture.
        interval: Seconds to wait between consecutive captures.

    Returns:
        A list of RGB PIL images in capture order. The list is shorter than
        ``num_images`` (possibly empty) if a frame read fails.
    """
    captured_images = []
    cap = cv2.VideoCapture(0)
    try:
        for index in range(num_images):
            ret, frame = cap.read()
            if not ret:
                # Read failed; return whatever was captured so far.
                break
            # Frames are converted from BGR to RGB before handing to PIL.
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            captured_images.append(Image.fromarray(frame_rgb))
            # Pause only between captures; sleeping after the final frame
            # would just delay the return.
            if index < num_images - 1:
                time.sleep(interval)
    finally:
        # Always free the device, even if conversion above raises, so the
        # camera is not left locked for the next capture attempt.
        cap.release()
    return captured_images
|
92 |
+
|
93 |
def page_image_captioning():
|
94 |
st.title("Image Captioning")
|
95 |
st.write("Your image captioning code here")
|
|
|
101 |
def page_webcam_capture():
|
102 |
st.title("Live Captioning with Webcam")
|
103 |
|
104 |
+
if st.button("Capture 5 Images"):
|
105 |
+
captured_images = capture_images_from_webcam()
|
106 |
+
for img in captured_images:
|
107 |
+
img_array = np.array(img)
|
108 |
+
caption = generate_caption(img)
|
109 |
+
capture_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
110 |
+
add_image_to_state(img_array, caption, capture_time)
|
111 |
+
st.image(img, caption=f"Caption: {caption}")
|
|
|
112 |
|
113 |
if st.button('Stop'):
|
114 |
st.write("Camera stopped.")
|
|
|
175 |
selected = option_menu(
|
176 |
menu_title="Main Menu",
|
177 |
options=["Image Captioning", "Video Captioning", "Webcam Captioning"],
|
178 |
+
icons=["image", "Caret-right-square", "camera"],
|
179 |
menu_icon="cast",
|
180 |
default_index=0,
|
181 |
)
|