NourFakih committed on
Commit
1047dcc
·
verified ·
1 Parent(s): 72535f3

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +211 -0
app.py ADDED
@@ -0,0 +1,211 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ import cv2
4
+ import tempfile
5
+ from PIL import Image
6
+ from transformers import VisionEncoderDecoderModel, ViTFeatureExtractor, AutoTokenizer, pipeline
7
+ import torch
8
+ import pandas as pd
9
+ from nltk.corpus import wordnet
10
+ import nltk
11
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
12
+
13
# Streamlit re-executes this script on every widget interaction, so all
# expensive one-time setup is wrapped in ``st.cache_resource`` loaders and
# therefore runs once per server process instead of once per rerun.


@st.cache_resource
def _ensure_nltk_data():
    """Download the WordNet corpora needed by the synonym search."""
    nltk.download('wordnet')
    nltk.download('omw-1.4')
    return True


@st.cache_resource
def _load_captioning_components(name):
    """Load the captioning model, feature extractor and tokenizer for *name*."""
    return (
        VisionEncoderDecoderModel.from_pretrained(name),
        ViTFeatureExtractor.from_pretrained(name),
        AutoTokenizer.from_pretrained(name),
    )


@st.cache_resource
def _load_summarization_components(name):
    """Load the summarization tokenizer, model and a pipeline that shares them."""
    tok = AutoTokenizer.from_pretrained(name)
    mdl = AutoModelForSeq2SeqLM.from_pretrained(name)
    # Reuse the objects loaded above rather than passing the checkpoint name,
    # which would make the pipeline load a second copy of the same weights.
    return tok, mdl, pipeline("summarization", model=mdl, tokenizer=tok)


_ensure_nltk_data()

# Load the pre-trained model for image captioning
model_name = "NourFakih/Vit-GPT2-COCO2017Flickr-85k-09"
model, feature_extractor, tokenizer = _load_captioning_components(model_name)

# Initialize the summarization model
model_sum_name = "google-t5/t5-base"
tokenizer_sum, model_sum, summarize_pipe = _load_summarization_components(model_sum_name)
28
+
29
def generate_caption(image):
    """Generate a caption for one image with the module-level captioning model.

    Args:
        image: The image to describe. The callers in this file pass
            ``PIL.Image.Image`` objects; a PIL image that is not already in
            RGB mode is converted before feature extraction.

    Returns:
        The decoded caption string with special tokens removed.
    """
    # Files opened straight from disk may be RGBA, palette or grayscale
    # (common for PNG); the ViT feature extractor is fed three-channel RGB.
    if isinstance(image, Image.Image) and image.mode != "RGB":
        image = image.convert("RGB")

    pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values
    output_ids = model.generate(pixel_values)
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)
34
+
35
def get_synonyms(word):
    """Return the set of WordNet lemma names found across all synsets of *word*.

    The set is empty when WordNet has no synset for the word.
    """
    return {
        lemma.name()
        for synset in wordnet.synsets(word)
        for lemma in synset.lemmas()
    }
41
+
42
def search_captions(query, captions):
    """Find captions that contain a query word or one of its synonyms.

    Matching is done on whole words, ignoring case and punctuation attached
    to a word, so a query of ``"Dog"`` matches the caption ``"a dog."``.

    Args:
        query: Free-text search string; it is split on whitespace.
        captions: Mapping of media path to caption text.

    Returns:
        A list of ``(path, caption)`` tuples for every matching entry, in the
        iteration order of ``captions``. Empty when the query has no words.
    """
    punctuation = ".,;:!?\"'()"

    query_words = [word.strip(punctuation).lower() for word in query.split()]
    query_words = [word for word in query_words if word]
    if not query_words:
        return []

    # Build the full search vocabulary once: the query words themselves plus
    # every synonym reported for them.
    search_terms = set(query_words)
    for word in query_words:
        search_terms.update(synonym.lower() for synonym in get_synonyms(word))

    results = []
    for path, caption in captions.items():
        caption_words = {token.strip(punctuation) for token in caption.lower().split()}
        # Set disjointness replaces re-splitting the caption for every term.
        if not search_terms.isdisjoint(caption_words):
            results.append((path, caption))
    return results
54
+
55
def convert_frame_to_pil(frame):
    """Convert an OpenCV frame from BGR channel order into an RGB PIL image."""
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    return Image.fromarray(rgb_frame)
57
+
58
def process_video(video_path, frame_interval):
    """Sample frames from a video file and caption each sampled frame.

    Args:
        video_path: Path of the video to open with OpenCV.
        frame_interval: Keep one frame out of every ``frame_interval`` frames,
            starting with the first. Values below 1 are treated as 1.

    Returns:
        A tuple ``(frames, captions_df)``. ``frames`` is a list of
        ``(sample_index, frame)`` pairs, where ``sample_index`` counts the
        sampled frames from 0. ``captions_df`` is a DataFrame with one row per
        sampled frame and the columns ``Frame_ID`` (``sample_index + 1``) and
        ``Caption``. If the video cannot be opened, an error is shown and
        ``([], empty DataFrame)`` is returned.
    """
    # Guard against a zero or negative interval, which would otherwise raise
    # ZeroDivisionError or never sample in the modulo test below.
    step = max(1, int(frame_interval))

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            st.error("Error: Could not open video file.")
            return [], pd.DataFrame()

        frames = []
        position = 0
        # Read until the decoder reports no more frames. The container's
        # reported frame count is deliberately not used as a stop condition:
        # it can be 0 or -1, and stopping on it also dropped trailing frames.
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            if position % step == 0:
                frames.append((len(frames), frame))
            position += 1
    finally:
        # Release the capture even if reading raises.
        cap.release()

    captions_data = [
        {
            'Frame_ID': frame_id + 1,
            'Caption': generate_caption(convert_frame_to_pil(frame)),
        }
        for frame_id, frame in frames
    ]
    return frames, pd.DataFrame(captions_data)
88
+
89
def image_captioning_page():
    """Render the image gallery page: caption, display, search and export.

    The user enters a folder path; every PNG/JPG/JPEG file in it is captioned
    and shown in a four-column grid. A sidebar query filters the captions via
    ``search_captions``, and the captions can be exported to ``captions.xlsx``.
    """
    st.title("Image Gallery with Captioning and Search")

    # Sidebar for search functionality
    with st.sidebar:
        query = st.text_input("Search images by caption:")

    # Right side for folder path input and displaying images
    folder_path = st.text_input("Enter the folder path containing images:")

    if not folder_path:
        return
    if not os.path.isdir(folder_path):
        st.warning("The given path is not an existing folder.")
        return

    # Match on the dotted extension so names like "notapng" are not picked up;
    # sort for a stable gallery order.
    image_files = sorted(
        f for f in os.listdir(folder_path)
        if f.lower().endswith(('.png', '.jpg', '.jpeg'))
    )

    # Streamlit reruns this function on every interaction (each search
    # keystroke included), so captions are kept per session and only computed
    # for files that are new or have changed on disk.
    if "image_caption_cache" not in st.session_state:
        st.session_state["image_caption_cache"] = {}
    cache = st.session_state["image_caption_cache"]

    captions = {}
    for image_file in image_files:
        image_path = os.path.join(folder_path, image_file)
        try:
            cache_key = (image_path, os.path.getmtime(image_path))
            if cache_key not in cache:
                # The context manager closes the file handle; converting to
                # RGB also covers RGBA, palette and grayscale files.
                with Image.open(image_path) as image:
                    cache[cache_key] = generate_caption(image.convert("RGB"))
        except OSError as exc:
            # One unreadable or corrupt file should not take the page down.
            st.warning(f"Skipping unreadable image {image_file}: {exc}")
            continue
        captions[image_path] = cache[cache_key]

    # Display images in a 4-column grid
    cols = st.columns(4)
    for idx, (image_path, caption) in enumerate(captions.items()):
        with cols[idx % 4]:
            st.image(image_path, caption=caption)

    if query:
        results = search_captions(query, captions)
        st.write("Search Results:")
        for image_path, caption in results:
            st.image(image_path, caption=caption)

    # Save captions to an Excel workbook. The path input sits outside the
    # button branch: a button is only True for the single rerun after the
    # click, so a widget created inside that branch can never be edited.
    save_path = st.text_input("Enter the path to save the Excel file:", folder_path)
    if st.button("Save captions to excel") and save_path:
        df = pd.DataFrame(list(captions.items()), columns=['Image', 'Caption'])
        os.makedirs(save_path, exist_ok=True)
        excel_file_path = os.path.join(save_path, "captions.xlsx")
        df.to_excel(excel_file_path, index=False)
        st.success(f"Captions saved to {excel_file_path}")
131
+
132
def live_camera_captioning_page():
    """Render the webcam page: show the live frame and caption it periodically.

    While the "Run" checkbox is ticked, a frame is read from camera 0 roughly
    every half second, displayed, and captioned with ``generate_caption``.
    """
    # Local import keeps this block self-contained; the delay below needs it.
    import time

    st.title("Live Captioning with Webcam")
    run = st.checkbox('Run')
    frame_window = st.image([])

    # Do not claim the camera device unless the user actually asked to run.
    if not run:
        return

    # A single placeholder is overwritten each iteration so the page does not
    # grow by one caption line per captured frame.
    caption_box = st.empty()

    camera = cv2.VideoCapture(0)
    try:
        while run:
            ret, frame = camera.read()
            if not ret:
                st.write("Failed to capture image.")
                break
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_window.image(frame)
            pil_image = Image.fromarray(frame)
            caption = generate_caption(pil_image)
            caption_box.write(f"Caption: {caption}")
            # Capture an image every 0.5 seconds. cv2.waitKey is a GUI event
            # pump and is not a reliable delay when no OpenCV window exists.
            time.sleep(0.5)
    finally:
        # Streamlit ends a running script by raising inside it (for example
        # when the checkbox is cleared); release the device on that path too.
        camera.release()
152
+
153
def video_captioning_page():
    """Render the video page: summarise, display, search and export.

    The user enters a folder path; every MP4/AVI/MOV/MKV file in it is sampled
    with ``process_video`` (one frame in 20), the frame captions are joined and
    summarised, and the videos are shown in a four-column grid with their
    summaries. A sidebar query filters the summaries via ``search_captions``,
    and the summaries can be exported to ``captions.xlsx``.
    """
    st.title("Video Captioning")

    # Sidebar for search functionality
    with st.sidebar:
        query = st.text_input("Search videos by caption:")

    # Right side for folder path input and displaying videos
    folder_path = st.text_input("Enter the folder path containing videos:")

    if not folder_path:
        return
    if not os.path.isdir(folder_path):
        st.warning("The given path is not an existing folder.")
        return

    # Match on the dotted extension and sort for a stable display order.
    video_files = sorted(
        f for f in os.listdir(folder_path)
        if f.lower().endswith(('.mp4', '.avi', '.mov', '.mkv'))
    )

    # Captioning a video is expensive and Streamlit reruns this function on
    # every interaction, so summaries are kept per session and only computed
    # for files that are new or have changed on disk.
    if "video_caption_cache" not in st.session_state:
        st.session_state["video_caption_cache"] = {}
    cache = st.session_state["video_caption_cache"]

    captions = {}
    for video_file in video_files:
        video_path = os.path.join(folder_path, video_file)
        try:
            cache_key = (video_path, os.path.getmtime(video_path))
        except OSError as exc:
            st.warning(f"Skipping unreadable video {video_file}: {exc}")
            continue

        if cache_key not in cache:
            frames, captions_df = process_video(video_path, frame_interval=20)
            if not frames or captions_df.empty:
                continue
            generated_captions = ' '.join(captions_df['Caption'])
            # Long videos can yield more text than the summarizer accepts;
            # truncate the input instead of failing on over-length sequences.
            cache[cache_key] = summarize_pipe(
                generated_captions, truncation=True
            )[0]['summary_text']
        captions[video_path] = cache[cache_key]

    # Display videos in a 4-column grid
    cols = st.columns(4)
    for idx, (video_path, summary) in enumerate(captions.items()):
        with cols[idx % 4]:
            # st.video has no ``caption`` parameter; show the summary below it.
            st.video(video_path)
            st.caption(summary)

    if query:
        results = search_captions(query, captions)
        st.write("Search Results:")
        for video_path, summary in results:
            st.video(video_path)
            st.caption(summary)

    # Save captions to an Excel workbook. The path input sits outside the
    # button branch: a button is only True for the single rerun after the
    # click, so a widget created inside that branch can never be edited.
    save_path = st.text_input("Enter the path to save the Excel file:", folder_path)
    if st.button("Save captions to excel") and save_path:
        df = pd.DataFrame(list(captions.items()), columns=['Video', 'Caption'])
        os.makedirs(save_path, exist_ok=True)
        excel_file_path = os.path.join(save_path, "captions.xlsx")
        df.to_excel(excel_file_path, index=False)
        st.success(f"Captions saved to {excel_file_path}")
198
+
199
def main():
    """Show the sidebar navigation and render whichever page is selected."""
    pages = {
        "Image Captioning": image_captioning_page,
        "Live Camera Captioning": live_camera_captioning_page,
        "Video Captioning": video_captioning_page,
    }

    st.sidebar.title("Navigation")
    page = st.sidebar.selectbox(
        "Select a page",
        ["Image Captioning", "Live Camera Captioning", "Video Captioning"],
    )

    render_page = pages.get(page)
    if render_page is not None:
        render_page()


if __name__ == "__main__":
    main()