NourFakih committed on
Commit f42cfc0 · verified · 1 Parent(s): 2f14b4c

Create app.py

Files changed (1)
  1. app.py +129 -0
app.py ADDED
@@ -0,0 +1,129 @@
+ import streamlit as st
+ import os
+ import cv2
+ import pandas as pd
+ from PIL import Image
+ from transformers import VisionEncoderDecoderModel, ViTFeatureExtractor, AutoTokenizer, pipeline, AutoModelForSeq2SeqLM
+ import nltk
+ from nltk.corpus import wordnet
+ import tempfile
+ import zipfile
+ 
+ nltk.download('wordnet')
+ nltk.download('omw-1.4')
+ 
+ # Load the pre-trained models for image captioning and summarization
+ model_name = "NourFakih/Vit-GPT2-COCO2017Flickr-85k-09"
+ model = VisionEncoderDecoderModel.from_pretrained(model_name)
+ feature_extractor = ViTFeatureExtractor.from_pretrained(model_name)
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ 
+ model_sum_name = "google-t5/t5-base"
+ tokenizer_sum = AutoTokenizer.from_pretrained(model_sum_name)
+ model_sum = AutoModelForSeq2SeqLM.from_pretrained(model_sum_name)
+ # Reuse the already-loaded summarization model and tokenizer instead of loading them again by name
+ summarize_pipe = pipeline("summarization", model=model_sum, tokenizer=tokenizer_sum)
+ 
+ def generate_caption(image):
+     """Generate a caption for a single PIL image with the ViT-GPT2 model."""
+     pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values
+     output_ids = model.generate(pixel_values)
+     caption = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+     return caption
+ 
+ def get_synonyms(word):
+     """Return the set of WordNet lemma names (synonyms) for a word."""
+     synonyms = set()
+     for syn in wordnet.synsets(word):
+         for lemma in syn.lemmas():
+             synonyms.add(lemma.name())
+     return synonyms
+ 
+ def search_captions(query, captions):
+     """Return (path, caption) pairs whose caption shares a word, or a synonym of a word, with the query."""
+     query_words = query.split()
+     query_synonyms = set(query_words)
+     for word in query_words:
+         query_synonyms.update(get_synonyms(word))
+ 
+     results = []
+     for path, caption in captions.items():
+         if any(word in caption.split() for word in query_synonyms):
+             results.append((path, caption))
+ 
+     return results
+ 
+ def process_video(video_path, frame_interval):
+     """Caption every `frame_interval`-th frame of a video; return the frames and a DataFrame of captions."""
+     cap = cv2.VideoCapture(video_path)
+     frames = []
+     captions = []
+     success, frame = cap.read()
+     count = 0
+     while success:
+         if count % frame_interval == 0:
+             frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+             pil_image = Image.fromarray(frame_rgb)
+             caption = generate_caption(pil_image)
+             frames.append(frame)
+             captions.append(caption)
+         success, frame = cap.read()
+         count += 1
+     cap.release()
+     df = pd.DataFrame({'Frame': frames, 'Caption': captions})
+     return frames, df
+ 
+ st.title("Combined Video Captioning and Gallery App")
+ 
+ # Sidebar for search functionality
+ with st.sidebar:
+     query = st.text_input("Search videos by caption:")
+ 
+ # Options for input strategy
+ input_option = st.selectbox("Select input method:", ["Folder Path", "Upload Video", "Upload ZIP"])
+ 
+ video_files = []
+ 
+ if input_option == "Folder Path":
+     folder_path = st.text_input("Enter the folder path containing videos:")
+     if folder_path and os.path.isdir(folder_path):
+         video_files = [os.path.join(folder_path, f) for f in os.listdir(folder_path) if f.lower().endswith(('mp4', 'avi', 'mov', 'mkv'))]
+ 
+ elif input_option == "Upload Video":
+     uploaded_files = st.file_uploader("Upload video files", type=["mp4", "avi", "mov", "mkv"], accept_multiple_files=True)
+     if uploaded_files:
+         for uploaded_file in uploaded_files:
+             # Keep the original extension so OpenCV can open the temporary file
+             suffix = os.path.splitext(uploaded_file.name)[1]
+             with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
+                 temp_file.write(uploaded_file.read())
+                 video_files.append(temp_file.name)
+ 
+ elif input_option == "Upload ZIP":
+     uploaded_zip = st.file_uploader("Upload a ZIP file containing videos", type=["zip"])
+     if uploaded_zip:
+         with tempfile.NamedTemporaryFile(delete=False, suffix=".zip") as temp_file:
+             temp_file.write(uploaded_zip.read())
+         with zipfile.ZipFile(temp_file.name, 'r') as zip_ref:
+             zip_ref.extractall("/tmp/videos")
+             video_files = [os.path.join("/tmp/videos", f) for f in zip_ref.namelist() if f.lower().endswith(('mp4', 'avi', 'mov', 'mkv'))]
+ 
+ if video_files:
+     captions = {}
+     for video_file in video_files:
+         frames, captions_df = process_video(video_file, frame_interval=20)
+ 
+         if frames and not captions_df.empty:
+             generated_captions = ' '.join(captions_df['Caption'])
+             summary = summarize_pipe(generated_captions)[0]['summary_text']
+             captions[video_file] = summary
+ 
+     # Display videos in a 4-column grid
+     cols = st.columns(4)
+     for idx, (video_path, summary) in enumerate(captions.items()):
+         with cols[idx % 4]:
+             st.video(video_path)
+             st.caption(summary)
+ 
+     if query:
+         results = search_captions(query, captions)
+         st.write("Search Results:")
+         for video_path, summary in results:
+             st.video(video_path)
+             st.caption(summary)
+ 
+     # Save captions to CSV and provide a download button
+     if st.button("Generate CSV"):
+         df = pd.DataFrame(list(captions.items()), columns=['Video', 'Caption'])
+         csv = df.to_csv(index=False)
+         st.download_button(label="Download captions as CSV", data=csv, file_name="captions.csv", mime="text/csv")