bpiyush committed
Commit b190ca2 · Parent: ebe4290

New app loaded

Files changed (2)
  1. app.py +49 -30
  2. app_old.py +226 -0
app.py CHANGED
@@ -136,7 +136,7 @@ if __name__ == "__main__":
    st.markdown(
        "> **Note**: This demo shows cut out segments from clips in the [Condensed Movies](https://www.robots.ox.ac.uk/~vgg/data/condensed-movies/) dataset. "\
        "The segments are adjudged to have Foley sounds based on AudioMAE predictions on AudioSet classes. "\
-       "The segments with duration not in [2s, 30s] or those with silence or high probability of speech/music sounds are filtered out. "\
+       "The segments are cut into fixed 5s long chunks. Those with silence or high probability of speech/music sounds are filtered out. "\
        "However, segments can still be noisy. Furthermore, even if a clip has "\
        "Foley, there can still be background music/score which we have not removed yet."
    )
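The revised note describes the upstream filtering: clips are split into fixed 5s chunks, and a chunk survives only if it is neither silent nor dominated by speech or music according to AudioMAE predictions on AudioSet classes. A minimal sketch of such a rule; the threshold, the helper name keep_chunk, and the probability dicts are illustrative assumptions, not code from this repo:

    # Hypothetical sketch of the chunk filter described in the note above.
    # Thresholds and field names are assumed for illustration only.
    def keep_chunk(silence_prob, class_probs, thresh=0.5):
        """Return True if a fixed 5s chunk plausibly contains Foley sound."""
        if silence_prob > thresh:
            return False  # mostly silent
        for label in ("speech", "music"):
            if class_probs.get(label, 0.0) > thresh:
                return False  # dominated by speech/music
        return True

    chunks = [(0.0, 5.0), (5.0, 10.0)]
    silence = [0.10, 0.05]
    probs = [
        {"speech": 0.90, "footsteps": 0.10},
        {"speech": 0.05, "footsteps": 0.80},
    ]
    kept = [c for c, s, p in zip(chunks, silence, probs) if keep_chunk(s, p)]
    print(kept)  # [(5.0, 10.0)]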
@@ -150,34 +150,50 @@
    )

    csv_path = "./clips.csv"
-   ann_dirs = glob(join(".", "annotations_", "*"))
-   annot_paths = glob(join(".", "annotations_*", "*_filtered.json"))
+   ann_dirname = "annotations-v2.0"
+   ext = ".json"
+   ann_dirs = glob(join(".", f"{ann_dirname}_", "*"))
+   annot_paths = glob(join(".", f"{ann_dirname}_*", f"*{ext}"))
    per_video_width = 360
    per_video_height = 240
    print("Total number of clips: {}".format(len(annot_paths)))


    if "data" not in st.session_state:
-       # store video ids
-       video_ids = [basename(x).split("_filtered.json")[0] for x in annot_paths]
+
+       # Store video ids
+       video_ids = [basename(x).split(ext)[0] for x in annot_paths]

-       # load annotation data
+       # Load annotation data
        data = [load_json(p) for p in annot_paths]
-       num_foley_per_clip = [sum(d["keep_status"]) for d in data]
-       num_foley_segments = np.sum(num_foley_per_clip)
-       data = [d for d, n in zip(data, num_foley_per_clip) if n > 0]
+
+       # Filter those examples with no foley segments
+       indices = [i for i, d in enumerate(data) if len(d["segments"]) > 0]
+       data = [d for i, d in enumerate(data) if i in indices]
+       video_ids = [video_ids[i] for i in indices]
+
+       # Compute the total number of Foley segments
+       num_foley_segments = np.sum([len(d["segments"]) for d in data])

        # get movie titles
        df = pd.read_csv(csv_path)
+       df = df[df["videoid"].isin(video_ids)]
+       # reorder rows to match video_ids
+       df = df.set_index("videoid").loc[video_ids].reset_index()
        titles = df["title"].values
-
+
        # store variables
        st.session_state.titles = titles
        st.session_state.video_ids = video_ids
        st.session_state.data = data
        st.session_state.num_foley_segments = num_foley_segments

-
+
+   st.markdown("##### Some stats")
+   st.write(f"Total number of unique clips: {len(st.session_state.data)}")
+   st.write("Total number of foley segments: {}".format(st.session_state.num_foley_segments))
+   st.markdown("---")
+
    reload_button = st.button("Reload")
    index = np.random.randint(0, len(st.session_state.data))
    if reload_button:
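The new loading code filters clips.csv down to the annotated videos and then reorders its rows so that titles[i] lines up with video_ids[i]. A self-contained toy illustration of that pandas idiom; the column names videoid and title follow the diff, while the data here is made up:

    import pandas as pd

    df = pd.DataFrame({
        "videoid": ["a1", "b2", "c3"],
        "title": ["Movie A", "Movie B", "Movie C"],
    })
    video_ids = ["c3", "a1"]  # order dictated by the annotation files

    # Keep only annotated videos, then realign rows to match video_ids.
    df = df[df["videoid"].isin(video_ids)]
    df = df.set_index("videoid").loc[video_ids].reset_index()
    print(df["title"].tolist())  # ['Movie C', 'Movie A']

Without the reorder step, df would stay in CSV order and indexing titles by the annotation index would mislabel the clips.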
@@ -186,21 +202,27 @@ if __name__ == "__main__":
    # Gather data
    annot = st.session_state.data[index]
    video_id = st.session_state.video_ids[index]
-   seg_indices = [i for i, flag in enumerate(annot["keep_status"]) if flag]
-   keys = ["non_speech_segments", "silence_prob", "audiomae_on_audioset", "duration"]
-   for k in keys:
-       annot[k] = [x for i, x in enumerate(annot[k]) if i in seg_indices]
-   del annot["keep_status"]
-   labels = [
-       summarize_classification_probs(
-           annot["silence_prob"][i], annot["audiomae_on_audioset"][i]
-       ) for i in range(len(annot["non_speech_segments"]))
-   ]
-   segments, durations = annot["non_speech_segments"], annot["duration"]
-   movie = st.session_state.titles[index]
-
-
-   st.markdown(f"Showing Foley segments from a clip in movie: **{movie}**")
+   title = st.session_state.titles[index]
+
+   segments = annot["segments"]
+   durations = [np.round(y - x, 2) for [x, y] in annot["segments"]]
+   labels = annot["labels"]
+
+   # seg_indices = [i for i, flag in enumerate(annot["keep_status"]) if flag]
+   # keys = ["non_speech_segments", "silence_prob", "audiomae_on_audioset", "duration"]
+   # for k in keys:
+   #     annot[k] = [x for i, x in enumerate(annot[k]) if i in seg_indices]
+   # del annot["keep_status"]
+   # labels = [
+   #     summarize_classification_probs(
+   #         annot["silence_prob"][i], annot["audiomae_on_audioset"][i]
+   #     ) for i in range(len(annot["non_speech_segments"]))
+   # ]
+   # segments, durations = annot["non_speech_segments"], annot["duration"]
+   # movie = st.session_state.titles[index]
+
+
+   st.markdown(f"Showing Foley segments from a clip in movie: **{title}**")

    # Create a grid of videos
    grid = make_grid(3, 3)
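This hunk marks the switch from the old keep_status/non_speech_segments schema to a simpler per-clip JSON. The shape below is inferred from the two fields the new code reads (segments and labels); the concrete values are invented for illustration:

    import numpy as np

    # Assumed per-clip annotation schema (inferred from the diff, not
    # from the actual *.json files, which are not shown here).
    annot = {
        "segments": [[12.0, 17.0], [41.5, 46.5]],   # [start, end] in seconds
        "labels": ["Footsteps | Thud", "Door | Creak"],
    }
    durations = [np.round(y - x, 2) for [x, y] in annot["segments"]]
    print(durations)  # [5.0, 5.0]

With precomputed segments and labels, the app no longer needs summarize_classification_probs or the keep_status filtering at render time.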
@@ -220,7 +242,4 @@ if __name__ == "__main__":
        grid[i][j].markdown(html_code, unsafe_allow_html=True)
        grid[i][j].caption(f"{labels[idx]}")

-
-   st.markdown("##### Some stats")
-   st.write(f"Total number of unique clips: {len(st.session_state.data)}")
-   st.write("Total number of foley segments: {}".format(st.session_state.num_foley_segments))
+
 
 
 
 
app_old.py ADDED
@@ -0,0 +1,226 @@
+"""Streamlit demo to visualize auto-annotated Foley segments from movie clips."""
+import os
+from os.path import join, exists, dirname, abspath, basename
+import json
+from glob import glob
+
+from tqdm import tqdm
+import numpy as np
+import pandas as pd
+import streamlit as st
+from moviepy.video.io.VideoFileClip import VideoFileClip
+
+import warnings
+warnings.simplefilter(action='ignore')
+
+
+curr_filepath = abspath(__file__)
+repo_path = dirname(dirname(curr_filepath))
+
+
+def load_json(path: str) -> dict:
+    """Helper to load json file"""
+    with open(path, 'rb') as f:
+        data = json.load(f)
+    return data
+
+
+def tqdm_iterator(items, desc=None, bar_format=None, **kwargs):
+    tqdm._instances.clear()
+    iterator = tqdm(
+        items,
+        desc=desc,
+        bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}',
+        **kwargs,
+    )
+
+    return iterator
+
+
+def get_data_root_from_hostname():
+    import socket
+
+    data_root_lib = {
+        "diva": "/ssd/pbagad/datasets/",
+        "node": "/var/scratch/pbagad/datasets/",
+        "fs4": "/var/scratch/pbagad/datasets/",
+    }
+    hostname = socket.gethostname()
+    hostname = hostname[:4]
+
+    data_root = data_root_lib.get(hostname, "NA")
+    return data_root
+
+
+def load_clips_df(df_path, data_dir, verbose=True, use_local=False):
+    assert exists(df_path), f"File {df_path} does not exist"
+    df = pd.read_csv(df_path)
+    print(df.columns)
+    if verbose:
+        print("Number of clips:", len(df))
+    # filter out clips that are not downloaded
+    df["video_path"] = df["videoid"].apply(lambda x: join(data_dir, "pytube_videos", f"{x}.mp4"))
+    if use_local:
+        df = df[df["video_path"].apply(exists)]
+        if verbose:
+            print("Number of clips (with videos available):", len(df))
+    df["audio_path"] = df["videoid"].apply(lambda x: join(data_dir, "pytube_audio", f"{x}.wav"))
+    if use_local:
+        df = df[df["audio_path"].apply(exists)]
+        if verbose:
+            print("Number of clips (with audio available):", len(df))
+    df["annot_path"] = df["videoid"].apply(lambda x: join(data_dir, "annotations", f"{x}.json"))
+    if use_local:
+        df = df[df["annot_path"].apply(exists)]
+        if verbose:
+            print("Number of clips (with annotations available):", len(df))
+    return df
+
+
+def summarize_classification_probs(silence, probs):
+    summary = [f"Silence: {silence}"]
+    summary += [f"{l.capitalize()}: {p}" for (l, p) in probs]
+    return " | ".join(summary)
+
+
+def cut_video_in_segments(video_path, segments):
+    video = VideoFileClip(video_path)
+    tmp_dir = os.path.join(os.path.expanduser("~"), "tmp")
+    clip_paths = [f"{tmp_dir}/clip_{i}.mp4" for i in range(len(segments))]
+    iterator = tqdm_iterator(
+        zip(segments, clip_paths), total=len(segments), desc="Preparing clips",
+    )
+    clips = [
+        video.subclip(x, y).write_videofile(f, logger=None, verbose=False) \
+        for (x, y), f in iterator
+    ]
+    return clip_paths
+
+
+def process_sample(row):
+
+    video_path = row["video_path"]
+    audio_path = row["audio_path"]
+
+    annot = load_json(row["annot_filtered"])
+    seg_indices = [i for i, flag in enumerate(annot["keep_status"]) if flag]
+
+    keys = ["non_speech_segments", "silence_prob", "audiomae_on_audioset", "duration"]
+    for k in keys:
+        annot[k] = [x for i, x in enumerate(annot[k]) if i in seg_indices]
+    del annot["keep_status"]
+
+    labels = [
+        summarize_classification_probs(
+            annot["silence_prob"][i], annot["audiomae_on_audioset"][i]
+        ) for i in range(len(annot["non_speech_segments"]))
+    ]
+    clip_paths = cut_video_in_segments(video_path, annot["non_speech_segments"])
+    return clip_paths, labels, annot["non_speech_segments"], annot["duration"]
+
+
+def make_grid(cols,rows):
+    grid = [0]*cols
+    for i in range(cols):
+        with st.container():
+            grid[i] = st.columns(rows)
+    return grid
+
+
+if __name__ == "__main__":
+
+    # Streamlit app code
+    st.set_page_config(layout="wide")
+    st.title("Foley Segments from Condensed Movies Dataset 🎬")
+
+    st.markdown(
+        "> **Note**: This demo shows cut out segments from clips in the [Condensed Movies](https://www.robots.ox.ac.uk/~vgg/data/condensed-movies/) dataset. "\
+        "The segments are adjudged to have Foley sounds based on AudioMAE predictions on AudioSet classes. "\
+        "The segments with duration not in [2s, 30s] or those with silence or high probability of speech/music sounds are filtered out. "\
+        "However, segments can still be noisy. Furthermore, even if a clip has "\
+        "Foley, there can still be background music/score which we have not removed yet."
+    )
+    st.markdown(
+        """> <span style="color:red">Warning</span>: Currently, each clip can be played only once. Replaying starts the clip from beginning of the video.""",
+        unsafe_allow_html=True
+    )
+    st.markdown(
+        "**Instructions**: Click the **Reload** button to see segments from a new clip. "\
+        "Reloading the page is not necessary."
+    )
+
+    csv_path = "./clips.csv"
+    ann_dirs = glob(join(".", "annotations_", "*"))
+    annot_paths = glob(join(".", "annotations_*", "*_filtered.json"))
+    per_video_width = 360
+    per_video_height = 240
+    print("Total number of clips: {}".format(len(annot_paths)))
+
+
+    if "data" not in st.session_state:
+        # store video ids
+        video_ids = [basename(x).split("_filtered.json")[0] for x in annot_paths]
+
+        # load annotation data
+        data = [load_json(p) for p in annot_paths]
+        num_foley_per_clip = [sum(d["keep_status"]) for d in data]
+        num_foley_segments = np.sum(num_foley_per_clip)
+        data = [d for d, n in zip(data, num_foley_per_clip) if n > 0]
+
+        # get movie titles
+        df = pd.read_csv(csv_path)
+        titles = df["title"].values
+
+        # store variables
+        st.session_state.titles = titles
+        st.session_state.video_ids = video_ids
+        st.session_state.data = data
+        st.session_state.num_foley_segments = num_foley_segments
+
+
+    reload_button = st.button("Reload")
+    index = np.random.randint(0, len(st.session_state.data))
+    if reload_button:
+        index = np.random.randint(0, len(st.session_state.data))
+
+    # Gather data
+    annot = st.session_state.data[index]
+    video_id = st.session_state.video_ids[index]
+    seg_indices = [i for i, flag in enumerate(annot["keep_status"]) if flag]
+    keys = ["non_speech_segments", "silence_prob", "audiomae_on_audioset", "duration"]
+    for k in keys:
+        annot[k] = [x for i, x in enumerate(annot[k]) if i in seg_indices]
+    del annot["keep_status"]
+    labels = [
+        summarize_classification_probs(
+            annot["silence_prob"][i], annot["audiomae_on_audioset"][i]
+        ) for i in range(len(annot["non_speech_segments"]))
+    ]
+    segments, durations = annot["non_speech_segments"], annot["duration"]
+    movie = st.session_state.titles[index]
+
+
+    st.markdown(f"Showing Foley segments from a clip in movie: **{movie}**")
+
+    # Create a grid of videos
+    grid = make_grid(3, 3)
+
+    # Add videos to the grid
+    for idx in range(0, min(len(segments), 9)):
+        i, j = idx // 3, idx % 3
+
+        start, end = segments[idx]
+        duration = durations[idx]
+
+        grid[i][j].caption(f"Segment duration: {duration}")
+        url = f"https://www.youtube.com/embed/{video_id}?start={int(start)}&end={int(end)}"
+        html_code = f"""
+        <iframe height="{per_video_height}" width="{per_video_width}" src="{url}" frameborder="0" allowfullscreen></iframe>
+        """
+        grid[i][j].markdown(html_code, unsafe_allow_html=True)
+        grid[i][j].caption(f"{labels[idx]}")
+
+
+    st.markdown("##### Some stats")
+    st.write(f"Total number of unique clips: {len(st.session_state.data)}")
+    st.write("Total number of foley segments: {}".format(st.session_state.num_foley_segments))