bpiyush committed on
Commit
8a09121
·
1 Parent(s): 91b9124

WIP: Making the demo work on hf spaces

Browse files
Files changed (1) hide show
  1. app.py +51 -56
app.py CHANGED
@@ -1,7 +1,8 @@
1
  """Streamlit demo to visualize auto-annotated Foley segments from movie clips."""
2
  import os
3
- from os.path import join, exists, dirname, abspath
4
  import json
 
5
 
6
  from tqdm import tqdm
7
  import numpy as np
@@ -147,78 +148,72 @@ if __name__ == "__main__":
147
  "**Instructions**: Click the **Reload** button to see segments from a new clip. "\
148
  "Reloading the page is not necessary."
149
  )
150
-
151
- use_local = False
152
- data_root = get_data_root_from_hostname()
153
- data_dir = join(data_root, "CondensedMovies")
154
- video_dir = join(data_dir, "pytube_videos")
155
-
156
- annot_dir = join(repo_path, "external/CondensedMovies/data/metadata/")
157
-
158
- if "subdf" not in st.session_state:
159
- df = load_clips_df(join(".", "clips.csv"), data_dir, verbose=True)
160
- df["annot_filtered"] = df["annot_path"].apply(lambda x: x.replace(".json", "_filtered.json"))
161
- df = df[df["annot_filtered"].apply(exists)]
162
- df["num_foley_segments"] = df["annot_filtered"].apply(lambda f: sum(load_json(f)["keep_status"]))
163
- subdf = df[df["num_foley_segments"].apply(lambda x: x > 0)]
164
- st.session_state.subdf = subdf
165
- num_foley = subdf["num_foley_segments"].sum()
166
- st.session_state.num_foley = num_foley
167
- print("Loaded subdf with {} rows".format(len(subdf)))
 
 
 
168
 
169
 
170
  reload_button = st.button("Reload")
171
- # index = 0
172
- index = np.random.randint(0, len(st.session_state.subdf))
173
  if reload_button:
174
- index = np.random.randint(0, len(st.session_state.subdf))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
 
176
- row = st.session_state.subdf.iloc[index].to_dict()
177
- if use_local:
178
- clip_paths, labels, segments, durations = process_sample(row)
179
- else:
180
- annot = load_json(row["annot_filtered"])
181
- seg_indices = [i for i, flag in enumerate(annot["keep_status"]) if flag]
182
- keys = ["non_speech_segments", "silence_prob", "audiomae_on_audioset", "duration"]
183
- for k in keys:
184
- annot[k] = [x for i, x in enumerate(annot[k]) if i in seg_indices]
185
- del annot["keep_status"]
186
- labels = [
187
- summarize_classification_probs(
188
- annot["silence_prob"][i], annot["audiomae_on_audioset"][i]
189
- ) for i in range(len(annot["non_speech_segments"]))
190
- ]
191
- segments, durations = annot["non_speech_segments"], annot["duration"]
192
- clip_paths = [f"https://www.youtube.com/watch?v={row['videoid']}"] * len(segments)
193
-
194
- # Make a grid of videos and captions in streamlit
195
- videos = clip_paths
196
- video_id = row["videoid"]
197
- movie = row["title"]
198
  st.markdown(f"Showing Foley segments from a clip in movie: **{movie}**")
199
 
200
  # Create a grid of videos
201
  grid = make_grid(3, 3)
202
 
203
  # Add videos to the grid
204
- for idx in range(0, min(len(videos), 9)):
205
  i, j = idx // 3, idx % 3
206
 
207
  start, end = segments[idx]
208
  duration = durations[idx]
209
 
210
  grid[i][j].caption(f"Segment duration: {duration}")
211
- if not use_local:
212
- url = f"https://www.youtube.com/embed/{video_id}?start={int(start)}&end={int(end)}"
213
- html_code = f"""
214
- <iframe height="320" width="420" src="{url}" frameborder="0" allowfullscreen></iframe>
215
- """
216
- grid[i][j].markdown(html_code, unsafe_allow_html=True)
217
- else:
218
- grid[i][j].video(videos[idx])
219
  grid[i][j].caption(f"{labels[idx]}")
220
-
221
 
222
  st.markdown("##### Some stats")
223
- st.write(f"Total number of unique clips: {len(st.session_state.subdf)}")
224
- st.write(f"Total number of foley segments: {st.session_state.num_foley}")
 
1
  """Streamlit demo to visualize auto-annotated Foley segments from movie clips."""
2
  import os
3
+ from os.path import join, exists, dirname, abspath, basename
4
  import json
5
+ from glob import glob
6
 
7
  from tqdm import tqdm
8
  import numpy as np
 
148
  "**Instructions**: Click the **Reload** button to see segments from a new clip. "\
149
  "Reloading the page is not necessary."
150
  )
151
+
152
+ csv_path = "./clips.csv"
153
+ ann_dirs = glob(join(".", "annotations_", "*"))
154
+ annot_paths = glob(join(".", "annotations_*", "*_filtered.json"))
155
+ print("Total number of clips: {}".format(len(annot_paths)))
156
+
157
+
158
+ if "data" not in st.session_state:
159
+ # store video ids
160
+ video_ids = [basename(x).split("_filtered.json")[0] for x in annot_paths]
161
+
162
+ # load annotation data
163
+ data = [load_json(p) for p in annot_paths]
164
+ num_foley_per_clip = [sum(d["keep_status"]) for d in data]
165
+ num_foley_segments = np.sum(num_foley_per_clip)
166
+ data = [d for d, n in zip(data, num_foley_per_clip) if n > 0]
167
+
168
+ # store variables
169
+ st.session_state.video_ids = video_ids
170
+ st.session_state.data = data
171
+ st.session_state.num_foley_segments = num_foley_segments
172
 
173
 
174
  reload_button = st.button("Reload")
175
+ index = np.random.randint(0, len(st.session_state.data))
 
176
  if reload_button:
177
+ index = np.random.randint(0, len(st.session_state.data))
178
+
179
+ # Gather data
180
+ annot = st.session_state.data[index]
181
+ video_id = st.session_state.video_ids[index]
182
+ seg_indices = [i for i, flag in enumerate(annot["keep_status"]) if flag]
183
+ keys = ["non_speech_segments", "silence_prob", "audiomae_on_audioset", "duration"]
184
+ for k in keys:
185
+ annot[k] = [x for i, x in enumerate(annot[k]) if i in seg_indices]
186
+ del annot["keep_status"]
187
+ labels = [
188
+ summarize_classification_probs(
189
+ annot["silence_prob"][i], annot["audiomae_on_audioset"][i]
190
+ ) for i in range(len(annot["non_speech_segments"]))
191
+ ]
192
+ segments, durations = annot["non_speech_segments"], annot["duration"]
193
+ movie = annot["title"]
194
+
195
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
  st.markdown(f"Showing Foley segments from a clip in movie: **{movie}**")
197
 
198
  # Create a grid of videos
199
  grid = make_grid(3, 3)
200
 
201
  # Add videos to the grid
202
+ for idx in range(0, min(len(segments), 9)):
203
  i, j = idx // 3, idx % 3
204
 
205
  start, end = segments[idx]
206
  duration = durations[idx]
207
 
208
  grid[i][j].caption(f"Segment duration: {duration}")
209
+ url = f"https://www.youtube.com/embed/{video_id}?start={int(start)}&end={int(end)}"
210
+ html_code = f"""
211
+ <iframe height="320" width="420" src="{url}" frameborder="0" allowfullscreen></iframe>
212
+ """
213
+ grid[i][j].markdown(html_code, unsafe_allow_html=True)
 
 
 
214
  grid[i][j].caption(f"{labels[idx]}")
215
+
216
 
217
  st.markdown("##### Some stats")
218
+ st.write(f"Total number of unique clips: {len(st.session_state.data)}")
219
+ st.write("Total number of foley segments: {}".format(st.session_state.num_foley_segments))