"""Demo to show clips from AudioSet with Doppler effect.""" import os from os.path import join, exists, dirname, abspath, basename import json from glob import glob from tqdm import tqdm import numpy as np import pandas as pd import streamlit as st import matplotlib.pyplot as plt # from moviepy.video.io.VideoFileClip import VideoFileClip import warnings warnings.simplefilter(action='ignore') curr_filepath = abspath(__file__) repo_path = dirname(dirname(curr_filepath)) def make_grid(cols,rows): grid = [0]*cols for i in range(cols): with st.container(): grid[i] = st.columns(rows) return grid # Filter out samples with (possible) Doppler effect doppler_classes = [ 'airplane', 'ambulance siren', # 'race car, auto racing', # Typically captured from within the car 'subway, metro, underground', 'car passing by', # 'motorboat, speedboat acceleration', # Typically captured from within the boat 'railroad car, train wagon', # 'helicopter', # 'driving snowmobile', # Typically captured from within the snowmobile 'airplane flyby', ] if __name__ == "__main__": # Streamlit app code st.set_page_config(layout="wide") st.title("Clips from VGGSound (possibly with Doppler effect) 🎬") # load data if "df" not in st.session_state: csv_path = "./data/vggsound.csv" df = pd.read_csv(csv_path) df.columns = ["video_id", "start_seconds", "label", "split"] df["end_seconds"] = df["start_seconds"] + 10. df = df[df["label"].isin(doppler_classes)] st.session_state.df = df else: df = st.session_state.df st.markdown(f"**Total number of relevant clips**: {len(df)}", unsafe_allow_html=True) # st.markdown("---") # # plot histogram # arr = np.random.normal(1, 1, size=100) # fig, ax = plt.subplots(1, 1, figsize=(1, 1)) # ax.hist(arr, bins=20) # st.pyplot(fig) # plot st bar chart st.markdown("**Distribution of classes**") count_df = df["label"].value_counts().reset_index() # sort by count count_df = count_df.sort_values(by="label", ascending=False) print(count_df) st.bar_chart(count_df, width=300, height=0) reload_button = st.button("Reload") NUM = 9 indices = np.random.randint(0, len(st.session_state.df), NUM) if reload_button: indices = np.random.randint(0, len(st.session_state.df), NUM) videoids = [] segments = [] labels = [] for index in indices: sample = st.session_state.df.iloc[index].to_dict() video_id = sample["video_id"] videoids.append(video_id) start_time = sample["start_seconds"] end_time = sample["end_seconds"] segments.append((start_time, end_time)) labels.append(sample["label"]) # st.markdown(f"Showing Foley segments from a clip in movie: **{video_id}**") # Create a grid of videos grid = make_grid(3, 3) per_video_width = 360 per_video_height = 240 # Add videos to the grid for idx in range(0, min(len(segments), 9)): i, j = idx // 3, idx % 3 start, end = segments[idx] duration = end - start video_id = videoids[idx] grid[i][j].caption(f"Segment duration: {duration}") url = f"https://www.youtube.com/embed/{video_id}?start={int(start)}&end={int(end)}" html_code = f""" """ grid[i][j].markdown(html_code, unsafe_allow_html=True) grid[i][j].caption(f"{labels[idx]}")