File size: 2,226 Bytes
622f342
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
"""Streamlit app"""
from os.path import join, exists, dirname, abspath
from glob import glob

import numpy as np
import pandas as pd
import torch
import torchvision
import streamlit as st

import warnings

# No category is given, so the "ignore" action applies to every warning.
warnings.simplefilter("ignore")

# Absolute path of this script, and the directory that contains it; the
# data files are looked up relative to that directory.
curr_filepath = abspath(__file__)
repo_path = dirname(curr_filepath)


# Words/phrases searched for (as plain substrings) in captions to decide
# whether a clip is kept.
temporal_terms = ["then", "before", "after", "followed by", "preceded by"]


def make_grid(cols, rows):
    """Build a 2-D layout of Streamlit column handles.

    Note the parameter names are swapped relative to what they control:
    ``cols`` is the number of containers created one after another (one
    per iteration), while ``rows`` is forwarded to ``st.columns`` inside
    each container.

    Returns a list of length ``cols`` whose i-th entry is the result of
    ``st.columns(rows)`` for the i-th container, so a cell is addressed
    as ``grid[i][j]``.
    """
    layout = []
    for _ in range(cols):
        # Each st.columns call is made inside its own container.
        with st.container():
            layout.append(st.columns(rows))
    return layout


def _load_temporal_clips():
    """Return the caption rows that mention at least one temporal term.

    On first use the three split CSVs under ``<repo_path>/data`` are read,
    concatenated, and filtered down to rows whose ``caption`` contains any
    entry of ``temporal_terms`` (case-sensitive substring match). The
    filtered frame is stored in ``st.session_state.df`` and reused on later
    script runs so the files are not re-read.
    """
    if "df" not in st.session_state:
        splits = ["train.csv", "val.csv", "test.csv"]
        frames = [pd.read_csv(join(repo_path, "data", split)) for split in splits]
        clips = pd.concat(frames, axis=0)
        # Generator instead of a list: any() can stop at the first hit.
        is_temporal = clips.caption.apply(
            lambda caption: any(term in caption for term in temporal_terms)
        )
        st.session_state.df = clips[is_temporal]
    return st.session_state.df


def main():
    """Render a grid of randomly chosen YouTube clips with their captions."""
    st.set_page_config(layout="wide")
    st.title("Clips from AudioCaps (possibly of temporal nature) 🎬")

    df = _load_temporal_clips()
    st.markdown(f"**Total number of relevant clips**: {len(df)}")

    # Streamlit reruns the whole script when the button is clicked, and a
    # fresh sample is drawn on every run, so the button only has to exist;
    # no second draw is needed.
    st.button("Reload")

    grid_rows, grid_cols = 3, 3
    per_video_width = 360
    per_video_height = 240

    # Never ask for more clips than exist, and bail out cleanly when the
    # filter matched nothing (sampling from an empty range would raise).
    num_clips = min(grid_rows * grid_cols, len(df))
    if num_clips == 0:
        st.warning("No clips with temporal captions were found.")
        return

    # Sample positions without replacement so a clip is not shown twice.
    indices = np.random.choice(len(df), size=num_clips, replace=False)

    grid = make_grid(grid_rows, grid_cols)
    for i, idx in enumerate(indices):
        cell = grid[i // grid_cols][i % grid_cols]

        clip = df.iloc[idx]  # single positional lookup per clip
        start = clip.start_time
        end = start + 10.  # the embedded player is limited to a 10-second window

        url = f"https://www.youtube.com/embed/{clip.youtube_id}?start={int(start)}&end={int(end)}"
        html_code = f"""
        <iframe height="{per_video_height}" width="{per_video_width}" src="{url}" frameborder="0" allowfullscreen></iframe>
        """
        # Raw HTML is required only for the iframe itself.
        cell.markdown(html_code, unsafe_allow_html=True)
        # The caption is plain dataset text, so it is rendered as ordinary
        # Markdown rather than being passed through as HTML.
        cell.markdown(f"**Caption**: {clip.caption}")


if __name__ == "__main__":
    main()