# Spaces: orrzohar / Video-STaR (Running)
# File size: 4,021 Bytes

import math
import gradio as gr
import os
import json

# Dataset display name -> JSON file that holds that dataset's examples.
json_files = {
    "Kinetics700": "kinetics700_tune_.json",
    "STAR-benchmark": "starb_tune_.json",
    "FineDiving": "finediving_tune_.json",
}

# Keys read from every example record.
VIDEO_NAME, QUESTION, LABEL, PREDICTION = (
    "video_name",
    "question",
    "label",
    "prediction",
)

# Column layout: the video sits on top, the text fields go in the details panel.
left_side_columns = [VIDEO_NAME]
right_side_columns = [QUESTION, LABEL, PREDICTION]

batch_size = 4  # number of examples shown per page
target_size = (1024, 1024)  # not referenced by the code visible in this file

def func(index, dataset):
    """Return the flat list of component values for one page of a dataset.

    Args:
        index: Zero-based page number (the slider value).
        dataset: Key into ``json_files`` selecting which JSON file to read.

    Returns:
        A flat list with one value per column per example, in the order
        ``left_side_columns + right_side_columns``. The list is always
        ``batch_size`` examples long: slots with no example are filled with
        ``None``.

    Raises:
        KeyError: If ``dataset`` is not a key of ``json_files``.
        OSError: If the JSON file cannot be opened.
    """
    # Slicing needs an int; coerce in case the slider delivers a float.
    start_index = int(index) * batch_size
    end_index = start_index + batch_size

    with open(json_files[dataset], 'r', encoding='utf-8') as f:
        data = json.load(f)

    values_lst = []
    for example in data[start_index:end_index]:
        values_lst.extend(get_instance_values(example, dataset))

    # The slider range is sized from one dataset only, so after switching to a
    # smaller dataset the requested page can be short or entirely out of
    # range. Gradio requires one return value per output component, so pad the
    # missing slots with None instead of returning a shorter list.
    # NOTE(review): assumes the UI was built with batch_size example columns.
    columns_per_example = len(left_side_columns) + len(right_side_columns)
    missing = batch_size * columns_per_example - len(values_lst)
    values_lst.extend([None] * missing)
    return values_lst

def get_instance_values(example, dataset_name):
    """Return an example's display values in column order.

    The video name is resolved to an absolute path under the ``dataset_name``
    directory. The ``example`` mapping itself is left untouched, so calling
    this function has no side effects on the loaded data.

    Args:
        example: Mapping that contains every key listed in
            ``left_side_columns`` and ``right_side_columns``.
        dataset_name: Directory name that the video name is joined onto.

    Returns:
        A list of values ordered as ``left_side_columns + right_side_columns``.

    Raises:
        KeyError: If ``example`` lacks one of the expected keys.
    """
    video_path = os.path.abspath(os.path.join(dataset_name, example[VIDEO_NAME]))
    return [
        video_path if column == VIDEO_NAME else example[column]
        for column in left_side_columns + right_side_columns
    ]

# Top-level Blocks container; its layout is filled in by the `with demo:` block below.
demo = gr.Blocks()

def get_col(example, dataset_name):
    """Build one column of components for a single example.

    The column holds the left-side components (the video, or a textbox with a
    "not found" message when the file does not exist) followed by a collapsed
    accordion with one textbox per right-side column.

    Args:
        example: Example record passed on to ``get_instance_values``.
        dataset_name: Dataset name passed on to ``get_instance_values``.

    Returns:
        A ``(inputs_left, text_inputs_right)`` tuple of component lists, each
        in the order of its column list.
    """
    instance_values = get_instance_values(example, dataset_name)
    split_at = len(left_side_columns)
    left_values = instance_values[:split_at]
    right_values = instance_values[split_at:]

    # Textbox heights for the fields that need more than the default.
    line_counts = {PREDICTION: 7, QUESTION: 2}

    with gr.Column():
        inputs_left = []
        assert len(left_side_columns) == len(left_values)  # excluding the video
        for column, cell in zip(left_side_columns, left_values):
            if column != VIDEO_NAME:
                pretty = column.capitalize().replace("_", " ")
                widget = gr.Textbox(value=cell, label=pretty)
            elif os.path.exists(cell):  # only embed videos that are on disk
                widget = gr.Video(value=cell)
            else:
                widget = gr.Textbox(
                    value=f"Video file not found: {cell}",
                    label=column.capitalize(),
                )
            inputs_left.append(widget)

        with gr.Accordion("Click for details", open=False):
            text_inputs_right = []
            assert len(right_side_columns) == len(right_values)
            for column, cell in zip(right_side_columns, right_values):
                pretty = column.capitalize().replace("_", " ")
                if column in line_counts:
                    box = gr.Textbox(value=cell, label=pretty, lines=line_counts[column])
                else:
                    box = gr.Textbox(value=cell, label=pretty)
                text_inputs_right.append(box)

    return inputs_left, text_inputs_right

# Build the page: dataset selector, page slider, and one column per example.
with demo:
    with gr.Column():
        dataset_dropdown = gr.Dropdown(choices=list(json_files.keys()), label="Select Dataset", value="Kinetics700")

        # The layout and the slider range are built once, from the dataset
        # that is selected by default.
        dataset = dataset_dropdown.value
        with open(json_files[dataset], 'r', encoding='utf-8') as f:
            data = json.load(f)
        num_samples = len(data)

        # Only full pages are addressable. Clamp to 0 so that a dataset with
        # fewer than batch_size samples does not yield maximum < minimum.
        last_page = max(0, num_samples // batch_size - 1)
        slider = gr.Slider(minimum=0, maximum=last_page, step=1, label='Page')

        with gr.Row():
            # The first page is rendered up front; later pages reuse the same
            # components through the change handlers registered below.
            all_inputs_left_right = []
            for example in data[:batch_size]:
                inputs_left, text_inputs_right = get_col(example, dataset)
                all_inputs_left_right += inputs_left + text_inputs_right

    # Moving the slider or picking another dataset refreshes every component.
    slider.change(func, inputs=[slider, dataset_dropdown], outputs=all_inputs_left_right)
    dataset_dropdown.change(func, inputs=[slider, dataset_dropdown], outputs=all_inputs_left_right)

demo.launch()