import json
import os
import random
import subprocess
from datetime import datetime

import gradio as gr
from huggingface_hub import Repository

# Fix the RNG seed so every participant sees the same blind pairings and page order.
random.seed(20240128)

# Configure the Git identity used for commits pushed by `Repository`.
subprocess.run(["git", "config", "--global", "user.email", "[email protected]"])
subprocess.run(["git", "config", "--global", "user.name", "yiduohao"])


hf_token = os.getenv("HF_TOKEN")
print("HF Token is none?", hf_token is None)

# Clone the Hugging Face dataset repo that stores responses;
# HF_TOKEN must grant write access to it.
DATASET_REPO_URL = "https://huggingface.co/datasets/Scherzando/RIR-Resound-User-Study-Response"
repo = Repository(
    local_dir="user_responses",
    clone_from=DATASET_REPO_URL,
    use_auth_token=hf_token,
)

def prepare_test_cases():
    """Load clip metadata and blindly assign the methods to Video 1 / Video 2."""
    json_path = "rir/rir.json"
    with open(json_path, "r") as f:
        video_dict = json.load(f)

    # Shuffle each pair so raters cannot tell which method produced which clip.
    # (A single shuffle suffices; a coin flip beforehand would be redundant.)
    for video_id in video_dict:
        video_list = [video_dict[video_id]['ours'], video_dict[video_id]['baseline']]
        random.shuffle(video_list)

        video_dict[video_id]['Video 1'] = video_list[0]
        video_dict[video_id]['Video 2'] = video_list[1]
        video_dict[video_id]['Ground Truth'] = video_dict[video_id]['groundtruth']

    return video_dict

video_dict = prepare_test_cases()
video_ids = list(video_dict.keys())
random.shuffle(video_ids)  # randomize the order in which pairs appear on the page


questions = [
    "Between Video 1 and Video 2, which video's audio conveys changes in volume more accurately compared to the Reference?",
    "Between Video 1 and Video 2, which video's audio makes it easier to identify the direction of the sound source?",
    "Between Video 1 and Video 2, which video's audio aligns better with the Reference overall?",
]
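
# The three answers per pair map positionally to the keys used in
# `save_responses`: questions[0] -> "distance", questions[1] -> "direction",
# questions[2] -> "overall".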
submissions_file = "user_responses/response.jsonl"
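
# Each line of response.jsonl records one submission, e.g.:
#   {"u_id": "<session_hash>", "timestamp": "<ISO-8601>",
#    "responses": [{"<video_id>": {"distance": "<path>", "direction": "<path>",
#                                  "overall": "<path>"}}, ...]}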

def has_already_submitted(user_id):
    """Return True if this session id already appears in the JSONL log."""
    if os.path.exists(submissions_file):
        with open(submissions_file, "r") as f:
            for line in f:
                submission = json.loads(line)
                if submission.get("u_id") == user_id:
                    return True
    return False

def save_responses(unique_submission, *responses):
    """Validate and persist one participant's answers, then push to the Hub."""
    timestamp = datetime.now().isoformat()
    # The last positional arg is the session-info dict emitted by `predict`;
    # everything before it is the flat list of radio answers.
    info = responses[-1]
    responses = responses[:-1]
    user_id = str(info["session_id"])

    # Check for unique submission
    if unique_submission and has_already_submitted(user_id):
        return "You have already submitted responses. Thank you for participating!"

    # Initialize the result dictionary
    result = {
        "u_id": user_id,
        "timestamp": timestamp,
        "responses": []
    }

    # Answers arrive in consecutive blocks of len(questions) per video pair.
    for index in range(len(video_ids)):
        start_idx = index * len(questions)
        end_idx = start_idx + len(questions)

        response = responses[start_idx:end_idx]
        if any(r is None for r in response):
            return "Please answer all questions before submitting."

        # Map the chosen label ("Video 1"/"Video 2") back to the underlying
        # file path so the log records which method was actually preferred.
        video_id = video_ids[index]
        pair_response = {
            video_id: {
                'distance': video_dict[video_id][response[0]],
                'direction': video_dict[video_id][response[1]],
                'overall': video_dict[video_id][response[2]],
            }
        }
        result["responses"].append(pair_response)

    result["responses"] = sorted(result["responses"], key=lambda x: x.keys())
    
    # Save response locally and push to Hugging Face Hub
    with open(submissions_file, "a") as f:
        f.write(json.dumps(result) + "\n")
    
    # Push changes to the Hugging Face dataset repo
    repo.push_to_hub()

    return "All responses saved! Thank you for participating!"


def create_interface(unique_submission=False):
    with gr.Blocks() as demo:
        gr.Markdown("# Human Preference Study: Room Spatial Audio Rendering")
        gr.Markdown("""
            Before starting the study, please make sure you are in a quiet environment and wearing headphones, and read the following guidance carefully. 
            - In this study, you will be presented with pairs of videos **with spatial audio**. 
            - Each pair consists of a reference spatial audio (marked by **Reference**) and two generated spatial audio (marked by **Video 1** and **Video 2**). 
            - Please watch and **listen** to each row of videos carefully and answer the three associated questions. 
            - For each video, the left-hand side is the camera (head) view, and the right-hand side is the corresponding birds-eye view of the room with **speaker (blue)** and **head poses (red)**.
            
            **Binanural Headphones are required!**
        """)

        # Display video pairs and questions
        responses = []
        for index, video_id in enumerate(video_ids):
            video_gt = video_dict[video_id]['groundtruth']
            video1 = video_dict[video_id]['Video 1']
            video2 = video_dict[video_id]['Video 2']

            gr.Markdown(f"### Video Pair {index + 1}")
            with gr.Row():
                gr.Video(video_gt, label="Reference")
                gr.Video(video1, label="Video 1")
                gr.Video(video2, label="Video 2")
            # One radio row per question for this pair.
            for question in questions:
                with gr.Row():
                    responses.append(gr.Radio(["Video 1", "Video 2"], label=question, value=None))

            gr.Markdown("---")

        # Capture client/session metadata when the page loads (see `predict`).
        info = gr.JSON(visible=False)
        demo.load(predict, None, info)

        submit_btn = gr.Button("Submit")
        result_message = gr.Textbox(label="Message (please only submit once)", interactive=False)

        submit_btn.click(
            fn=lambda *args: save_responses(unique_submission, *args),
            inputs=responses+[info],
            outputs=result_message
        )

    return demo

def predict(request: gr.Request):
    """Collect per-session metadata; `session_id` is used to de-duplicate submissions."""
    host = request.client.host
    user_agent = request.headers["user-agent"]
    session_id = request.session_hash
    return {
        "ip": host,
        "user_agent": user_agent,
        "headers": dict(request.headers),  # plain dict so gr.JSON can serialize it
        "session_id": session_id,
    }

if __name__ == "__main__":
    # Require each session to submit at most once.
    demo = create_interface(unique_submission=True)
    demo.launch(share=True)  # share=True also exposes a temporary public link