import gradio as gr
import os
import random
import numpy as np
import gdown
import base64
from time import gmtime, strftime
from csv import writer
from datasets import load_dataset
from hfserver import HuggingFaceDatasetSaver, HuggingFaceDatasetJSONSaver
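
# RoboTinder: a Gradio demo that shows two rollout videos of the same ShadowHand
# task side by side, asks the visitor which behaviour they prefer, and logs the
# choice to a Hugging Face dataset repo. ENVS below lists the selectable tasks.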
ENVS = ['ShadowHand', 'ShadowHandCatchAbreast', 'ShadowHandOver', 'ShadowHandBlockStack', 'ShadowHandCatchUnderarm',
'ShadowHandCatchOver2Underarm', 'ShadowHandBottleCap', 'ShadowHandLiftUnderarm', 'ShadowHandTwoCatchUnderarm',
'ShadowHandDoorOpenInward', 'ShadowHandDoorOpenOutward', 'ShadowHandDoorCloseInward', 'ShadowHandDoorCloseOutward',
'ShadowHandPushBlock', 'ShadowHandKettle',
'ShadowHandScissors', 'ShadowHandPen', 'ShadowHandSwingCup', 'ShadowHandGraspAndPlace', 'ShadowHandSwitch']
# download data from huggingface dataset
# dataset = load_dataset("quantumiracle-git/robotinder-data")
LOAD_DATA_GOOGLE_DRIVE = True
if LOAD_DATA_GOOGLE_DRIVE: # download data from google drive
# url = 'https://drive.google.com/drive/folders/1JuNQS4R7axTezWj1x4KRAuRt_L26ApxA?usp=sharing' # './processed/' folder in google drive
url = 'https://drive.google.com/drive/folders/1o8Q9eX-J7F326zv4g2MZWlzR46uVkUF2?usp=sharing' # './processed_zip/' folder in google drive
output = './'
    folder_id = url.split('/')[-1].split('?')[0]  # folder id is the last path segment, without the '?usp=sharing' query string
    os.system(f"gdown --id {folder_id} -O {output} --folder --no-cookies --remaining-ok")
VIDEO_PATH = 'processed_zip'
import zipfile
from os import listdir
from os.path import isfile, join, isdir
# unzip the zip files to the same location and delete zip files
path_to_zip_file = VIDEO_PATH
zip_files = [join(path_to_zip_file, f) for f in listdir(path_to_zip_file)]
for f in zip_files:
if f.endswith(".zip"):
directory_to_extract_to = path_to_zip_file # extracted file itself contains a folder
print(f'extract data {f} to {directory_to_extract_to}')
with zipfile.ZipFile(f, 'r') as zip_ref:
zip_ref.extractall(directory_to_extract_to)
os.remove(f)
else: # local data
VIDEO_PATH = 'robotinder-data'
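
# Encode a local video file as a base64 data URL wrapped in an HTML <video> tag,
# so the clip can be rendered through gr.HTML components.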
def inference(video_path):
    # embed the video as a base64 data URL in an HTML <video> tag for display via gr.HTML
    # (minimal markup sketch; the autoplay/loop/controls attributes are assumptions)
    with open(video_path, "rb") as f:
        data = f.read()
    b64 = base64.b64encode(data).decode()
    html = (
        f"""
        <video controls autoplay muted loop>
            <source src="data:video/mp4;base64,{b64}" type="video/mp4">
        </video>
        """
    )
    return html
def video_identity(video):
return video
def nan():
return None
FORMAT = ['mp4', 'gif'][0]  # which video format of the rollouts to display
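
# Create (or reuse) the Hugging Face dataset repo that stores the crowd-sourced
# preferences, clone it locally, and return the repo handle plus the path of the
# CSV file that flagged comparisons are appended to.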
def get_huggingface_dataset():
try:
import huggingface_hub
except (ImportError, ModuleNotFoundError):
        raise ImportError(
            "Package `huggingface_hub` not found. It is required for "
            "HuggingFaceDatasetSaver; try 'pip install huggingface_hub'."
        )
HF_TOKEN = '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' # my HF token
DATASET_NAME = 'crowdsourced-robotinder-demo'
FLAGGING_DIR = 'flag/'
path_to_dataset_repo = huggingface_hub.create_repo(
repo_id=DATASET_NAME,
token=HF_TOKEN,
private=False,
repo_type="dataset",
exist_ok=True,
)
dataset_dir = os.path.join(DATASET_NAME, FLAGGING_DIR)
repo = huggingface_hub.Repository(
local_dir=dataset_dir,
clone_from=path_to_dataset_repo,
use_auth_token=HF_TOKEN,
)
repo.git_pull(lfs=True)
log_file = os.path.join(dataset_dir, "flag_data.csv")
return repo, log_file
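
# Callback for the "Next" button: log the user's choice for the pair that was just
# shown, then sample the next environment (or honour an explicit task choice) and
# return two freshly sampled rollout videos for it.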
def update(user_choice, left, right, choose_env, data_folder=VIDEO_PATH, flag_to_huggingface=True):
global last_left_video_path
global last_right_video_path
global last_infer_left_video_path
global last_infer_right_video_path
if flag_to_huggingface: # log
        env_name = str(last_left_video_path).split('/')[1]  # path looks like '<data_folder>/<ENV_NAME>/<video file>'
current_time = strftime("%Y-%m-%d-%H-%M-%S", gmtime())
info = [env_name, user_choice, last_left_video_path, last_right_video_path, current_time]
print(info)
repo, log_file = get_huggingface_dataset()
        with open(log_file, 'a', newline='') as file:  # append one row per flagged comparison
            writer_object = writer(file)
            writer_object.writerow(info)
if int(current_time.split('-')[-2]) % 5 == 0: # push only on certain minutes
try:
repo.push_to_hub(commit_message=f"Flagged sample at {current_time}")
            except Exception:
                repo.git_pull(lfs=True)  # sync with remote first, then retry the push
                repo.push_to_hub(commit_message=f"Flagged sample at {current_time}")
if choose_env == 'Random':
envs = parse_envs()
        env_name = random.choice(envs)
else:
env_name = choose_env
# choose video
videos = os.listdir(os.path.join(data_folder, env_name))
video_files = []
for f in videos:
if f.endswith(f'.{FORMAT}'):
video_files.append(os.path.join(data_folder, env_name, f))
# randomly choose two videos
selected_video_ids = np.random.choice(len(video_files), 2, replace=False)
left = video_files[selected_video_ids[0]]
right = video_files[selected_video_ids[1]]
last_left_video_path = left
last_right_video_path = right
last_infer_left_video_path = inference(left)
last_infer_right_video_path = inference(right)
return last_infer_left_video_path, last_infer_right_video_path, env_name
def replay(left, right):
return left, right
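
# Environments are simply the sub-folders of the data folder, one per task.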
def parse_envs(folder=VIDEO_PATH):
envs = []
for f in os.listdir(folder):
if os.path.isdir(os.path.join(folder, f)):
envs.append(f)
return envs
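
# Assemble the Gradio Blocks UI: two rollouts side by side, a radio button for the
# preference, a task selector, and a "Next" button wired both to update() and to
# the flagging callback that records the choice.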
def build_interface(iter=3, data_folder=VIDEO_PATH):
import sys
import csv
csv.field_size_limit(sys.maxsize)
HF_TOKEN = os.getenv('HF_TOKEN')
print(HF_TOKEN)
    HF_TOKEN = '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' # my HF token (overrides the env value read above)
# hf_writer = gr.HuggingFaceDatasetSaver(HF_TOKEN, "crowdsourced-robotinder-demo") # HuggingFace logger instead of local one: https://github.com/gradio-app/gradio/blob/master/gradio/flagging.py
hf_writer = HuggingFaceDatasetSaver(HF_TOKEN, "crowdsourced-robotinder-demo")
# callback = gr.CSVLogger()
callback = hf_writer
# build gradio interface
with gr.Blocks() as demo:
gr.Markdown("## Here is RoboTinder!")
gr.Markdown("### Select the best robot behaviour in your choice!")
# some initial values
envs = parse_envs()
        env_name = random.choice(envs)  # randomly pick an initial env
with gr.Row():
str_env_name = gr.Markdown(f"{env_name}")
# choose video
videos = os.listdir(os.path.join(data_folder, env_name))
video_files = []
for f in videos:
if f.endswith(f'.{FORMAT}'):
video_files.append(os.path.join(data_folder, env_name, f))
# randomly choose two videos
selected_video_ids = np.random.choice(len(video_files), 2, replace=False)
left_video_path = video_files[selected_video_ids[0]]
right_video_path = video_files[selected_video_ids[1]]
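        # render the two sampled rollouts: mp4s are embedded as autoplaying HTML
        # <video> tags via inference(), gifs fall back to gr.Image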
with gr.Row():
if FORMAT == 'mp4':
# left = gr.PlayableVideo(left_video_path, label="left_video")
# right = gr.PlayableVideo(right_video_path, label="right_video")
infer_left_video_path = inference(left_video_path)
infer_right_video_path = inference(right_video_path)
right = gr.HTML(infer_right_video_path, label="right_video")
left = gr.HTML(infer_left_video_path, label="left_video")
else:
left = gr.Image(left_video_path, shape=(1024, 768), label="left_video")
# right = gr.Image(right_video_path).style(height=768, width=1024)
right = gr.Image(right_video_path, label="right_video")
global last_left_video_path
last_left_video_path = left_video_path
global last_right_video_path
last_right_video_path = right_video_path
global last_infer_left_video_path
last_infer_left_video_path = infer_left_video_path
global last_infer_right_video_path
last_infer_right_video_path = infer_right_video_path
# btn1 = gr.Button("Replay")
user_choice = gr.Radio(["Left", "Right", "Not Sure"], label="Which one is your favorite?")
choose_env = gr.Radio(["Random"]+ENVS, label="Choose the next task:")
btn2 = gr.Button("Next")
# This needs to be called at some point prior to the first call to callback.flag()
callback.setup([user_choice, left, right], "flagged_data_points")
# btn1.click(fn=replay, inputs=[left, right], outputs=[left, right])
btn2.click(fn=update, inputs=[user_choice, left, right, choose_env], outputs=[left, right, str_env_name])
# We can choose which components to flag -- in this case, we'll flag all of them
btn2.click(lambda *args: callback.flag(args), [user_choice, left, right], None, preprocess=False)
return demo
if __name__ == "__main__":
last_left_video_path = None
last_right_video_path = None
demo = build_interface()
# demo.launch(share=True)
demo.launch(share=False)