import gradio as gr
import os
import random
import numpy as np
import gdown
import base64
from time import gmtime, strftime
from csv import writer
from datasets import load_dataset
from hfserver import HuggingFaceDatasetSaver, HuggingFaceDatasetJSONSaver

ENVS = ['ShadowHand', 'ShadowHandCatchAbreast', 'ShadowHandOver', 'ShadowHandBlockStack', 'ShadowHandCatchUnderarm',
        'ShadowHandCatchOver2Underarm', 'ShadowHandBottleCap', 'ShadowHandLiftUnderarm', 'ShadowHandTwoCatchUnderarm',
        'ShadowHandDoorOpenInward', 'ShadowHandDoorOpenOutward', 'ShadowHandDoorCloseInward', 'ShadowHandDoorCloseOutward',
        'ShadowHandPushBlock', 'ShadowHandKettle', 'ShadowHandScissors', 'ShadowHandPen', 'ShadowHandSwingCup',
        'ShadowHandGraspAndPlace', 'ShadowHandSwitch']

# download data from huggingface dataset
# dataset = load_dataset("quantumiracle-git/robotinder-data")

LOAD_DATA_GOOGLE_DRIVE = True

if LOAD_DATA_GOOGLE_DRIVE:  # download data from google drive
    # url = 'https://drive.google.com/drive/folders/1JuNQS4R7axTezWj1x4KRAuRt_L26ApxA?usp=sharing'  # './processed/' folder in google drive
    url = 'https://drive.google.com/drive/folders/1o8Q9eX-J7F326zv4g2MZWlzR46uVkUF2?usp=sharing'  # './processed_zip/' folder in google drive
    output = './'
    id = url.split('/')[-1].split('?')[0]  # drop the '?usp=sharing' suffix so gdown receives a clean folder id
    os.system(f"gdown --id {id} -O {output} --folder --no-cookies --remaining-ok")
    VIDEO_PATH = 'processed_zip'

    import zipfile
    from os import listdir
    from os.path import isfile, join, isdir

    # unzip the zip files to the same location and delete the zip files
    path_to_zip_file = VIDEO_PATH
    zip_files = [join(path_to_zip_file, f) for f in listdir(path_to_zip_file)]
    for f in zip_files:
        if f.endswith(".zip"):
            directory_to_extract_to = path_to_zip_file  # extracted file itself contains a folder
            print(f'extract data {f} to {directory_to_extract_to}')
            with zipfile.ZipFile(f, 'r') as zip_ref:
                zip_ref.extractall(directory_to_extract_to)
            os.remove(f)
else:  # local data
    VIDEO_PATH = 'robotinder-data'


def inference(video_path):
    # Embed the video as a base64 data URI inside a <video> tag so it can be rendered by a gr.HTML component.
    # The markup below is a minimal reconstruction; the original HTML string was lost.
    with open(video_path, "rb") as f:
        data = f.read()
    b64 = base64.b64encode(data).decode()
    html = (
        f"""
        <video controls autoplay muted loop>
            <source src="data:video/mp4;base64,{b64}" type="video/mp4">
        </video>
        """
    )
    return html


def video_identity(video):
    return video


def nan():
    return None


FORMAT = ['mp4', 'gif'][0]


def get_huggingface_dataset():
    try:
        import huggingface_hub
    except (ImportError, ModuleNotFoundError):
        raise ImportError(
            "Package `huggingface_hub` not found; it is needed "
            "for HuggingFaceDatasetSaver. Try 'pip install huggingface_hub'."
        )
    HF_TOKEN = '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'  # my HF token
    DATASET_NAME = 'crowdsourced-robotinder-demo'
    FLAGGING_DIR = 'flag/'
    path_to_dataset_repo = huggingface_hub.create_repo(
        repo_id=DATASET_NAME,
        token=HF_TOKEN,
        private=False,
        repo_type="dataset",
        exist_ok=True,
    )
    dataset_dir = os.path.join(DATASET_NAME, FLAGGING_DIR)
    repo = huggingface_hub.Repository(
        local_dir=dataset_dir,
        clone_from=path_to_dataset_repo,
        use_auth_token=HF_TOKEN,
    )
    repo.git_pull(lfs=True)
    log_file = os.path.join(dataset_dir, "flag_data.csv")
    return repo, log_file


def update(user_choice, left, right, choose_env, data_folder=VIDEO_PATH, flag_to_huggingface=True):
    global last_left_video_path
    global last_right_video_path
    global last_infer_left_video_path
    global last_infer_right_video_path
    if flag_to_huggingface:  # log the user's choice
        env_name = str(last_left_video_path).split('/')[1]  # 'robotinder-data/ENV_NAME/'
        current_time = strftime("%Y-%m-%d-%H-%M-%S", gmtime())
        info = [env_name, user_choice, last_left_video_path, last_right_video_path, current_time]
        print(info)
        repo, log_file = get_huggingface_dataset()
        with open(log_file, 'a') as file:  # incremental change of the file
            writer_object = writer(file)
            writer_object.writerow(info)
        if int(current_time.split('-')[-2]) % 5 == 0:  # push only on certain minutes
            try:
                repo.push_to_hub(commit_message=f"Flagged sample at {current_time}")
            except Exception:
                repo.git_pull(lfs=True)  # sync with remote first
                repo.push_to_hub(commit_message=f"Flagged sample at {current_time}")
    if choose_env == 'Random':
        envs = parse_envs()
        env_name = envs[random.randint(0, len(envs) - 1)]
    else:
        env_name = choose_env
    # choose videos
    videos = os.listdir(os.path.join(data_folder, env_name))
    video_files = []
    for f in videos:
        if f.endswith(f'.{FORMAT}'):
            video_files.append(os.path.join(data_folder, env_name, f))
    # randomly choose two videos
    selected_video_ids = np.random.choice(len(video_files), 2, replace=False)
    left = video_files[selected_video_ids[0]]
    right = video_files[selected_video_ids[1]]
    last_left_video_path = left
    last_right_video_path = right
    last_infer_left_video_path = inference(left)
    last_infer_right_video_path = inference(right)
    return last_infer_left_video_path, last_infer_right_video_path, env_name


def replay(left, right):
    return left, right


def parse_envs(folder=VIDEO_PATH):
    envs = []
    for f in os.listdir(folder):
        if os.path.isdir(os.path.join(folder, f)):
            envs.append(f)
    return envs


def build_interface(iter=3, data_folder=VIDEO_PATH):
    import sys
    import csv
    csv.field_size_limit(sys.maxsize)
    HF_TOKEN = os.getenv('HF_TOKEN')
    print(HF_TOKEN)
    HF_TOKEN = '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'  # my HF token
    # hf_writer = gr.HuggingFaceDatasetSaver(HF_TOKEN, "crowdsourced-robotinder-demo")
    # HuggingFace logger instead of the local one: https://github.com/gradio-app/gradio/blob/master/gradio/flagging.py
    hf_writer = HuggingFaceDatasetSaver(HF_TOKEN, "crowdsourced-robotinder-demo")
    # callback = gr.CSVLogger()
    callback = hf_writer

    # build the gradio interface
    with gr.Blocks() as demo:
        gr.Markdown("Here is **RoboTinder**!")
        gr.Markdown("Select the robot behaviour you like best!")

        # some initial values
        envs = parse_envs()
        env_name = envs[random.randint(0, len(envs) - 1)]  # randomly pick an env
        with gr.Row():
            str_env_name = gr.Markdown(f"{env_name}")

        # choose videos
        videos = os.listdir(os.path.join(data_folder, env_name))
        video_files = []
        for f in videos:
            if f.endswith(f'.{FORMAT}'):
                video_files.append(os.path.join(data_folder, env_name, f))
        # randomly choose two videos
        selected_video_ids = np.random.choice(len(video_files), 2, replace=False)
        left_video_path = video_files[selected_video_ids[0]]
        right_video_path = video_files[selected_video_ids[1]]

        with gr.Row():
            if FORMAT == 'mp4':
                # left = gr.PlayableVideo(left_video_path, label="left_video")
                # right = gr.PlayableVideo(right_video_path, label="right_video")
                infer_left_video_path = inference(left_video_path)
                infer_right_video_path = inference(right_video_path)
                right = gr.HTML(infer_right_video_path, label="right_video")
                left = gr.HTML(infer_left_video_path, label="left_video")
            else:
                left = gr.Image(left_video_path, shape=(1024, 768), label="left_video")
                # right = gr.Image(right_video_path).style(height=768, width=1024)
                right = gr.Image(right_video_path, label="right_video")

        global last_left_video_path
        last_left_video_path = left_video_path
        global last_right_video_path
        last_right_video_path = right_video_path
        global last_infer_left_video_path
        last_infer_left_video_path = infer_left_video_path
        global last_infer_right_video_path
        last_infer_right_video_path = infer_right_video_path

        # btn1 = gr.Button("Replay")
        user_choice = gr.Radio(["Left", "Right", "Not Sure"], label="Which one is your favorite?")
        choose_env = gr.Radio(["Random"] + ENVS, label="Choose the next task:")
        btn2 = gr.Button("Next")

        # This needs to be called at some point prior to the first call to callback.flag()
        callback.setup([user_choice, left, right], "flagged_data_points")

        # btn1.click(fn=replay, inputs=[left, right], outputs=[left, right])
        btn2.click(fn=update, inputs=[user_choice, left, right, choose_env], outputs=[left, right, str_env_name])
        # We can choose which components to flag -- in this case, we'll flag all of them
        btn2.click(lambda *args: callback.flag(args), [user_choice, left, right], None, preprocess=False)

    return demo


if __name__ == "__main__":
    last_left_video_path = None
    last_right_video_path = None

    demo = build_interface()
    # demo.launch(share=True)
    demo.launch(share=False)