import gradio as gr
import os
import random
import numpy as np
import gdown
import base64
from time import gmtime, strftime
from csv import writer
from datasets import load_dataset
from hfserver import HuggingFaceDatasetSaver, HuggingFaceDatasetJSONSaver
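
# RoboTinder: a Gradio demo that shows two rollout videos of the same ShadowHand
# task side by side, asks the visitor which behaviour they prefer, and logs the
# choice to a Hugging Face dataset repo. ENVS below lists the selectable tasks.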
ENVS = ['ShadowHand', 'ShadowHandCatchAbreast', 'ShadowHandOver', 'ShadowHandBlockStack', 'ShadowHandCatchUnderarm',
'ShadowHandCatchOver2Underarm', 'ShadowHandBottleCap', 'ShadowHandLiftUnderarm', 'ShadowHandTwoCatchUnderarm',
'ShadowHandDoorOpenInward', 'ShadowHandDoorOpenOutward', 'ShadowHandDoorCloseInward', 'ShadowHandDoorCloseOutward',
'ShadowHandPushBlock', 'ShadowHandKettle',
'ShadowHandScissors', 'ShadowHandPen', 'ShadowHandSwingCup', 'ShadowHandGraspAndPlace', 'ShadowHandSwitch']
# download data from huggingface dataset
# dataset = load_dataset("quantumiracle-git/robotinder-data")
LOAD_DATA_GOOGLE_DRIVE = True
if LOAD_DATA_GOOGLE_DRIVE: # download data from google drive
# url = 'https://drive.google.com/drive/folders/1JuNQS4R7axTezWj1x4KRAuRt_L26ApxA?usp=sharing' # './processed/' folder in google drive
url = 'https://drive.google.com/drive/folders/1o8Q9eX-J7F326zv4g2MZWlzR46uVkUF2?usp=sharing' # './processed_zip/' folder in google drive
output = './'
    folder_id = url.split('/')[-1].split('?')[0]  # folder id is the last path segment, without the '?usp=sharing' query string
    os.system(f"gdown --id {folder_id} -O {output} --folder --no-cookies --remaining-ok")
VIDEO_PATH = 'processed_zip'
import zipfile
from os import listdir
from os.path import isfile, join, isdir
# unzip the zip files to the same location and delete zip files
path_to_zip_file = VIDEO_PATH
zip_files = [join(path_to_zip_file, f) for f in listdir(path_to_zip_file)]
for f in zip_files:
if f.endswith(".zip"):
directory_to_extract_to = path_to_zip_file # extracted file itself contains a folder
print(f'extract data {f} to {directory_to_extract_to}')
with zipfile.ZipFile(f, 'r') as zip_ref:
zip_ref.extractall(directory_to_extract_to)
os.remove(f)
else: # local data
VIDEO_PATH = 'robotinder-data'
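
# Encode a local video file as a base64 data URL wrapped in an HTML <video> tag,
# so the clip can be rendered through gr.HTML components.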
def inference(video_path):
    # embed the video as a base64 data URL in an HTML <video> tag for display via gr.HTML
    # (minimal markup sketch; the autoplay/loop/controls attributes are assumptions)
    with open(video_path, "rb") as f:
        data = f.read()
    b64 = base64.b64encode(data).decode()
    html = (
        f"""
        <video controls autoplay muted loop>
            <source src="data:video/mp4;base64,{b64}" type="video/mp4">
        </video>
        """
    )
    return html
def video_identity(video):
return video
def nan():
return None
FORMAT = ['mp4', 'gif'][0]  # which video format of the rollouts to display
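
# Create (or reuse) the Hugging Face dataset repo that stores the crowd-sourced
# preferences, clone it locally, and return the repo handle plus the path of the
# CSV file that flagged comparisons are appended to.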
def get_huggingface_dataset():
try:
import huggingface_hub
except (ImportError, ModuleNotFoundError):
        raise ImportError(
            "Package `huggingface_hub` not found. It is required for "
            "HuggingFaceDatasetSaver; try 'pip install huggingface_hub'."
        )
HF_TOKEN = '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' # my HF token
DATASET_NAME = 'crowdsourced-robotinder-demo'
FLAGGING_DIR = 'flag/'
path_to_dataset_repo = huggingface_hub.create_repo(
repo_id=DATASET_NAME,
token=HF_TOKEN,
private=False,
repo_type="dataset",
exist_ok=True,
)
dataset_dir = os.path.join(DATASET_NAME, FLAGGING_DIR)
repo = huggingface_hub.Repository(
local_dir=dataset_dir,
clone_from=path_to_dataset_repo,
use_auth_token=HF_TOKEN,
)
repo.git_pull(lfs=True)
log_file = os.path.join(dataset_dir, "flag_data.csv")
return repo, log_file
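
# Callback for the "Next" button: log the user's choice for the pair that was just
# shown, then sample the next environment (or honour an explicit task choice) and
# return two freshly sampled rollout videos for it.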
def update(user_choice, left, right, choose_env, data_folder=VIDEO_PATH, flag_to_huggingface=True):
global last_left_video_path
global last_right_video_path
global last_infer_left_video_path
global last_infer_right_video_path
if flag_to_huggingface: # log
        env_name = str(last_left_video_path).split('/')[1]  # path looks like '<data_folder>/<ENV_NAME>/<video file>'
current_time = strftime("%Y-%m-%d-%H-%M-%S", gmtime())
info = [env_name, user_choice, last_left_video_path, last_right_video_path, current_time]
print(info)
repo, log_file = get_huggingface_dataset()
        with open(log_file, 'a', newline='') as file:  # append one row per flagged comparison
            writer_object = writer(file)
            writer_object.writerow(info)
if int(current_time.split('-')[-2]) % 5 == 0: # push only on certain minutes
try:
repo.push_to_hub(commit_message=f"Flagged sample at {current_time}")
            except Exception:
                repo.git_pull(lfs=True)  # sync with remote first, then retry the push
                repo.push_to_hub(commit_message=f"Flagged sample at {current_time}")
if choose_env == 'Random':
envs = parse_envs()
        env_name = random.choice(envs)
else:
env_name = choose_env
# choose video
videos = os.listdir(os.path.join(data_folder, env_name))
video_files = []
for f in videos:
if f.endswith(f'.{FORMAT}'):
video_files.append(os.path.join(data_folder, env_name, f))
# randomly choose two videos
selected_video_ids = np.random.choice(len(video_files), 2, replace=False)
left = video_files[selected_video_ids[0]]
right = video_files[selected_video_ids[1]]
last_left_video_path = left
last_right_video_path = right
last_infer_left_video_path = inference(left)
last_infer_right_video_path = inference(right)
return last_infer_left_video_path, last_infer_right_video_path, env_name
def replay(left, right):
return left, right
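
# Environments are simply the sub-folders of the data folder, one per task.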
def parse_envs(folder=VIDEO_PATH):
envs = []
for f in os.listdir(folder):
if os.path.isdir(os.path.join(folder, f)):
envs.append(f)
return envs
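
# Assemble the Gradio Blocks UI: two rollouts side by side, a radio button for the
# preference, a task selector, and a "Next" button wired both to update() and to
# the flagging callback that records the choice.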
def build_interface(iter=3, data_folder=VIDEO_PATH):
import sys
import csv
csv.field_size_limit(sys.maxsize)
HF_TOKEN = os.getenv('HF_TOKEN')
print(HF_TOKEN)
    HF_TOKEN = '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' # my HF token (overrides the env value read above)
# hf_writer = gr.HuggingFaceDatasetSaver(HF_TOKEN, "crowdsourced-robotinder-demo") # HuggingFace logger instead of local one: https://github.com/gradio-app/gradio/blob/master/gradio/flagging.py
hf_writer = HuggingFaceDatasetSaver(HF_TOKEN, "crowdsourced-robotinder-demo")
# callback = gr.CSVLogger()
callback = hf_writer
# build gradio interface
with gr.Blocks() as demo:
gr.Markdown("## Here is RoboTinder!")
gr.Markdown("### Select the best robot behaviour in your choice!")
# some initial values
envs = parse_envs()
        env_name = random.choice(envs)  # randomly pick an initial env
with gr.Row():
str_env_name = gr.Markdown(f"{env_name}")
# choose video
videos = os.listdir(os.path.join(data_folder, env_name))
video_files = []
for f in videos:
if f.endswith(f'.{FORMAT}'):
video_files.append(os.path.join(data_folder, env_name, f))
# randomly choose two videos
selected_video_ids = np.random.choice(len(video_files), 2, replace=False)
left_video_path = video_files[selected_video_ids[0]]
right_video_path = video_files[selected_video_ids[1]]
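        # render the two sampled rollouts: mp4s are embedded as autoplaying HTML
        # <video> tags via inference(), gifs fall back to gr.Image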
with gr.Row():
if FORMAT == 'mp4':
# left = gr.PlayableVideo(left_video_path, label="left_video")
# right = gr.PlayableVideo(right_video_path, label="right_video")
infer_left_video_path = inference(left_video_path)
infer_right_video_path = inference(right_video_path)
right = gr.HTML(infer_right_video_path, label="right_video")
left = gr.HTML(infer_left_video_path, label="left_video")
else:
left = gr.Image(left_video_path, shape=(1024, 768), label="left_video")
# right = gr.Image(right_video_path).style(height=768, width=1024)
right = gr.Image(right_video_path, label="right_video")
global last_left_video_path
last_left_video_path = left_video_path
global last_right_video_path
last_right_video_path = right_video_path
global last_infer_left_video_path
last_infer_left_video_path = infer_left_video_path
global last_infer_right_video_path
last_infer_right_video_path = infer_right_video_path
# btn1 = gr.Button("Replay")
user_choice = gr.Radio(["Left", "Right", "Not Sure"], label="Which one is your favorite?")
choose_env = gr.Radio(["Random"]+ENVS, label="Choose the next task:")
btn2 = gr.Button("Next")
# This needs to be called at some point prior to the first call to callback.flag()
callback.setup([user_choice, left, right], "flagged_data_points")
# btn1.click(fn=replay, inputs=[left, right], outputs=[left, right])
btn2.click(fn=update, inputs=[user_choice, left, right, choose_env], outputs=[left, right, str_env_name])
# We can choose which components to flag -- in this case, we'll flag all of them
btn2.click(lambda *args: callback.flag(args), [user_choice, left, right], None, preprocess=False)
return demo
if __name__ == "__main__":
last_left_video_path = None
last_right_video_path = None
demo = build_interface()
# demo.launch(share=True)
demo.launch(share=False)