import gradio as gr
from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import snapshot_download
from datasets import load_dataset
import json

from src.about import (
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    EVALUATION_QUEUE_TEXT,
    INTRODUCTION_TEXT,
    TITLE,
)
from src.display.css_html_js import custom_css
from src.display.utils import (
    COLS,
    AutoEvalColumn,
    fields,
)
from src.envs import API, EVAL_REQUESTS_PATH, QUEUE_REPO, REPO_ID, TOKEN
from src.populate import get_leaderboard_df


def restart_space():
    """Request a restart of the Space identified by ``REPO_ID`` via the ``API`` client."""
    API.restart_space(repo_id=REPO_ID)


### Space initialisation
# Download the evaluation-request snapshot and load the benchmark dataset.
# Everything after this block needs `dataset`, so a failure here cannot be
# recovered from inside this process: request a Space restart, then re-raise
# so the real cause is reported instead of a later NameError on `dataset`.
try:
    print(EVAL_REQUESTS_PATH)
    snapshot_download(
        repo_id=QUEUE_REPO,
        local_dir=EVAL_REQUESTS_PATH,
        repo_type="dataset",
        tqdm_class=None,
        etag_timeout=30,
        token=TOKEN,
    )
    dataset = load_dataset("dtcxzyw/llvm-apr-benchmark")
except Exception:
    restart_space()
    raise
# Index the benchmark's "test" split: per-bug lookups (cutoff time, type,
# components), the bug ids grouped by category, and how many bugs touch each
# component (a bug listing several components is counted once for each).
total_issues = dataset.num_rows["test"]
bug_id_to_time = {}
bug_id_to_type = {}
bug_id_to_comp = {}
bug_id_by_cat = {"crash": [], "miscompilation": [], "hang": []}
comp_bug_count = {}
for issue in dataset["test"]:
    issue_id = issue["bug_id"]
    issue_type = issue["bug_type"]
    issue_components = issue["hints"]["components"]

    bug_id_to_time[issue_id] = pd.to_datetime(issue["knowledge_cutoff"])
    bug_id_by_cat[issue_type].append(issue_id)
    bug_id_to_type[issue_id] = issue_type
    bug_id_to_comp[issue_id] = issue_components
    for component in issue_components:
        comp_bug_count[component] = comp_bug_count.get(component, 0) + 1
# Build the long-form table behind the timeline scatter plot: one point per
# (lane, bug). Lanes on the y axis:
#   3, 2, 1      -> the bug categories, in bug_id_by_cat order
#   0            -> "All" (every benchmark bug)
#   -1, -2, ...  -> one lane per leaderboard row, holding the bugs it fixed
timeline_ys = []
timeline_cols = []
timeline_bugids = []

# Lane 0: every bug in the benchmark.
for bug_id in bug_id_to_time:
    timeline_ys.append(0)
    timeline_cols.append("All")
    timeline_bugids.append(bug_id)

# Category lanes, counting down from 3.
cat_cnt = 4
for cat, bug_ids in bug_id_by_cat.items():
    cat_cnt -= 1
    for bug_id in bug_ids:
        timeline_ys.append(cat_cnt)
        timeline_cols.append(str(cat).capitalize())
        timeline_bugids.append(bug_id)

# Model lanes, plus the union of bugs fixed by any leaderboard row.
LEADERBOARD_DF = get_leaderboard_df(EVAL_REQUESTS_PATH, total_issues)
fixed_bug_ids = set()
fixed_bug_ids_fast = set()
model_cnt = 0  # stays 0 when the leaderboard has no rows; read later for the plot's y range
for model_cnt, row in enumerate(LEADERBOARD_DF.itertuples(), start=1):
    print(row)  # startup log of each leaderboard row
    for fix in row.fixed_bug_ids:
        timeline_ys.append(-model_cnt)
        timeline_cols.append(row.method_id)
        timeline_bugids.append(fix)
        fixed_bug_ids.add(fix)
    fixed_bug_ids_fast.update(row.fixed_bug_ids_fast)

# Resolve each point's x position and bug type from the per-bug lookups.
timeline_xs = [bug_id_to_time[bug_id] for bug_id in timeline_bugids]
timeline_bugtypes = [bug_id_to_type[bug_id] for bug_id in timeline_bugids]
timeline_df = pd.DataFrame(
    {
        "time": timeline_xs,
        "model": timeline_ys,
        "method_name": timeline_cols,
        "bug_id": timeline_bugids,
        "bug_type": timeline_bugtypes,
    }
)
# Per-category repair statistics (plus an "All" summary row), counting each
# bug once no matter how many leaderboard rows fixed it.
def _repair_rate_pct(repaired, total):
    """Return ``repaired / total`` as a percentage rounded to one decimal.

    Yields 0.0 when ``total`` is 0 so an empty category (or an empty
    benchmark) does not raise ZeroDivisionError.
    """
    return round(repaired / total * 100, 1) if total else 0.0


# Seed every known category with zero so that categories nobody has repaired
# yet (or that only have "fast" repairs) still get a row in the table.
fixed_by_cat = dict.fromkeys(bug_id_by_cat, 0)
fixed_by_cat_fast = dict.fromkeys(bug_id_by_cat, 0)
for bug_id in fixed_bug_ids:
    fixed_by_cat[bug_id_to_type[bug_id]] += 1
for bug_id in fixed_bug_ids_fast:
    fixed_by_cat_fast[bug_id_to_type[bug_id]] += 1
fixed_by_cat["All"] = len(fixed_bug_ids)
fixed_by_cat_fast["All"] = len(fixed_bug_ids_fast)
# Only the length of the "All" entry is used below, hence the placeholder list.
bug_id_by_cat["All"] = [0] * total_issues

cat_totals = {cat: len(cat_bug_ids) for cat, cat_bug_ids in bug_id_by_cat.items()}
fixed_by_cat_df = pd.DataFrame(
    {
        "Category": [str(cat).capitalize() for cat in cat_totals],
        "Total": list(cat_totals.values()),
        "Repaired": [fixed_by_cat[cat] for cat in cat_totals],
        "Repair Rate (%)": [
            _repair_rate_pct(fixed_by_cat[cat], total) for cat, total in cat_totals.items()
        ],
        "Repaired (Fast)": [fixed_by_cat_fast[cat] for cat in cat_totals],
        "Repair Rate (Fast) (%)": [
            _repair_rate_pct(fixed_by_cat_fast[cat], total) for cat, total in cat_totals.items()
        ],
    }
)
fixed_by_cat_df.sort_values("Total", inplace=True, ascending=False)
# Per-component repair statistics. A fixed bug that lists several components
# adds one to each of them; components without any repair are reported as 0.
fixed_by_comp = {}
for fixed_id in fixed_bug_ids:
    for component in bug_id_to_comp[fixed_id]:
        fixed_by_comp[component] = fixed_by_comp.get(component, 0) + 1
fixed_by_comp_fast = {}
for fixed_id in fixed_bug_ids_fast:
    for component in bug_id_to_comp[fixed_id]:
        fixed_by_comp_fast[component] = fixed_by_comp_fast.get(component, 0) + 1

# Build each column once, in comp_bug_count order, then assemble the table.
components = list(comp_bug_count)
component_totals = [comp_bug_count[name] for name in components]
repaired_counts = [fixed_by_comp.get(name, 0) for name in components]
repaired_counts_fast = [fixed_by_comp_fast.get(name, 0) for name in components]
fixed_by_comp_df = pd.DataFrame(
    {
        "Component": components,
        "Total": component_totals,
        "Repaired": repaired_counts,
        "Repair Rate (%)": [
            round(done / total * 100, 1) for done, total in zip(repaired_counts, component_totals)
        ],
        "Repaired (Fast)": repaired_counts_fast,
        "Repair Rate (Fast) (%)": [
            round(done / total * 100, 1) for done, total in zip(repaired_counts_fast, component_totals)
        ],
    }
)
fixed_by_comp_df = fixed_by_comp_df.sort_values("Total", ascending=False)


def init_leaderboard(dataframe):
    """Create the ``Leaderboard`` component that displays *dataframe*.

    Column datatypes, the default and non-deselectable column selections, and
    the hidden columns are all derived from the ``AutoEvalColumn`` field
    descriptors. The table is searchable by method name, filterable on the
    hint column, and read-only.

    Raises:
        ValueError: if *dataframe* is ``None`` or has no rows/columns.
    """
    if dataframe is None or dataframe.empty:
        raise ValueError("Leaderboard DataFrame is empty or None.")

    columns = list(fields(AutoEvalColumn))
    column_picker = SelectColumns(
        default_selection=[col.name for col in columns if col.displayed_by_default],
        cant_deselect=[col.name for col in columns if col.never_hidden],
        label="Select Columns to Display:",
    )
    hint_filter = ColumnFilter(AutoEvalColumn.with_hint.name, type="checkboxgroup", label="Hint")

    return Leaderboard(
        value=dataframe,
        datatype=[col.type for col in columns],
        select_columns=column_picker,
        search_columns=[AutoEvalColumn.method_name.name],
        hide_columns=[col.name for col in columns if col.hidden],
        filter_columns=[hint_filter],
        bool_checkboxgroup_label="Hide models",
        interactive=False,
    )


# Page layout: title and intro, then a tab with the leaderboard table, the
# timeline plot and the two repair-statistics tables, a submission tab, and a
# collapsible citation box underneath.
intro_markdown = INTRODUCTION_TEXT + f"\nTotal issues: {total_issues}\n"
# Timeline lanes span from the last model lane (-model_cnt) up to the top
# category lane (3); leave one unit of padding at both ends.
timeline_y_range = (-model_cnt - 1, 4)

with gr.Blocks(css=custom_css) as demo:
    gr.HTML(TITLE)
    gr.Markdown(intro_markdown, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 Leaderboard", elem_id="llm-benchmark-tab-table", id=0):
            leaderboard = init_leaderboard(LEADERBOARD_DF[COLS])
            gr.ScatterPlot(
                timeline_df,
                x="time",
                y="model",
                color="method_name",
                x_label="Time",
                y_label="Model",
                title="Timeline",
                y_lim=timeline_y_range,
                tooltip=["bug_id", "method_name", "time", "bug_type"],
            )
            gr.Dataframe(fixed_by_cat_df)
            gr.Dataframe(fixed_by_comp_df)

        with gr.TabItem("🚀 Submission", elem_id="llm-benchmark-tab-table", id=1):
            gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")

    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                lines=6,
                elem_id="citation-button",
                show_copy_button=True,
            )

# Schedule a Space restart every 30 minutes in a background thread, then
# serve the app. NOTE(review): presumably the periodic restart is what
# refreshes the tables, since they are all computed once at start-up; confirm.
RESTART_INTERVAL_SECONDS = 1800
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, trigger="interval", seconds=RESTART_INTERVAL_SECONDS)
scheduler.start()

demo.queue(default_concurrency_limit=40)
demo.launch()