"""Asynchronous chat logging to a Hugging Face dataset repo.

Chats are enqueued by log_chat() and persisted by a daemon worker thread,
which appends each chat as a row to `train.csv` in the dataset repo
(download, rewrite locally with the new row, re-upload).
"""

import csv
import os
import threading
import time
from datetime import datetime
from queue import Queue

import pandas as pd
from gradio import ChatMessage
from huggingface_hub import HfApi, hf_hub_download

from timer import Timer
from utils import log_warning, log_info, log_debug, log_error

HF_TOKEN = os.environ.get("HF_TOKEN")
DATASET_REPO_ID = os.environ.get("APRIEL_PROMPT_DATASET")
CSV_FILENAME = "train.csv"


def log_chat(chat_id: str, session_id: str, model_name: str, prompt: str,
             history: list, info: dict) -> None:
    """Enqueue a chat transcript for asynchronous logging.

    Returns immediately; the daemon worker thread performs the actual
    download/merge/upload in _log_chat().

    Args:
        chat_id: Identifier of this chat.
        session_id: Identifier of the user session.
        model_name: Name of the model that produced the responses.
        prompt: The latest user prompt.
        history: Conversation so far — a mix of ``{"role", "content"}`` dicts
            and ``gradio.ChatMessage`` objects.
        info: Arbitrary extra metadata stored alongside the chat.
    """
    log_info(f"log_chat() called for chat: {chat_id}, queue size: {log_chat_queue.qsize()}, model: {model_name}")
    log_chat_queue.put((chat_id, session_id, model_name, prompt, history, info))


def _log_chat_worker() -> None:
    """Daemon loop: drain the queue forever, logging one chat at a time."""
    while True:
        chat_id, session_id, model_name, prompt, history, info = log_chat_queue.get()
        try:
            _log_chat(chat_id, session_id, model_name, prompt, history, info)
        except Exception as e:
            # A logging failure must never kill the worker thread.
            log_error(f"Error logging chat: {e}")
        finally:
            log_chat_queue.task_done()


def _log_chat(chat_id: str, session_id: str, model_name: str, prompt: str,
              history: list, info: dict) -> bool:
    """Append one chat as a new row to the dataset CSV on the Hub.

    Downloads the existing train.csv (with retries), rewrites it locally
    with the new row appended, and uploads it back. The full rewrite is
    deliberate: appending in place doesn't work in the HF container.

    Returns:
        True on success, False if configuration is missing or the local
        CSV write fails.
    """
    log_info(f"_log_chat() storing chat {chat_id}")
    if DATASET_REPO_ID is None:
        log_warning("No dataset repo ID provided. Skipping logging of prompt.")
        return False
    if HF_TOKEN is None:
        log_warning("No HF token provided. Skipping logging of prompt.")
        return False

    log_timer = Timer('log_chat')
    log_timer.start()

    api = HfApi(token=HF_TOKEN)

    # Ensure the dataset repo exists; create a private one if it doesn't.
    try:
        repo_info = api.repo_info(repo_id=DATASET_REPO_ID, repo_type="dataset")
        log_debug(f"log_chat() --> Dataset repo found: {repo_info.id} private={repo_info.private}")
    except Exception:
        log_debug("log_chat() --> No dataset repo found, creating a new one...")
        api.create_repo(repo_id=DATASET_REPO_ID, repo_type="dataset", private=True)

    # Normalize history: convert ChatMessage objects to dicts; keep only dicts
    # that carry both "role" and "content". A ChatMessage with non-empty
    # metadata is recorded as a "thought", otherwise as a "completion".
    messages = [
        {"role": item.role, "content": item.content,
         "type": "thought" if item.metadata else "completion"}
        if isinstance(item, ChatMessage) else item
        for item in history
        if isinstance(item, ChatMessage)
        or (isinstance(item, dict) and "role" in item and "content" in item)
    ]
    if len(messages) != len(history):
        log_warning("log_chat() --> Some messages in history are missing 'role' or 'content' keys.")

    user_messages_count = sum(1 for item in messages if isinstance(item, dict) and item.get("role") == "user")

    # These must match the keys in the new row
    expected_headers = ["timestamp", "chat_id", "turns", "prompt", "messages", "model", "session_id", "info"]

    # Prepare new data row
    new_row = {
        "timestamp": datetime.now().isoformat(),
        "chat_id": chat_id,
        "turns": user_messages_count,
        "prompt": prompt,
        "messages": messages,
        "model": model_name,
        "session_id": session_id,
        "info": info,
    }
    log_timer.add_step("Prepared new data row")

    # Try to download the existing CSV, retrying with linear backoff (2s, 4s).
    max_retries = 3
    retry_count = 0
    file_exists = False
    existing_df = None  # parsed once here and reused below
    csv_path = None
    while retry_count < max_retries:
        try:
            csv_path = hf_hub_download(
                repo_id=DATASET_REPO_ID,
                filename=CSV_FILENAME,
                repo_type="dataset",
                token=HF_TOKEN  # Only needed if not already logged in
            )
            existing_df = pd.read_csv(csv_path)
            file_exists = True
            log_debug(f"log_chat() --> Downloaded existing CSV with {len(existing_df)} rows")
            break  # Success, exit the loop
        except Exception as e:
            retry_count += 1
            if retry_count < max_retries:
                retry_delay = 2 * retry_count  # Linear backoff: 2s, 4s
                log_warning(
                    f"log_chat() --> Download attempt {retry_count} failed: {e}. Retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)
            else:
                log_warning(f"log_chat() --> Failed to download CSV after {max_retries} attempts: {e}")
                file_exists = False
    log_timer.add_step(f"Downloaded existing CSV (attempts: {retry_count + 1})")

    # Handle the case where the CSV file does not exist or is invalid
    if file_exists and len(existing_df) == 0:
        log_warning(f"log_chat() --> CSV {csv_path} exists but is empty, will create a new one.")
        dump_hub_csv()
        file_exists = False
    elif file_exists:
        # Check that the headers match our standard headers of "timestamp", "chat_id", "turns", ...
        existing_headers = existing_df.columns.tolist()
        if set(existing_headers) != set(expected_headers):
            # NOTE: was logging existing_headers twice; now reports the expected set.
            log_warning(f"log_chat() --> CSV {csv_path} has unexpected headers: {existing_headers}. "
                        f"\nExpected {expected_headers} "
                        f"Will create a new one.")
            dump_hub_csv()
            file_exists = False
        else:
            log_debug(f"log_chat() --> CSV {csv_path} has expected headers: {existing_headers}")

    # Write out the new row to the CSV file (append isn't working in HF container, so recreate each time)
    log_debug(f"log_chat() --> Writing CSV file, file_exists={file_exists}")
    try:
        with open(CSV_FILENAME, "w", newline="\n") as f:
            writer = csv.DictWriter(f, fieldnames=new_row.keys())
            writer.writeheader()  # Always write the header
            if file_exists:
                for _, row in existing_df.iterrows():
                    writer.writerow(row.to_dict())  # Write existing rows
            writer.writerow(new_row)  # Write the new row
        log_debug("log_chat() --> Wrote out CSV with new row")
        # dump_local_csv()
    except Exception as e:
        log_error(f"log_chat() --> Error writing to CSV: {e}")
        return False

    # Upload updated CSV
    api.upload_file(
        path_or_fileobj=CSV_FILENAME,
        path_in_repo=CSV_FILENAME,
        repo_id=DATASET_REPO_ID,
        repo_type="dataset",
        commit_message=f"Added new chat entry at {datetime.now().isoformat()}"
    )
    log_timer.add_step("Uploaded updated CSV")
    log_timer.end()
    log_debug("log_chat() --> Finished logging chat")
    log_debug(log_timer.formatted_result())
    return True


def dump_hub_csv() -> None:
    """Download the dataset CSV from the Hub and log its contents (debug aid)."""
    try:
        csv_path = hf_hub_download(
            repo_id=DATASET_REPO_ID,
            filename=CSV_FILENAME,
            repo_type="dataset",
            token=HF_TOKEN  # Only needed if not already logged in
        )
        df = pd.read_csv(csv_path)
        log_info(df)
        if df.empty:
            # show raw contents of downloaded csv file
            log_info("Raw file contents:")
            with open(csv_path, 'r') as f:
                print(f.read())
    except Exception as e:
        log_error(f"Error loading CSV from hub: {e}")


def dump_local_csv() -> None:
    """Load the local working CSV and log its contents (debug aid)."""
    try:
        df = pd.read_csv(CSV_FILENAME)
        log_info(df)
    except Exception as e:
        log_error(f"Error loading CSV from local file: {e}")


def test_log_chat() -> None:
    """Exercise the logging pipeline end-to-end with sample chats."""
    # Example usage
    chat_id = "12345"
    session_id = "67890"
    model_name = "Apriel-Model"
    prompt = "Hello"
    history = [{"role": "user", "content": prompt}, {"role": "assistant", "content": "Hi there!"}]

    prompt = "100 + 1"
    history = [{'role': 'user', 'content': prompt},
               ChatMessage(
                   content='Okay, that\'s a simple addition problem. , answer is 2.\n',
                   role='assistant',
                   metadata={'title': '🧠 Thought'},
                   options=[]),
               ChatMessage(content='\nThe result of adding 1 and 1 is:\n\n**2**\n',
                           role='assistant',
                           metadata={},
                           options=[])
               ]
    info = {"additional_info": "Some extra data"}

    log_debug("Starting test_log_chat()")
    dump_hub_csv()
    log_chat(chat_id, session_id, model_name, prompt, history, info)
    log_debug("log_chat 1 returned")
    log_chat(chat_id, session_id, model_name, prompt + " + 2", history, info)
    log_debug("log_chat 2 returned")
    log_chat(chat_id, session_id, model_name, prompt + " + 3", history, info)
    log_debug("log_chat 3 returned")
    log_chat(chat_id, session_id, model_name, prompt + " + 4", history, info)
    log_debug("log_chat 4 returned")
    sleep_seconds = 10
    log_debug(f"Sleeping {sleep_seconds} seconds to let it finish and log the result.")
    time.sleep(sleep_seconds)
    log_debug("Finished sleeping.")
    dump_hub_csv()


# Create a queue for logging chat messages
log_chat_queue = Queue()
# Start the worker thread
threading.Thread(target=_log_chat_worker, daemon=True).start()

if __name__ == "__main__":
    test_log_chat()