import json
import os
import time

import numpy as np
import openai
import pandas as pd
import spacy
from tqdm import tqdm

from .utils import (get_num_tokens, parse_prompt, num_tokens_from_messages,
                    clean_slides, slide_generation_ver2, generate_latex_slide)

nlp = spacy.load('en_core_web_sm')


def set_openai_api_key(key: str):
    openai.api_key = key  # use the argument, not the literal string 'key'


def generate_slide(json_pth: str):
    model_list = [model['id'] for model in openai.Model.list()['data']]
    gpt4_id = "gpt-4-0314"
    gpt3_id = "gpt-3.5-turbo-0301"

    with open(json_pth) as f:
        data = json.load(f)

    title = data['title']
    abstract = data['abstract']
    paper_length = len(data['text'])
    sections = [
        [head['section'],
         ' '.join([data['text'][idx]['string']
                   for idx in range(head['start'], min(head['end'] + 1, paper_length))])]
        for head in data['headers']
    ]
    figures = [fig['caption'] for fig in data['figures']]  # captions collected but not used below

    ### ! Split the sections into chunks of at most `token_limit` tokens.
    new_sections = []
    toc = ""
    token_limit = 1400
    for section in sections:
        section_title = section[0]
        curr_count = get_num_tokens(section[1])
        toc += section_title + "; "
        if curr_count > token_limit:
            # Split the section into sentences and greedily pack them into chunks.
            sents = nlp(section[1]).sents
            temp_list = []
            for sent in sents:
                if not temp_list:
                    temp_list.append(sent.text)
                    continue
                curr_count = get_num_tokens(temp_list[-1])
                if curr_count + get_num_tokens(sent.text) < token_limit:
                    temp_list[-1] += " " + sent.text  # join with a space so sentences don't run together
                else:
                    temp_list.append(sent.text)
            for i in range(len(temp_list)):
                if i == 0:
                    new_sections.append([section_title, temp_list[i]])
                else:
                    new_sections.append([section_title + " (cont.)", temp_list[i]])
        else:
            new_sections.append(section)
    print(f"Total number of sections: {len(new_sections)}")

    # ! Build the initial message.
    initial_user_message = ("Title: " + title + "\nTable of Contents: " + toc
                            + "\nAbstract: " + abstract)
    initial_section_title = new_sections[0][0]
    initial_section_content = new_sections[0][1]

    # ! Initial dialogue: generates slides for the first section of the paper.
    res = []
    data = [initial_user_message, initial_section_title, initial_section_content]
    messages = parse_prompt("./dialogue_1.txt", data)
    token_length = num_tokens_from_messages(messages)
    assert token_length < 2400, f"Message is too long: {token_length}"
    response = openai.ChatCompletion.create(
        model=gpt3_id,
        messages=messages,
        temperature=0.5,
    )
    answer = response["choices"][0]["message"]["content"]
    res.append(answer)
    time.sleep(10)  # sleep to avoid the API rate limit

    ### ! Following dialogues: generate slides for the remaining sections.
    for i, (section_title, section_content) in enumerate(new_sections[1:]):
        print(f"Section {i + 1}: {section_title} is being processed...")
        data = [section_content]
        messages = parse_prompt("./dialogue_2.txt", data)
        token_length = num_tokens_from_messages(messages)
        assert token_length < 2400, f"Message is too long: {token_length}"
        response = openai.ChatCompletion.create(
            model=gpt3_id,
            messages=messages,
            temperature=0.9,
        )
        answer = response["choices"][0]["message"]["content"]
        res.append(answer)
        del messages, token_length, response, answer
        time.sleep(10)  # sleep for 10 seconds to avoid the API rate limit
    ### ! Clean slides of comments, empty lines, and other garbage.
    for i in range(len(res)):
        res[i] = clean_slides(res[i])

    # ! Iteratively merge slide chunks until the draft is short enough.
    temp_res = res
    prev_cnt = len(temp_res)
    while len(temp_res) > 1:
        temp_num_tokens = get_num_tokens("\n".join(temp_res))
        temp_res = slide_generation_ver2(temp_res, 1800)
        print(f"The length of res is {len(temp_res)}, and the number of tokens is {temp_num_tokens}")
        # If the number of slides has not changed, stop merging.
        if len(temp_res) == prev_cnt:
            break
        else:
            prev_cnt = len(temp_res)
        # If the number of tokens is at most 4000, stop merging.
        if temp_num_tokens <= 4000:
            break

    # ! Refine each merged chunk.
    new_res = []
    for i in tqdm(range(len(temp_res))):
        data = [temp_res[i]]
        messages = parse_prompt("./dialogue_3.txt", data)
        token_length = num_tokens_from_messages(messages)
        assert token_length < 2400, f"Message is too long: {token_length}"
        response = openai.ChatCompletion.create(
            model=gpt3_id,
            messages=messages,
            temperature=0.9,
        )
        temp = response["choices"][0]["message"]["content"]
        temp = clean_slides(temp)
        new_res.append(temp)
        time.sleep(5)  # needed to avoid the API rate limit
    temp_res = new_res
    time.sleep(10)  # needed to avoid the API rate limit

    # ! Final refinement over the whole draft, with GPT-4 if it is available.
    final_draft = "\n".join(temp_res)
    data = [final_draft]
    messages = parse_prompt("./dialogue_4.txt", data)
    print(num_tokens_from_messages(messages))
    response = openai.ChatCompletion.create(
        model=gpt4_id if gpt4_id in model_list else gpt3_id,
        messages=messages,
        temperature=0.5,
    )
    temp = response["choices"][0]["message"]["content"]
    # generate_latex_slide(temp, "test.tex")
    return temp
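

# Example usage (a minimal sketch, not part of the original module): the
# environment-variable name and the input path below are assumptions made for
# illustration only. Because of the relative import at the top of the file,
# this must be run as a module, e.g. `python -m <package>.<this_module>`
# (package name hypothetical), rather than as a standalone script.
if __name__ == "__main__":
    set_openai_api_key(os.environ["OPENAI_API_KEY"])  # assumes the key is exported in the environment
    slides = generate_slide("paper.json")  # hypothetical path to a parsed-paper JSON file
    print(slides)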