import gc

import torch
import nltk
from nltk import sent_tokenize
from tqdm import tqdm
import gradio as gr
from peft import PeftModel
from transformers import T5ForConditionalGeneration, T5Tokenizer

nltk.download("punkt")

# Select the compute device
GPU_IDX = 1  # which GPU to use
if torch.cuda.is_available():
    num_gpus = torch.cuda.device_count()
    print(f"Number of available GPUs: {num_gpus}")
    assert GPU_IDX < num_gpus, f"GPU index {GPU_IDX} not available."
    device = torch.device(f"cuda:{GPU_IDX}")
    print(f"Using GPU: {GPU_IDX}")
else:
    print("CUDA is not available. Using CPU instead.")
    device = torch.device("cpu")

batch_size = 64

# Configuration for models and their adapters
model_config = {
    "Base Model": "polygraf-ai/poly-humanizer-base",
    "Large Model": "polygraf-ai/poly-humanizer-large",
    "XL Model": "polygraf-ai/poly-humanizer-XL-adapter",
}

# Cache the models and tokenizers up front so each request only runs inference
models, tokenizers = {}, {}
for name, path in model_config.items():
    if name == "XL Model":
        # The XL checkpoint is a LoRA adapter: load the FLAN-T5 XL base model,
        # attach the adapter, and merge it into the weights for faster inference.
        model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-xl", torch_dtype=torch.bfloat16).to(device)
        model = PeftModel.from_pretrained(model, path, torch_dtype=torch.bfloat16, is_trainable=False)
        model = model.merge_and_unload()
        models[name] = model
        tokenizers[name] = T5Tokenizer.from_pretrained("google/flan-t5-xl")
    else:
        # Base and Large checkpoints are full fine-tuned models.
        model = T5ForConditionalGeneration.from_pretrained(path, torch_dtype=torch.bfloat16).to(device)
        models[name] = model
        tokenizers[name] = T5Tokenizer.from_pretrained(path)
    print(f"Loaded model: {name}, Num. params: {model.num_parameters()}")


def paraphrase_sentences(model, tokenizer, sentences, temperature, repetition_penalty, top_k, length_penalty):
    """Paraphrase a batch of sentences with the given generation parameters."""
    inputs = ["Please paraphrase this sentence: " + sentence for sentence in sentences]
    inputs = tokenizer(inputs, return_tensors="pt", padding=True, truncation=True).to(model.device)
    outputs = model.generate(
        **inputs,
        do_sample=True,
        temperature=temperature,
        repetition_penalty=repetition_penalty,
        max_length=128,
        top_k=top_k,
        length_penalty=length_penalty,
    )
    answers = [tokenizer.decode(output, skip_special_tokens=True) for output in outputs]
    return answers


def paraphrase_text(
    text,
    progress=gr.Progress(),
    model_name="Base Model",
    temperature=1.2,
    repetition_penalty=1.0,
    top_k=50,
    length_penalty=1.0,
):
    """
    Paraphrase the input text sentence by sentence, feeding all sentences to the
    model in batches for efficiency. The number of sentences in each paragraph is
    recorded so the paragraph structure can be reconstructed afterwards.
    """
    progress(0, desc="Starting to Humanize")
    progress(0.05)

    # Select the model and tokenizer
    tokenizer = tokenizers[model_name]
    model = models[model_name].to(device)

    # Split the text into paragraphs and then into sentences
    paragraphs = text.split("\n")
    all_sentences = []
    sentences_per_paragraph = []
    for paragraph in paragraphs:
        sentences = sent_tokenize(paragraph)
        sentences_per_paragraph.append(len(sentences))
        all_sentences.extend(sentences)

    # Process all sentences in batches
    paraphrased_sentences = []
    for i in range(0, len(all_sentences), batch_size):
        batch_sentences = all_sentences[i : i + batch_size]
        paraphrased_batch = paraphrase_sentences(
            model, tokenizer, batch_sentences, temperature, repetition_penalty, top_k, length_penalty
        )
        paraphrased_sentences.extend(paraphrased_batch)
        # Clear memory between batches
        torch.cuda.empty_cache()
        gc.collect()

    # Reconstruct paragraphs from the stored per-paragraph sentence counts
    humanized_paragraphs = []
    sentence_index = 0
    for num_sentences in sentences_per_paragraph:
        humanized_paragraph = " ".join(paraphrased_sentences[sentence_index : sentence_index + num_sentences])
        humanized_paragraphs.append(humanized_paragraph)
        sentence_index += num_sentences
    humanized_text = "\n".join(humanized_paragraphs)
    return humanized_text