import torch
import gradio as gr
import tiktoken  # Import tiktoken for GPT-2 tokenization
from gpt_parts import GPTModel  # Ensure gpt_parts.py contains your GPTModel definition

# Configuration for GPT-2 model, same as used during training
GPT_CONFIG_124M = {
    "vocab_size": 50257,     # Vocabulary size
    "context_length": 1024,  # Context length
    "emb_dim": 768,          # Embedding dimension
    "n_heads": 12,           # Number of attention heads
    "n_layers": 12,          # Number of layers
    "drop_rate": 0.1,        # Dropout rate
    "qkv_bias": False        # Query-Key-Value bias
}

# Initialize the tokenizer using tiktoken's GPT-2 encoding
tokenizer = tiktoken.get_encoding("gpt2")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = GPTModel(GPT_CONFIG_124M).to(device)
model.load_state_dict(torch.load("model.pth", map_location=device, weights_only=True))
model.eval()  # Set model to evaluation mode


def text_to_token_ids(text, tokenizer):
    """Encode text to token IDs."""
    encoded = tokenizer.encode(text)
    return torch.tensor(encoded).unsqueeze(0)  # Add a batch dimension


def token_ids_to_text(token_ids, tokenizer):
    """Decode token IDs to text."""
    return tokenizer.decode(token_ids.squeeze(0).tolist())


def generate_text_simple(model, idx, max_new_tokens, context_size):
    """Autoregressively generate new tokens (greedy decoding)."""
    for _ in range(max_new_tokens):
        idx_cond = idx[:, -context_size:]  # Crop the context to the supported window
        with torch.no_grad():
            logits = model(idx_cond)
        logits = logits[:, -1, :]  # Keep only the logits for the last position
        idx_next = torch.argmax(logits, dim=-1, keepdim=True)  # Most likely next token
        idx = torch.cat((idx, idx_next), dim=1)  # Append it to the running sequence
    return idx


# Define text generation function for Gradio
def generate_text(start_context, max_new_tokens=50):
    # Gradio sliders can deliver floats, so cast before using range()
    max_new_tokens = int(max_new_tokens)

    # Encode the starting context
    encoded_input = text_to_token_ids(start_context, tokenizer).to(device)

    # Generate text
    generated_token_ids = generate_text_simple(
        model=model,
        idx=encoded_input,
        max_new_tokens=max_new_tokens,
        context_size=GPT_CONFIG_124M["context_length"]
    )

    # Decode the generated tokens to text
    generated_text = token_ids_to_text(generated_token_ids, tokenizer)
    return generated_text.replace("\n", " ")


iface = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter starting text here...", label="Start Context"),
        # value=50 matches the function's default so the UI starts at a sensible setting
        gr.Slider(minimum=1, maximum=100, value=50, step=1, label="Max New Tokens")
    ],
    outputs="text",
    title="GPT-2 Text Generation",
    description=(
        "Generate text using a fine-tuned GPT-2 model. Enter some starting text, "
        "and choose the maximum number of tokens to generate."
    )
)

iface.launch(share=True)
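

# --- Optional: sampling-based decoding (a minimal sketch, not part of the original
# script). generate_text_simple above always picks the single most likely token
# (greedy decoding), so the same prompt yields the same, often repetitive output.
# The variant below samples from the softmax distribution instead; `temperature`
# and `top_k` are hypothetical knobs you would expose yourself. To use it, define
# it above the Gradio wiring and call it in place of generate_text_simple.
def generate_text_sampled(model, idx, max_new_tokens, context_size,
                          temperature=1.0, top_k=None):
    for _ in range(max_new_tokens):
        idx_cond = idx[:, -context_size:]
        with torch.no_grad():
            logits = model(idx_cond)[:, -1, :]
        if top_k is not None:
            # Mask out everything below the k-th largest logit per row
            top_logits, _ = torch.topk(logits, top_k)
            logits = torch.where(
                logits < top_logits[:, -1:],
                torch.tensor(float("-inf"), device=logits.device),
                logits
            )
        # Higher temperature flattens the distribution; lower sharpens it
        probs = torch.softmax(logits / temperature, dim=-1)
        idx_next = torch.multinomial(probs, num_samples=1)  # Sample one token
        idx = torch.cat((idx, idx_next), dim=1)
    return idx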