import logging

import gradio as gr
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Configure logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

# Load the model and tokenizer
model_name = "mrcuddle/SD-Prompter"
logging.info(f"Loading model and tokenizer for {model_name}")
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
logging.info("Model and tokenizer loaded successfully")


# Function to generate a response
@spaces.GPU
def chat(message, history):
    logging.info(f"Received message: {message}")
    logging.info(f"Chat history: {history}")

    # Combine the history and the new message into a single prompt.
    # gr.ChatInterface passes history as a list of (user_message, bot_message) pairs.
    turns = []
    for user_msg, bot_msg in history:
        turns.append(f"User: {user_msg}")
        if bot_msg:
            turns.append(f"Assistant: {bot_msg}")
    turns.append(f"User: {message}")
    input_text = " ".join(turns)
    logging.info(f"Input text: {input_text}")

    inputs = tokenizer(input_text, return_tensors="pt")
    logging.info(f"Tokenized input: {inputs}")

    # Generate a response (pass the attention mask along with the input ids)
    with torch.no_grad():
        outputs = model.generate(**inputs, max_length=300, num_return_sequences=1)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    logging.info(f"Generated response: {response}")

    # Keep only the newly generated text, not the echoed prompt
    response = response.replace(input_text, "").strip()
    logging.info(f"Extracted response: {response}")

    # gr.ChatInterface manages the chat history itself, so only the reply is returned
    return response


# Create the Gradio chat interface
iface = gr.ChatInterface(
    fn=chat,
    title="Llama3.2 1B Stable Diffusion Prompter",
    description="Generate Stable Diffusion prompts with Llama3.2",
)

# Launch the interface
logging.info("Launching Gradio interface")
iface.launch()