import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the GGUF model and tokenizer.
# transformers (>= 4.41) can load GGUF checkpoints, but you must point it at a
# specific .gguf file inside the repo; the filename below is an assumption --
# check the repo's file listing for the quantization you want.
model_name = "lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF"
gguf_file = "Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf"  # assumed filename
tokenizer = AutoTokenizer.from_pretrained(model_name, gguf_file=gguf_file)
model = AutoModelForCausalLM.from_pretrained(model_name, gguf_file=gguf_file)

# Generate a reply; gr.ChatInterface calls its fn with (message, history)
def generate_text(message, history):
    inputs = tokenizer.encode(message, return_tensors="pt")
    outputs = model.generate(inputs, max_new_tokens=256)
    # Decode only the newly generated tokens, not the echoed prompt
    return tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True)

# Create a Gradio chat interface. Note that gr.Chatbot is only a display
# component and does not accept a function; gr.ChatInterface is what wires
# the generation function to a chat UI.
demo = gr.ChatInterface(
    fn=generate_text,
    title="GGUF Chatbot",
    description="Talk to the GGUF model!",
    chatbot=gr.Chatbot(height=600),
)

# Launch the Gradio app
demo.launch()