import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Load the model and tokenizer
model_name = "mrcuddle/SD-Prompter-1B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Generate a response to the latest message, given the chat history
def chat(message, history):
    # gr.ChatInterface passes history as a list of (user_message, assistant_message)
    # pairs; flatten it, together with the new message, into a single prompt string.
    turns = []
    for user_msg, assistant_msg in history:
        turns.append(f"User: {user_msg}")
        turns.append(f"Assistant: {assistant_msg}")
    turns.append(f"User: {message}")
    input_text = " ".join(turns)
    inputs = tokenizer(input_text, return_tensors="pt")

    # Generate a response (max_new_tokens limits only the generated part,
    # unlike max_length, which also counts the prompt)
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=50, num_return_sequences=1)

    # Decode only the newly generated tokens, skipping the prompt
    response = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)

    # ChatInterface expects the function to return just the assistant's reply;
    # it manages the chat history itself.
    return response.strip()

# Create the Gradio chat interface
iface = gr.ChatInterface(
    fn=chat,
    title="Llama3.2 1B Stable Diffusion Prompter",
    description="Generate Stable Diffusion prompts with Llama 3.2"
)

# Launch the interface
iface.launch()
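
# Optional sanity check (illustrative only; the example message below is an
# assumption, not part of the model card): because ChatInterface calls
# chat(message, history) and expects a plain string back, the function can be
# exercised directly from Python before launching the UI, e.g.
#
#   print(chat("a cozy cabin in a snowy forest", []))
#
# which should print a generated Stable Diffusion prompt.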