import gradio as gr
import torch
from transformers import pipeline

# Initialize the text-generation pipeline with the fine-tuned model;
# float16 is an assumption here, to fit the 7B model in GPU memory
pipe = pipeline(
    "text-generation",
    model="JamesBentley/Llama-2-7b-chat-hf-fine-tuned",
    torch_dtype=torch.float16,
)
def respond(message, history, system_message, max_tokens, temperature, top_p):
    # Build the conversation in chat-messages format; gr.ChatInterface
    # passes history as a list of [user_message, assistant_message] pairs
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})
    # Generate the response; do_sample=True so temperature and top_p take effect
    response = pipe(messages, max_new_tokens=max_tokens, do_sample=True, temperature=temperature, top_p=top_p)
    # With chat input, generated_text is the full conversation; the last message is the new assistant turn
    return response[0]["generated_text"][-1]["content"]
# Set up the Gradio chat interface; the message box and history are built in,
# and the extra controls are wired to respond() via additional_inputs
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.05, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)
if __name__ == "__main__":
    demo.launch()
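
# For completeness, the Space also needs a requirements.txt; a minimal sketch,
# assuming the float16 setup above and a transformers release recent enough
# for the text-generation pipeline to accept chat-format message lists:
#
#   gradio
#   transformers
#   torch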