# Spaces: Running on Zero
import functools

import gradio as gr
import torch
from transformers import pipeline, set_seed
# Function to generate responses using the entire conversation history | |
def generate_response(messages, model_name, sampling_temperature, max_tokens, top_p): | |
generator = pipeline('text-generation', model=model_name, torch_dtype=torch.float16) | |
set_seed(42) # You can set a different seed for reproducibility | |
# Combine entire conversation history | |
conversation = "" | |
for message in messages: | |
role = message['role'] | |
content = message['content'] | |
conversation += f"<|im_start|>{role}\n{content}<|im_end|>\n" | |
# Generate response | |
response = generator(conversation, max_length=2048, temperature=sampling_temperature, max_tokens=max_tokens, top_p=top_p, repetition_penalty=1.1, top_k=12) | |
return [{'content': response[0]['generated_text'], 'role': 'assistant'}] | |
# Gradio chatbot interface with conversation history | |
iface = gr.Interface( | |
fn=generate_response, | |
inputs=[ | |
gr.Chat("You", "Chatbot"), | |
gr.Dropdown("Select Model", ["Locutusque/TinyMistral-248M-v2.5-Instruct", "Locutusque/Hercules-1.0-Mistral-7B", "Locutusque/UltraQwen-1_8B"]), | |
gr.Slider("Sampling Temperature", 0.1, 2.0, 1.0, 0.1), | |
gr.Slider("Max Tokens", 5, 200, 50, 5), | |
gr.Slider("Top P", 0.1, 0.5, 0.75, 0.1) | |
], | |
outputs=gr.Chat(role="Chatbot"), | |
live=True, | |
capture_session=True | |
) | |
# Launch Gradio chatbot interface | |
iface.launch() | |