import gradio as gr
from huggingface_hub import InferenceClient
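
# Client for the hosted LoRA model on the Hugging Face Inference API;
# chat_completion below streams tokens from this endpoint.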
client = InferenceClient("Grandediw/lora_model")
def respond(message, history, system_message, max_tokens, temperature, top_p):
    # Convert the tuple-based history into OpenAI-style chat messages.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    # Stream the reply, yielding the accumulated text after each chunk.
    response = ""
    for partial in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = partial.choices[0].delta.content
        if token is not None:  # some stream chunks carry no content
            response += token
        yield response
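
# Build the Gradio UI: parameter controls alongside a streaming chat interface.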
with gr.Blocks(title="Enhanced LoRA Chat Interface") as demo:
    gr.Markdown(
        """
        # LoRA Chat Assistant
        Welcome! This is a demo of a LoRA-based chat assistant.
        Start by entering your prompt below.
        """
    )
    with gr.Row():
        # System message and other parameters
        with gr.Column():
            system_message = gr.Textbox(
                value="You are a friendly Chatbot.",
                label="Initial Behavior (System Message)",
                lines=3,
                placeholder="Describe how the assistant should behave..."
            )
            max_tokens = gr.Slider(
                minimum=1, maximum=2048, value=512, step=1,
                label="Max new tokens"
            )
            temperature = gr.Slider(
                minimum=0.1, maximum=4.0, value=0.7, step=0.1,
                label="Temperature"
            )
            top_p = gr.Slider(
                minimum=0.1, maximum=1.0, value=0.95, step=0.05,
                label="Top-p (nucleus sampling)"
            )
    # Create the chat interface using the tuple format.
    # Note: type='tuples' keeps history as (user, assistant) pairs,
    # which is the shape respond() expects.
    chat = gr.ChatInterface(
        fn=respond,
        additional_inputs=[system_message, max_tokens, temperature, top_p],
        type='tuples'
    )
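
# Launch the demo when run as a script (e.g. `python app.py`).
# Passing share=True to launch() would expose a temporary public link, if desired.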
if __name__ == "__main__":
    demo.launch()