import gradio as gr
from huggingface_hub import InferenceClient

client = InferenceClient("Grandediw/lora_model")
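# Note: chat_completion assumes this model is served behind a chat-capable
# inference endpoint; a plain text-generation deployment would need
# client.text_generation() instead.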

def respond(message, history, system_message, max_tokens, temperature, top_p):
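    """Stream a reply from the model, yielding the growing response text."""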
    # Flatten the (user, assistant) tuple history into role/content messages.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    response = ""
    for partial in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = partial.choices[0].delta.content
        # Some stream chunks carry no content (e.g. role-only deltas);
        # skip them to avoid concatenating None.
        if token:
            response += token
        yield response

with gr.Blocks(title="Enhanced LoRA Chat Interface") as demo:
    gr.Markdown(
        """
        # LoRA Chat Assistant
        Welcome! This is a demo of a LoRA-based chat assistant.  
        Start by entering your prompt below.
        """
    )

    with gr.Row():
        # System message and other parameters
        with gr.Column():
            system_message = gr.Textbox(
                value="You are a friendly Chatbot.",
                label="Initial Behavior (System Message)",
                lines=3,
                placeholder="Describe how the assistant should behave..."
            )
            max_tokens = gr.Slider(
                minimum=1, maximum=2048, value=512, step=1,
                label="Max new tokens"
            )
            temperature = gr.Slider(
                minimum=0.1, maximum=4.0, value=0.7, step=0.1,
                label="Temperature"
            )
            top_p = gr.Slider(
                minimum=0.1, maximum=1.0, value=0.95, step=0.05,
                label="Top-p (nucleus sampling)"
            )

        # Create the chat interface using the (user, assistant) tuple format.
        # `type='tuples'` matches the history unpacking in `respond`; recent
        # Gradio releases deprecate it in favor of `type='messages'`.
        chat = gr.ChatInterface(
            fn=respond,
            additional_inputs=[system_message, max_tokens, temperature, top_p],
            type='tuples'
        )

if __name__ == "__main__":
    demo.launch()
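
# By default demo.launch() serves the app at http://127.0.0.1:7860;
# pass share=True for a temporary public link.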