Grandediw committed on
Commit
d2086ac
·
verified ·
1 Parent(s): 3bccc1c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -48
app.py CHANGED
@@ -1,86 +1,68 @@
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
 
4
- """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
- """
7
  client = InferenceClient("Grandediw/lora_model")
8
 
9
def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p):
    """Stream a chat reply for *message* given prior (user, assistant) turns.

    Builds an OpenAI-style message list (system prompt + replayed history +
    the new user message) and yields the accumulated response text after each
    streamed token so the Gradio ChatInterface can render it incrementally.
    """
    # Seed the conversation with the system prompt, then replay history.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    response = ""
    # BUG FIX: the loop variable was `message`, shadowing the parameter of
    # the same name; renamed to `chunk`.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        # BUG FIX: streamed chunks (notably the final one) can carry
        # delta.content=None, which made `response += token` raise TypeError.
        if token:
            response += token
        yield response
32
 
33
- """
34
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
35
- """
36
with gr.Blocks(title="Enhanced LORA Chat Interface") as demo:
    gr.Markdown(
        """
        # LORA Chat Assistant
        Welcome! This is a demo of a LORA-based Chat Assistant.
        Start by entering your prompt in the chat box below.
        """
    )

    with gr.Row():
        # Left column placeholder: the chat interface is instantiated below,
        # once the setting components exist and can be wired in as
        # additional inputs. Re-entering this column keeps it on the left.
        chat_column = gr.Column()

        # Right column: settings and system message.
        with gr.Column():
            gr.Markdown("### Configuration")
            system_message = gr.Textbox(
                value="You are a friendly Chatbot.",
                label="Initial Behavior (System Message)",
                lines=3,
                placeholder="Describe how the assistant should behave..."
            )

            with gr.Accordion("Advanced Settings", open=False):
                max_tokens = gr.Slider(
                    minimum=1, maximum=2048, value=512, step=1,
                    label="Max new tokens",
                    info="Controls the maximum number of tokens in the response."
                )
                temperature = gr.Slider(
                    minimum=0.1, maximum=4.0, value=0.7, step=0.1,
                    label="Temperature",
                    info="Higher values produce more random outputs."
                )
                top_p = gr.Slider(
                    minimum=0.1, maximum=1.0, value=0.95, step=0.05,
                    label="Top-p (nucleus sampling)",
                    info="Limits the tokens considered to the top portion by cumulative probability."
                )

        # BUG FIX: gr.ChatInterface has no `configure()` method, so the old
        # `chat.configure(additional_inputs=...)` raised AttributeError at
        # startup. Pass the additional inputs at construction time instead.
        with chat_column:
            chat = gr.ChatInterface(
                fn=respond,
                additional_inputs=[system_message, max_tokens, temperature, top_p]
            )


if __name__ == "__main__":
    demo.launch()
 
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
 
 
 
 
4
  client = InferenceClient("Grandediw/lora_model")
5
 
6
def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Stream a chat reply for *message* given tuple-based *history*.

    Yields the accumulated response text after each streamed token so the
    Gradio ChatInterface can render it incrementally.
    """
    # Convert tuple-based history into OpenAI-style role/content messages,
    # seeded with the system prompt.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    messages.append({"role": "user", "content": message})

    response = ""
    for partial in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = partial.choices[0].delta.content
        # BUG FIX: streamed chunks (notably the final one) can carry
        # delta.content=None, which made `response += token` raise TypeError.
        if token:
            response += token
        yield response
27
 
 
 
 
28
with gr.Blocks(title="Enhanced LORA Chat Interface") as demo:
    gr.Markdown(
        """
        # LORA Chat Assistant
        Welcome! This is a demo of a LORA-based Chat Assistant.
        Start by entering your prompt below.
        """
    )

    with gr.Row():
        # System message and other parameters
        with gr.Column():
            system_message = gr.Textbox(
                value="You are a friendly Chatbot.",
                label="Initial Behavior (System Message)",
                lines=3,
                placeholder="Describe how the assistant should behave..."
            )
            max_tokens = gr.Slider(
                minimum=1, maximum=2048, value=512, step=1,
                label="Max new tokens"
            )
            temperature = gr.Slider(
                minimum=0.1, maximum=4.0, value=0.7, step=0.1,
                label="Temperature"
            )
            top_p = gr.Slider(
                minimum=0.1, maximum=1.0, value=0.95, step=0.05,
                label="Top-p (nucleus sampling)"
            )

            # Create the chat interface using tuple-format history.
            # BUG FIX: the accepted literal is 'tuples' (plural) — gradio's
            # ChatInterface rejects type='tuple' as an invalid value.
            chat = gr.ChatInterface(
                fn=respond,
                additional_inputs=[system_message, max_tokens, temperature, top_p],
                type='tuples'
            )

if __name__ == "__main__":
    demo.launch()