Grandediw committed
Commit 0663176 · verified · 1 Parent(s): 1fc852d

Update app.py

Files changed (1):
  1. app.py +48 -16
app.py CHANGED
@@ -4,33 +4,65 @@ from huggingface_hub import InferenceClient
 client = InferenceClient("Grandediw/lora_model")
 
 def respond(message, history, system_message, max_tokens, temperature, top_p):
-    # Build the prompt from system_message and the conversation history
-    # history is a list of (user_message, assistant_message) tuples
-    prompt = system_message.strip() + "\n\n"
-
+    # Convert tuple-based history to messages if needed
+    messages = [{"role": "system", "content": system_message}]
     for user_msg, assistant_msg in history:
         if user_msg:
-            prompt += f"User: {user_msg}\n"
+            messages.append({"role": "user", "content": user_msg})
         if assistant_msg:
-            prompt += f"Assistant: {assistant_msg}\n"
-
-    # Add the latest user message
-    prompt += f"User: {message}\nAssistant:"
+            messages.append({"role": "assistant", "content": assistant_msg})
+    messages.append({"role": "user", "content": message})
 
     response = ""
-    # Use text_generation instead of chat_completion
-    for partial in client.text_generation(
-        prompt=prompt,
-        max_new_tokens=max_tokens,
+    for partial in client.chat_completion(
+        messages,
+        max_tokens=max_tokens,
+        stream=True,
         temperature=temperature,
         top_p=top_p,
-        stream=True
     ):
-        # partial is a TextGenerationStreamResponse
-        token = partial.token.text  # Extract the generated token text
+        token = partial.choices[0].delta.content or ""  # delta.content can be None on some chunks
         response += token
         yield response
 
+with gr.Blocks(title="Enhanced LORA Chat Interface") as demo:
+    gr.Markdown(
+        """
+        # LORA Chat Assistant
+        Welcome! This is a demo of a LORA-based Chat Assistant.
+        Start by entering your prompt below.
+        """
+    )
+
+    with gr.Row():
+        # System message and other parameters
+        with gr.Column():
+            system_message = gr.Textbox(
+                value="You are a friendly Chatbot.",
+                label="Initial Behavior (System Message)",
+                lines=3,
+                placeholder="Describe how the assistant should behave..."
+            )
+            max_tokens = gr.Slider(
+                minimum=1, maximum=2048, value=512, step=1,
+                label="Max new tokens"
+            )
+            temperature = gr.Slider(
+                minimum=0.1, maximum=4.0, value=0.7, step=0.1,
+                label="Temperature"
+            )
+            top_p = gr.Slider(
+                minimum=0.1, maximum=1.0, value=0.95, step=0.05,
+                label="Top-p (nucleus sampling)"
+            )
+
+    # Create the chat interface using tuple format
+    # Note: type='tuples' preserves the (user, assistant) tuple history format.
+    chat = gr.ChatInterface(
+        fn=respond,
+        additional_inputs=[system_message, max_tokens, temperature, top_p],
+        type='tuples'
+    )
 
 if __name__ == "__main__":
     demo.launch()
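Review note: the core change swaps the hand-rolled "User:/Assistant:" prompt and `text_generation` for the OpenAI-style `chat_completion` API, letting the inference backend apply the model's own chat template. Below is a minimal sketch of the new call path, independent of the Gradio UI. It assumes the endpoint behind `Grandediw/lora_model` actually serves the chat-completion task (not guaranteed for every LoRA adapter), and the message contents are illustrative only.

```python
from huggingface_hub import InferenceClient

client = InferenceClient("Grandediw/lora_model")

# Same message structure respond() now builds from the (user, assistant) history.
messages = [
    {"role": "system", "content": "You are a friendly Chatbot."},
    {"role": "user", "content": "Hello!"},
]

# stream=True yields OpenAI-style chunks; delta.content can be None on some
# chunks (e.g., the final one), hence the `or ""` fallback, as in app.py.
for chunk in client.chat_completion(messages, max_tokens=64, stream=True):
    print(chunk.choices[0].delta.content or "", end="", flush=True)
print()
```

Note that `respond` yields the accumulated `response` on every chunk rather than the per-chunk delta, which is the contract `gr.ChatInterface` expects from a streaming callback.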