Grandediw committed · verified
Commit 910563c · 1 Parent(s): c3ae72e

Update app.py

Files changed (1): app.py +43 -46
app.py CHANGED
@@ -4,65 +4,62 @@ from huggingface_hub import InferenceClient
 client = InferenceClient("Grandediw/lora_model")
 
 def respond(message, history, system_message, max_tokens, temperature, top_p):
-    # Convert tuple-based history to messages if needed
-    messages = [{"role": "system", "content": system_message}]
+    # Build the prompt from system_message and the conversation history.
+    # history is a list of (user_message, assistant_message) tuples.
+    prompt = system_message.strip() + "\n\n"
+
     for user_msg, assistant_msg in history:
         if user_msg:
-            messages.append({"role": "user", "content": user_msg})
+            prompt += f"User: {user_msg}\n"
         if assistant_msg:
-            messages.append({"role": "assistant", "content": assistant_msg})
-    messages.append({"role": "user", "content": message})
+            prompt += f"Assistant: {assistant_msg}\n"
+
+    # Add the latest user message and cue the model to reply as the assistant
+    prompt += f"User: {message}\nAssistant:"
 
     response = ""
-    for partial in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
+    # Use text_generation instead of chat_completion
+    for partial in client.text_generation(
+        prompt=prompt,
+        max_new_tokens=max_tokens,
         temperature=temperature,
         top_p=top_p,
+        stream=True
     ):
-        token = partial.choices[0].delta.content
+        # With stream=True and details left unset, each chunk is the token text
+        token = partial
         response += token
         yield response
 
-with gr.Blocks(title="Enhanced LORA Chat Interface") as demo:
-    gr.Markdown(
-        """
-        # LORA Chat Assistant
-        Welcome! This is a demo of a LORA-based Chat Assistant.
-        Start by entering your prompt below.
-        """
-    )
-
-    with gr.Row():
-        # System message and other parameters
-        with gr.Column():
-            system_message = gr.Textbox(
-                value="You are a friendly Chatbot.",
-                label="Initial Behavior (System Message)",
-                lines=3,
-                placeholder="Describe how the assistant should behave..."
-            )
-            max_tokens = gr.Slider(
-                minimum=1, maximum=2048, value=512, step=1,
-                label="Max new tokens"
-            )
-            temperature = gr.Slider(
-                minimum=0.1, maximum=4.0, value=0.7, step=0.1,
-                label="Temperature"
-            )
-            top_p = gr.Slider(
-                minimum=0.1, maximum=1.0, value=0.95, step=0.05,
-                label="Top-p (nucleus sampling)"
-            )
-
-    # Create the chat interface using tuple format
-    # Note: `type='tuples'` preserves the (user, assistant) tuple format.
-    chat = gr.ChatInterface(
-        fn=respond,
-        additional_inputs=[system_message, max_tokens, temperature, top_p],
-        type='tuples'
-    )
+with gr.Blocks(title="Text Generation Interface") as demo:
+    gr.Markdown("# LORA Text Generation Demo")
+
+    with gr.Column():
+        system_message = gr.Textbox(
+            value="You are a helpful and friendly assistant.",
+            label="System Prompt",
+            lines=3,
+        )
+        max_tokens = gr.Slider(
+            minimum=1, maximum=2048, value=512, step=1,
+            label="Max new tokens"
+        )
+        temperature = gr.Slider(
+            minimum=0.1, maximum=4.0, value=0.7, step=0.1,
+            label="Temperature"
+        )
+        top_p = gr.Slider(
+            minimum=0.1, maximum=1.0, value=0.95, step=0.05,
+            label="Top-p"
+        )
+
+    # Use type='tuples' to keep the old-style (user, assistant) conversation
+    # format, or omit it to use the default message format.
+    chat = gr.ChatInterface(
+        fn=respond,
+        additional_inputs=[system_message, max_tokens, temperature, top_p],
+        type='tuples'
+    )
 
 if __name__ == "__main__":
     demo.launch()
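
For context, here is a minimal, self-contained sketch of the prompt convention the updated respond() assembles. The build_prompt helper and the sample history below are illustrative, not part of the commit:

    def build_prompt(system_message, history, message):
        # Mirror the prompt layout built inside the updated respond():
        # system text first, then alternating "User:"/"Assistant:" turns,
        # then the new message with a trailing "Assistant:" cue so the
        # model continues in the assistant's voice.
        prompt = system_message.strip() + "\n\n"
        for user_msg, assistant_msg in history:
            if user_msg:
                prompt += f"User: {user_msg}\n"
            if assistant_msg:
                prompt += f"Assistant: {assistant_msg}\n"
        prompt += f"User: {message}\nAssistant:"
        return prompt

    history = [("Hi!", "Hello! How can I help?")]
    print(build_prompt("You are a helpful and friendly assistant.", history, "Tell me a joke."))

On the streaming side, InferenceClient.text_generation with stream=True yields plain token strings when details is left unset, which is why the loop in the diff can append each chunk directly; passing details=True instead yields stream outputs whose token.text carries the same text, which is what the original TextGenerationStreamResponse comment referred to.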