codeblacks committed on
Commit
6f04663
·
verified ·
1 Parent(s): 055366b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -18
app.py CHANGED
@@ -2,32 +2,25 @@ import gradio as gr
2
  import asyncio
3
  from huggingface_hub import InferenceClient
4
 
5
- """
6
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
7
- """
8
  client = InferenceClient("microsoft/Phi-3.5-mini-instruct")
9
 
10
 
11
  async def respond(
12
  message,
13
- history: list[tuple[str, str]],
14
  system_message,
15
  max_tokens,
16
  temperature,
17
  top_p,
18
  ):
 
19
  messages = [{"role": "system", "content": system_message}]
 
20
 
21
- for val in history:
22
- if val[0]:
23
- messages.append({"role": "user", "content": val[0]})
24
- if val[1]:
25
- messages.append({"role": "assistant", "content": val[1]})
26
-
27
  messages.append({"role": "user", "content": message})
28
 
29
  response = ""
30
-
31
  async for message in client.chat_completion(
32
  messages,
33
  max_tokens=max_tokens,
@@ -36,16 +29,12 @@ async def respond(
36
  top_p=top_p,
37
  ):
38
  token = message.choices[0].delta.content
39
-
40
  response += token
41
  yield response
42
 
43
 
44
- """
45
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
46
- """
47
  demo = gr.ChatInterface(
48
- respond,
49
  additional_inputs=[
50
  gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
51
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
@@ -58,8 +47,8 @@ demo = gr.ChatInterface(
58
  label="Top-p (nucleus sampling)",
59
  ),
60
  ],
 
61
  )
62
 
63
- # Launch the app with concurrency for 4 users
64
  if __name__ == "__main__":
65
- demo.queue(concurrency_count=4).launch()
 
2
  import asyncio
3
  from huggingface_hub import InferenceClient
4
 
 
 
 
5
  client = InferenceClient("microsoft/Phi-3.5-mini-instruct")
6
 
7
 
8
  async def respond(
9
  message,
10
+ history: list[dict],
11
  system_message,
12
  max_tokens,
13
  temperature,
14
  top_p,
15
  ):
16
+ # History is expected to already be in OpenAI-style 'role'/'content' message format (see chatbot type="messages")
17
  messages = [{"role": "system", "content": system_message}]
18
+ messages.extend(history) # Add existing history
19
 
20
+ # Add the user's latest message
 
 
 
 
 
21
  messages.append({"role": "user", "content": message})
22
 
23
  response = ""
 
24
  async for message in client.chat_completion(
25
  messages,
26
  max_tokens=max_tokens,
 
29
  top_p=top_p,
30
  ):
31
  token = message.choices[0].delta.content
 
32
  response += token
33
  yield response
34
 
35
 
 
 
 
36
  demo = gr.ChatInterface(
37
+ fn=respond,
38
  additional_inputs=[
39
  gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
40
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
 
47
  label="Top-p (nucleus sampling)",
48
  ),
49
  ],
50
+ chatbot=gr.Chatbot(type="messages"), # Specify the 'messages' format
51
  )
52
 
 
53
  if __name__ == "__main__":
54
+ demo.queue().launch() # Simply call queue without `concurrency_count`