haponeman committed on
Commit
bd26bed
·
verified ·
1 Parent(s): e3fec40

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -20
app.py CHANGED
@@ -1,21 +1,35 @@
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
 
 
 
 
3
 
4
  """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 
 
 
 
 
 
 
 
 
6
  """
7
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
8
-
9
 
10
  def respond(
11
  message,
12
  history: list[tuple[str, str]],
13
- system_message,
14
- max_tokens,
 
15
  temperature,
16
  top_p,
 
 
17
  ):
18
- messages = [{"role": "system", "content": system_message}]
19
 
20
  for val in history:
21
  if val[0]:
@@ -26,18 +40,22 @@ def respond(
26
  messages.append({"role": "user", "content": message})
27
 
28
  response = ""
29
-
30
- for message in client.chat_completion(
31
- messages,
32
- max_tokens=max_tokens,
33
- stream=True,
34
- temperature=temperature,
35
  top_p=top_p,
36
- ):
37
- token = message.choices[0].delta.content
38
-
39
- response += token
40
- yield response
 
 
 
 
 
41
 
42
  """
43
  For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
@@ -45,19 +63,36 @@ For information on how to customize the ChatInterface, peruse the gradio docs: h
45
  demo = gr.ChatInterface(
46
  respond,
47
  additional_inputs=[
48
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
 
 
49
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
50
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
51
  gr.Slider(
52
  minimum=0.1,
53
  maximum=1.0,
54
- value=0.95,
55
  step=0.05,
56
  label="Top-p (nucleus sampling)",
57
  ),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  ],
59
  )
60
 
61
 
62
  if __name__ == "__main__":
 
63
  demo.launch()
 
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
+ import requests
4
+ import os
5
+
6
+ url = "http://47.94.86.196:8080/chat_completion"
7
 
8
  """
9
+ class ChatRequest(BaseModel):
10
+ messages: typing.List[typing.Dict]
11
+ max_new_tokens: int
12
+ max_length: int = 4096
13
+ temperature: float = 1.0
14
+ top_k: int = 0
15
+ top_p: float = 1.0
16
+ repetition_penalty: float = 1.03
17
+ do_sample: bool = True
18
+ seed: int = 42
19
  """
 
 
20
 
21
  def respond(
22
  message,
23
  history: list[tuple[str, str]],
24
+ do_sample: bool,
25
+ seed: int,
26
+ max_new_tokens,
27
  temperature,
28
  top_p,
29
+ top_k,
30
+ repetition_penalty
31
  ):
32
+ messages = []
33
 
34
  for val in history:
35
  if val[0]:
 
40
  messages.append({"role": "user", "content": message})
41
 
42
  response = ""
43
+ request_data = dict(
44
+ messages=messages,
45
+ max_new_tokens=max_new_tokens,
46
+ do_sample=do_sample,
47
+ seed=seed,
 
48
  top_p=top_p,
49
+ top_k=top_k,
50
+ temperature=temperature,
51
+ repetition_penalty=repetition_penalty
52
+ )
53
+ print(request_data)
54
+ with requests.post(url, json=request_data, stream=True, headers={"Authorization": f"Bearer {os.environ['HF_TOKEN']}"}) as r:
55
+ # append each streamed chunk and yield the response accumulated so far
56
+ for chunk in r.iter_content(1024):
57
+ response += chunk.decode("utf8")
58
+ yield response
59
 
60
  """
61
  For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
 
63
  demo = gr.ChatInterface(
64
  respond,
65
  additional_inputs=[
66
+ # gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
67
+ gr.Checkbox(True, label="do sample"),
68
+ gr.Number(42, precision=0, label="seed"),
69
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
70
+ gr.Slider(minimum=0.01, maximum=4.0, value=0.7, step=0.01, label="Temperature"),
71
  gr.Slider(
72
  minimum=0.1,
73
  maximum=1.0,
74
+ value=1.0,
75
  step=0.05,
76
  label="Top-p (nucleus sampling)",
77
  ),
78
+ gr.Slider(
79
+ minimum=0,
80
+ maximum=100,
81
+ value=0,
82
+ step=1,
83
+ label="Top-K (Top-K sampling)",
84
+ ),
85
+ gr.Slider(
86
+ minimum=1,
87
+ maximum=2,
88
+ value=1.03,
89
+ step=0.01,
90
+ label="repetition penalty",
91
+ ),
92
  ],
93
  )
94
 
95
 
96
  if __name__ == "__main__":
97
+ demo.queue(default_concurrency_limit=1, max_size=5)
98
  demo.launch()