YangWu001 committed on
Commit
af50430
·
1 Parent(s): dd9a62d
Files changed (1) hide show
  1. app.py +14 -27
app.py CHANGED
@@ -1,6 +1,5 @@
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
- import time
4
  import torch
5
  from transformers import pipeline
6
 
@@ -13,16 +12,20 @@ stop_inference = False
13
 
14
  def respond(
15
  message,
16
- history: list[tuple[str, str]],
17
- system_message,
18
- max_tokens,
19
- temperature,
20
- top_p,
21
- use_local_model,
22
  ):
23
  global stop_inference
24
  stop_inference = False # Reset cancellation flag
25
 
 
 
 
 
26
  if use_local_model:
27
  # Simulate local inference (ignoring history)
28
  messages = [{"role": "system", "content": system_message}]
@@ -34,14 +37,14 @@ def respond(
34
  messages.append({"role": "user", "content": message})
35
 
36
  response = ""
37
- for message in pipe(
38
  messages,
39
  max_new_tokens=max_tokens,
40
  temperature=temperature,
41
  do_sample=True,
42
  top_p=top_p,
43
  ):
44
- token = message['generated_text'][-1]['content']
45
  response += token
46
  yield response # Yielding response directly
47
 
@@ -140,24 +143,8 @@ with gr.Blocks(css=custom_css) as demo:
140
 
141
  cancel_button = gr.Button("Cancel Inference", variant="danger")
142
 
143
- # def chat_fn(message):
144
- # response_gen = respond(
145
- # message,
146
- # # history: list[tuple[str, str]],
147
- # system_message.value,
148
- # max_tokens.value,
149
- # temperature.value,
150
- # top_p.value,
151
- # use_local_model.value,
152
- # )
153
- # full_response = ""
154
- # for response in response_gen:
155
- # full_response += response # Accumulate the full response
156
-
157
- # return full_response
158
- chat_fn = respond
159
-
160
- user_input.submit(chat_fn, inputs=user_input, outputs=chat_history)
161
  cancel_button.click(cancel_inference)
162
 
163
  if __name__ == "__main__":
 
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
 
3
  import torch
4
  from transformers import pipeline
5
 
 
12
 
13
  def respond(
14
  message,
15
+ history=None, # Default to None
16
+ system_message="You are a friendly Chatbot.",
17
+ max_tokens=512,
18
+ temperature=0.7,
19
+ top_p=0.95,
20
+ use_local_model=False,
21
  ):
22
  global stop_inference
23
  stop_inference = False # Reset cancellation flag
24
 
25
+ # Initialize history if it's None
26
+ if history is None:
27
+ history = []
28
+
29
  if use_local_model:
30
  # Simulate local inference (ignoring history)
31
  messages = [{"role": "system", "content": system_message}]
 
37
  messages.append({"role": "user", "content": message})
38
 
39
  response = ""
40
+ for output in pipe(
41
  messages,
42
  max_new_tokens=max_tokens,
43
  temperature=temperature,
44
  do_sample=True,
45
  top_p=top_p,
46
  ):
47
+ token = output['generated_text'][-1]['content']
48
  response += token
49
  yield response # Yielding response directly
50
 
 
143
 
144
  cancel_button = gr.Button("Cancel Inference", variant="danger")
145
 
146
+ # Adjusted to ensure history is maintained and passed correctly
147
+ user_input.submit(respond, [user_input, chat_history, system_message, max_tokens, temperature, top_p, use_local_model], chat_history)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
  cancel_button.click(cancel_inference)
149
 
150
  if __name__ == "__main__":