bradnow committed on
Commit
78f63db
·
1 Parent(s): ddbdc31

Fix problem with multiple turns for some models

Browse files
Files changed (1) hide show
  1. app.py +35 -28
app.py CHANGED
@@ -5,21 +5,24 @@ import gradio as gr
5
 
6
  from utils import COMMUNITY_POSTFIX_URL, get_model_config, log_message, check_format, models_config
7
 
8
- print(f"Gradio version: {gr.__version__}")
 
9
 
10
  DEFAULT_MODEL_NAME = "Apriel-Nemotron-15b-Thinker"
11
- BUTTON_WIDTH = 160
 
 
12
 
13
  chat_start_count = 0
14
- model_config = None
15
- client = None
16
 
17
 
18
  def setup_model(model_name, intial=False):
19
- global model_config, client
20
  model_config = get_model_config(model_name)
21
  log_message(f"update_model() --> Model config: {model_config}")
22
- client = OpenAI(
23
  api_key=model_config.get('AUTH_TOKEN'),
24
  base_url=model_config.get('VLLM_API_URL')
25
  )
@@ -44,7 +47,8 @@ def chat_fn(message, history):
44
  # Check if the message is empty
45
  if not message.strip():
46
  gr.Warning("Please enter a message before sending.")
47
- return history
 
48
 
49
  global chat_start_count
50
  chat_start_count = chat_start_count + 1
@@ -54,34 +58,32 @@ def chat_fn(message, history):
54
  is_reasoning = model_config.get("REASONING")
55
 
56
  # Remove any assistant messages with metadata from history for multiple turns
57
- log_message(f"Original History: {history}")
58
  check_format(history, "messages")
59
- history = [item for item in history if
60
- not (isinstance(item, dict) and
61
- item.get("role") == "assistant" and
62
- isinstance(item.get("metadata"), dict) and
63
- item.get("metadata", {}).get("title") is not None)]
64
- log_message(f"Updated History: {history}")
65
- check_format(history, "messages")
66
-
67
  history.append({"role": "user", "content": message})
68
  log_message(f"History with user message: {history}")
69
  check_format(history, "messages")
70
 
71
  # Create the streaming response
72
  try:
73
- stream = client.chat.completions.create(
 
 
 
 
 
 
 
 
 
74
  model=model_config.get('MODEL_NAME'),
75
- messages=history,
76
- temperature=0.8,
77
  stream=True
78
  )
79
  except Exception as e:
80
  print(f"Error: {e}")
81
- yield gr.ChatMessage(
82
- role="assistant",
83
- content="😔 The model is unavailable at the moment. Please try again later.",
84
- )
85
  return
86
 
87
  if is_reasoning:
@@ -92,6 +94,13 @@ def chat_fn(message, history):
92
  ))
93
  log_message(f"History added thinking: {history}")
94
  check_format(history, "messages")
 
 
 
 
 
 
 
95
 
96
  output = ""
97
  completion_started = False
@@ -135,11 +144,8 @@ def chat_fn(message, history):
135
  content=output
136
  )
137
 
138
- # only yield the most recent assistant messages
139
- messages_to_yield = history[-1:] if not completion_started else history[-2:]
140
- # check_format(messages_to_yield, "messages")
141
- # log_message(f"Yielding messages: {messages_to_yield}")
142
- yield messages_to_yield
143
 
144
  log_message(f"Final History: {history}")
145
  check_format(history, "messages")
@@ -281,6 +287,7 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="green")) as demo:
281
  desc = setup_model(actual_model_name)
282
  return desc, []
283
 
 
284
  model_dropdown.change(
285
  fn=update_model_and_clear,
286
  inputs=[model_dropdown],
 
5
 
6
  from utils import COMMUNITY_POSTFIX_URL, get_model_config, log_message, check_format, models_config
7
 
8
+ MODEL_TEMPERATURE = 0.8
9
+ BUTTON_WIDTH = 160
10
 
11
  DEFAULT_MODEL_NAME = "Apriel-Nemotron-15b-Thinker"
12
+ # DEFAULT_MODEL_NAME = "Apriel-5b"
13
+
14
+ print(f"Gradio version: {gr.__version__}")
15
 
16
  chat_start_count = 0
17
+ model_config = {}
18
+ openai_client = None
19
 
20
 
21
  def setup_model(model_name, intial=False):
22
+ global model_config, openai_client
23
  model_config = get_model_config(model_name)
24
  log_message(f"update_model() --> Model config: {model_config}")
25
+ openai_client = OpenAI(
26
  api_key=model_config.get('AUTH_TOKEN'),
27
  base_url=model_config.get('VLLM_API_URL')
28
  )
 
47
  # Check if the message is empty
48
  if not message.strip():
49
  gr.Warning("Please enter a message before sending.")
50
+ yield history
51
+ return
52
 
53
  global chat_start_count
54
  chat_start_count = chat_start_count + 1
 
58
  is_reasoning = model_config.get("REASONING")
59
 
60
  # Remove any assistant messages with metadata from history for multiple turns
61
+ log_message(f"Initial History: {history}")
62
  check_format(history, "messages")
 
 
 
 
 
 
 
 
63
  history.append({"role": "user", "content": message})
64
  log_message(f"History with user message: {history}")
65
  check_format(history, "messages")
66
 
67
  # Create the streaming response
68
  try:
69
+ history_no_thoughts = [item for item in history if
70
+ not (isinstance(item, dict) and
71
+ item.get("role") == "assistant" and
72
+ isinstance(item.get("metadata"), dict) and
73
+ item.get("metadata", {}).get("title") is not None)]
74
+ log_message(f"Updated History: {history_no_thoughts}")
75
+ check_format(history_no_thoughts, "messages")
76
+ log_message(f"history_no_thoughts with user message: {history_no_thoughts}")
77
+
78
+ stream = openai_client.chat.completions.create(
79
  model=model_config.get('MODEL_NAME'),
80
+ messages=history_no_thoughts,
81
+ temperature=MODEL_TEMPERATURE,
82
  stream=True
83
  )
84
  except Exception as e:
85
  print(f"Error: {e}")
86
+ yield [{"role": "assistant", "content": "😔 The model is unavailable at the moment. Please try again later."}]
 
 
 
87
  return
88
 
89
  if is_reasoning:
 
94
  ))
95
  log_message(f"History added thinking: {history}")
96
  check_format(history, "messages")
97
+ else:
98
+ history.append(gr.ChatMessage(
99
+ role="assistant",
100
+ content="",
101
+ ))
102
+ log_message(f"History added empty assistant: {history}")
103
+ check_format(history, "messages")
104
 
105
  output = ""
106
  completion_started = False
 
144
  content=output
145
  )
146
 
147
+ # log_message(f"Yielding messages: {history}")
148
+ yield history
 
 
 
149
 
150
  log_message(f"Final History: {history}")
151
  check_format(history, "messages")
 
287
  desc = setup_model(actual_model_name)
288
  return desc, []
289
 
290
+
291
  model_dropdown.change(
292
  fn=update_model_and_clear,
293
  inputs=[model_dropdown],