reedmayhew committed on
Commit
9955a0a
·
verified ·
1 Parent(s): 44236a7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -6
app.py CHANGED
@@ -107,7 +107,7 @@ def apply_replacements(text):
107
  return text
108
 
109
 
110
- def chat_with_openai(message: str, history: list, temperature: float, max_new_tokens: int):
111
  """
112
  Call the OpenAI ChatCompletion endpoint using the new client and yield streaming responses.
113
  Implements <think> logic:
@@ -134,13 +134,16 @@ def chat_with_openai(message: str, history: list, temperature: float, max_new_to
134
  conversation.append({"role": "assistant", "content": assistant_msg})
135
  conversation.append({"role": "user", "content": message})
136
 
137
- # Immediately yield a "thinking" status message.
138
- yield "HealthAssistant is Thinking! Please wait, your response will output shortly...\n\n"
 
139
 
140
- think_result = think(conversation)
141
 
142
- # Force the model to begin its answer with a "<think>" block.
143
- conversation.append({"role": "assistant", "content": "<think>\n"+think_result+"\n</think>"})
 
 
144
 
145
  # Call the API with streaming enabled.
146
  response = client.chat.completions.create(
 
107
  return text
108
 
109
 
110
+ def chat_with_openai(message: str, history: list, temperature: float, max_new_tokens: int, fast_mode=False):
111
  """
112
  Call the OpenAI ChatCompletion endpoint using the new client and yield streaming responses.
113
  Implements <think> logic:
 
134
  conversation.append({"role": "assistant", "content": assistant_msg})
135
  conversation.append({"role": "user", "content": message})
136
 
137
+ if not fast_mode:
138
+ # Immediately yield a "thinking" status message.
139
+ yield "HealthAssistant is Thinking! Please wait, your response will output shortly... This may take 30-60 seconds...\n\n"
140
 
141
+ think_result = think(conversation)
142
 
143
+ # Force the model to begin its answer with a "<think>" block.
144
+ conversation.append({"role": "assistant", "content": "<think>\n"+think_result+"\n</think>"})
145
+ else:
146
+ yield "HealthAssistant is Thinking! Please wait, your response will output shortly...\n\n"
147
 
148
  # Call the API with streaming enabled.
149
  response = client.chat.completions.create(