Update app.py
app.py CHANGED
@@ -107,7 +107,7 @@ def apply_replacements(text):
     return text
 
 
-def chat_with_openai(message: str, history: list, temperature: float, max_new_tokens: int):
+def chat_with_openai(message: str, history: list, temperature: float, max_new_tokens: int, fast_mode=False):
     """
     Call the OpenAI ChatCompletion endpoint using the new client and yield streaming responses.
     Implements <think> logic:
@@ -134,13 +134,16 @@ def chat_with_openai(message: str, history: list, temperature: float, max_new_tokens: int):
     conversation.append({"role": "assistant", "content": assistant_msg})
     conversation.append({"role": "user", "content": message})
 
-    # Immediately yield a "thinking" status message.
-    yield "HealthAssistant is Thinking! Please wait, your response will output shortly... This may take 30-60 seconds...\n\n"
+    if not fast_mode:
+        # Immediately yield a "thinking" status message.
+        yield "HealthAssistant is Thinking! Please wait, your response will output shortly... This may take 30-60 seconds...\n\n"
 
-    think_result = think(conversation)
+        think_result = think(conversation)
 
-    # Force the model to begin its answer with a "<think>" block.
-    conversation.append({"role": "assistant", "content": "<think>\n"+think_result+"\n</think>"})
+        # Force the model to begin its answer with a "<think>" block.
+        conversation.append({"role": "assistant", "content": "<think>\n"+think_result+"\n</think>"})
+    else:
+        yield "HealthAssistant is Thinking! Please wait, your response will output shortly...\n\n"
 
     # Call the API with streaming enabled.
     response = client.chat.completions.create(
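The new fast_mode flag defaults to False, so existing callers keep the two-pass behavior: a separate think() pre-pass whose output is appended to the conversation as a pre-filled <think> block before the streaming call. Below is a minimal sketch of how the flag might be exposed in the Space's UI; the gr.ChatInterface wiring, component labels, and slider ranges are assumptions for illustration, not code from this commit. Gradio passes additional_inputs to the handler in order after (message, history), which matches the patched signature.

    import gradio as gr

    # Assumes chat_with_openai (the streaming generator patched above) and its
    # dependencies are already defined in app.py.
    demo = gr.ChatInterface(
        fn=chat_with_openai,
        additional_inputs=[
            gr.Slider(minimum=0.0, maximum=1.0, value=0.7, label="Temperature"),
            gr.Slider(minimum=64, maximum=4096, value=1024, step=64, label="Max new tokens"),
            # Hypothetical toggle for the new parameter: skip the think() pre-pass.
            gr.Checkbox(value=False, label="Fast mode (skip thinking step)"),
        ],
    )

    if __name__ == "__main__":
        demo.launch()

Keeping the default at False makes the commit backward compatible: any caller that never passes fast_mode still gets the slower path with the explicit <think> stage, while the checkbox opts into the quicker single-pass reply.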
|