Update app.py
app.py CHANGED
@@ -107,7 +107,7 @@ def apply_replacements(text):
     return text
 
 
-def chat_with_openai(message: str, history: list, temperature: float, max_new_tokens: int):
+def chat_with_openai(message: str, history: list, temperature: float, max_new_tokens: int, fast_mode=False):
     """
     Call the OpenAI ChatCompletion endpoint using the new client and yield streaming responses.
     Implements <think> logic:
@@ -134,13 +134,16 @@ def chat_with_openai(message: str, history: list, temperature: float, max_new_tokens: int):
     conversation.append({"role": "assistant", "content": assistant_msg})
     conversation.append({"role": "user", "content": message})
 
-    # Immediately yield a "thinking" status message.
-    yield "HealthAssistant is Thinking! Please wait, your response will output shortly... This may take 30-60 seconds...\n\n"
+    if not fast_mode:
+        # Immediately yield a "thinking" status message.
+        yield "HealthAssistant is Thinking! Please wait, your response will output shortly... This may take 30-60 seconds...\n\n"
 
-    think_result = think(conversation)
+        think_result = think(conversation)
 
-    # Force the model to begin its answer with a "<think>" block.
-    conversation.append({"role": "assistant", "content": "<think>\n"+think_result+"\n</think>"})
+        # Force the model to begin its answer with a "<think>" block.
+        conversation.append({"role": "assistant", "content": "<think>\n"+think_result+"\n</think>"})
+    else:
+        yield "HealthAssistant is Thinking! Please wait, your response will output shortly...\n\n"
 
     # Call the API with streaming enabled.
     response = client.chat.completions.create(
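The new fast_mode flag defaults to False, so existing callers keep the two-pass behavior: a separate think() pre-pass whose output is appended to the conversation as a pre-filled <think> block before the streaming call. Below is a minimal sketch of how the flag might be exposed in the Space's UI; the gr.ChatInterface wiring, component labels, and slider ranges are assumptions for illustration, not code from this commit. Gradio passes additional_inputs to the handler in order after (message, history), which matches the patched signature.

    import gradio as gr

    # Assumes chat_with_openai (the streaming generator patched above) and its
    # dependencies are already defined in app.py.
    demo = gr.ChatInterface(
        fn=chat_with_openai,
        additional_inputs=[
            gr.Slider(minimum=0.0, maximum=1.0, value=0.7, label="Temperature"),
            gr.Slider(minimum=64, maximum=4096, value=1024, step=64, label="Max new tokens"),
            # Hypothetical toggle for the new parameter: skip the think() pre-pass.
            gr.Checkbox(value=False, label="Fast mode (skip thinking step)"),
        ],
    )

    if __name__ == "__main__":
        demo.launch()

Keeping the default at False makes the commit backward compatible: any caller that never passes fast_mode still gets the slower path with the explicit <think> stage, while the checkbox opts into the quicker single-pass reply.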
|