Spaces: Update app.py

app.py (CHANGED)
@@ -114,11 +114,6 @@ def chat_with_openai(message: str, history: list, temperature: float, max_new_to
     think_detected = False
     full_response = ""  # Accumulates the full raw response (without replacements applied).
 
-    # These are assumed to be defined in your application.
-    history = []  # The conversation history.
-    message = "User message"  # The user’s message (or any identifier).
-    # 'response' is assumed to be an iterable of token chunks.
-
     # Process streaming responses.
     for chunk in response:
         # Extract the new token text from the current chunk.
@@ -127,40 +122,38 @@ def chat_with_openai(message: str, history: list, temperature: float, max_new_to
         full_response += token_text
 
         if not think_detected:
-            # Accumulate tokens until the </think> marker
+            # Accumulate tokens until we see the closing </think> marker.
             buffer += token_text
             if "</think>" in buffer:
                 think_detected = True
-                # Discard everything up to and including the </think> marker.
+                # Discard everything up to and including the "</think>" marker.
                 after_think = buffer.split("</think>", 1)[1]
-                # Start the pending buffer with the text after the marker.
                 pending_buffer += after_think
-
-
-
-                safe_portion = pending_buffer[:-max_phrase_length]
-
-
-
-
-                pending_buffer = pending_buffer[-max_phrase_length:]
+                # Only flush if we have at least MIN_FLUSH_SIZE characters.
+                if len(pending_buffer) >= MIN_FLUSH_SIZE:
+                    # Flush all but the last max_phrase_length characters.
+                    safe_portion = pending_buffer[:-max_phrase_length] if len(pending_buffer) > max_phrase_length else ""
+                    if safe_portion:
+                        display_text += apply_replacements(safe_portion)
+                        yield display_text
+                    pending_buffer = pending_buffer[-max_phrase_length:]
         else:
-            #
+            # After the </think> marker, add tokens to pending_buffer.
             pending_buffer += token_text
-            if len(pending_buffer)
-                safe_portion = pending_buffer[:-max_phrase_length]
-
-
-
-
+            if len(pending_buffer) >= MIN_FLUSH_SIZE:
+                safe_portion = pending_buffer[:-max_phrase_length] if len(pending_buffer) > max_phrase_length else ""
+                if safe_portion:
+                    display_text += apply_replacements(safe_portion)
+                    yield display_text
+                pending_buffer = pending_buffer[-max_phrase_length:]
 
-    # After processing all
+    # After processing all tokens, flush any remaining text.
     if pending_buffer:
-        safe_portion =
-        display_text += safe_portion
+        safe_portion = pending_buffer  # flush whatever remains
+        display_text += apply_replacements(safe_portion)
         yield display_text
 
-    #
+    # Append the full (raw) response, including the <think> section, to the conversation history.
     # If you want the history to reflect the replacements, apply them here.
     modified_full_response = apply_replacements(full_response)
     history.append((message, modified_full_response))