Update app.py
app.py CHANGED
@@ -44,6 +44,24 @@ h1 {
 }
 """
 
+# List of (phrase, replacement) pairs.
+replacements = [
+    ("a healthcare provider", "me"),
+    # Add more pairs as needed.
+]
+
+# Calculate the maximum length of any phrase.
+max_phrase_length = max(len(phrase) for phrase, _ in replacements)
+
+def apply_replacements(text):
+    """
+    Replace all specified phrases in the text.
+    """
+    for phrase, replacement in replacements:
+        text = text.replace(phrase, replacement)
+    return text
+
+
 def chat_with_openai(message: str, history: list, temperature: float, max_new_tokens: int):
     """
     Call the OpenAI ChatCompletion endpoint using the new client and yield streaming responses.
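As a quick sanity check, this is what the new helper does to a typical model sentence (the input string here is illustrative, not from the commit):

```python
# Illustrative input; exercises apply_replacements from the hunk above.
text = "You should discuss these symptoms with a healthcare provider."
print(apply_replacements(text))
# -> You should discuss these symptoms with me.
```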
@@ -73,11 +91,6 @@ def chat_with_openai(message: str, history: list, temperature: float, max_new_tokens: int):
     # Force the model to begin its answer with a "<think>" block.
     conversation.append({"role": "assistant", "content": "<think> "})
 
-    full_response = ""  # Stores the raw assistant response (including the <think> block).
-    buffer = ""  # Accumulates tokens until we detect the closing </think>.
-    display_text = ""  # Holds text to display (only text after </think>).
-    think_detected = False
-
     # Immediately yield a "thinking" status message.
     yield "HealthAssistant is Thinking! Please wait, your response will output shortly...\n\n"
 
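The priming line above assumes `conversation` was already assembled from the chat history earlier in the function; that assembly is not part of this diff. A minimal sketch of what it plausibly looks like (SYSTEM_PROMPT and the loop shape are assumptions):

```python
# Hypothetical assembly of `conversation`; only the final priming line
# actually appears in this diff. SYSTEM_PROMPT is an assumed constant.
conversation = [{"role": "system", "content": SYSTEM_PROMPT}]
for user_msg, assistant_msg in history:
    conversation.append({"role": "user", "content": user_msg})
    conversation.append({"role": "assistant", "content": assistant_msg})
conversation.append({"role": "user", "content": message})
# Seed the assistant turn so the model opens inside a <think> block,
# which the streaming loop below detects and strips.
conversation.append({"role": "assistant", "content": "<think> "})
```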
@@ -90,27 +103,61 @@ def chat_with_openai(message: str, history: list, temperature: float, max_new_tokens: int):
         stream=True,
     )
 
+    # Buffers and state flags.
+    buffer = ""  # Accumulates tokens until the </think> marker is detected.
+    pending_buffer = ""  # Sliding buffer that safely holds back the tail of the output.
+    think_detected = False
+    full_response = ""  # Accumulates the full (raw) response.
+
     # Process streaming responses.
     for chunk in response:
         # Extract the new token text from the chunk.
         delta = chunk.choices[0].delta
         token_text = delta.content or ""
         full_response += token_text
 
         if not think_detected:
             # Accumulate tokens until we see the closing </think> marker.
             buffer += token_text
             if "</think>" in buffer:
                 think_detected = True
                 # Discard everything up to and including the "</think>" marker.
-                display_text = buffer.split("</think>", 1)[1]
-                yield display_text
+                after_think = buffer.split("</think>", 1)[1]
+                # Initialize pending_buffer with the text after </think>.
+                pending_buffer += after_think
+
+                # If pending_buffer is large enough, yield the safe portion.
+                if len(pending_buffer) > max_phrase_length:
+                    # All except the last max_phrase_length characters are safe to yield.
+                    to_yield = pending_buffer[:-max_phrase_length]
+                    # Apply replacements on the safe portion.
+                    to_yield = apply_replacements(to_yield)
+                    yield to_yield
+                    # Retain the last part in pending_buffer for potential split phrases.
+                    pending_buffer = pending_buffer[-max_phrase_length:]
         else:
-            display_text += token_text
-            yield display_text
-
+            # Append new token text to pending_buffer.
+            pending_buffer += token_text
+
+            # If pending_buffer is longer than max_phrase_length, yield the safe portion.
+            if len(pending_buffer) > max_phrase_length:
+                # Extract the part that is definitely not part of a split phrase.
+                to_yield = pending_buffer[:-max_phrase_length]
+                to_yield = apply_replacements(to_yield)
+                yield to_yield
+                # Keep the last max_phrase_length characters in pending_buffer.
+                pending_buffer = pending_buffer[-max_phrase_length:]
+
+    # After processing all chunks, flush any remaining text in pending_buffer.
+    if pending_buffer:
+        to_yield = apply_replacements(pending_buffer)
+        yield to_yield
+
     # Append the full (raw) response, including the <think> section, to the conversation history.
-    history.append((message, full_response))
+    # If you want the conversation history to reflect the replacements, apply them to full_response.
+    modified_full_response = apply_replacements(full_response)
+    history.append((message, modified_full_response))
+
 
 # Create the Chatbot component.
 chatbot = gr.Chatbot(height=450, placeholder=PLACEHOLDER, label='HealthAssistant')
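The hunk above streams through a hold-back buffer: text is only emitted once it can no longer be the start of a not-yet-complete phrase, so replacements still fire when the model splits "a healthcare provider" across several tokens. Below is a self-contained sketch of the idea, runnable without the app. Note one deliberate change from the commit: replacements here are applied to the whole buffer before cutting, which also catches a fully received phrase that straddles the emit boundary (the commit applies them to the emitted part only).

```python
# Runnable, API-free sketch of the hold-back streaming idea.
# Assumes no replacement string can itself complete another phrase
# when the buffer is re-scanned on a later pass.
replacements = [("a healthcare provider", "me")]
max_phrase_length = max(len(phrase) for phrase, _ in replacements)

def apply_replacements(text: str) -> str:
    for phrase, replacement in replacements:
        text = text.replace(phrase, replacement)
    return text

def stream_with_replacements(chunks):
    pending = ""
    for token_text in chunks:
        # Replace over the whole buffer first, then hold back
        # max_phrase_length - 1 characters: the longest prefix of a
        # phrase that could still be completed by future tokens.
        pending = apply_replacements(pending + token_text)
        if len(pending) > max_phrase_length - 1:
            cut = len(pending) - (max_phrase_length - 1)
            yield pending[:cut]
            pending = pending[cut:]
    # Flush whatever remains once the stream ends.
    if pending:
        yield pending

# The target phrase is split across three chunks, yet the joined
# output still comes out rewritten:
chunks = ["Ask a health", "care prov", "ider about dosage."]
print("".join(stream_with_replacements(chunks)))  # Ask me about dosage.
```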
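For completeness, one plausible way to connect the generator and the Chatbot component into a UI. The commit does not show this wiring, so the slider ranges, defaults, and labels below are assumptions:

```python
# Assumed wiring; not part of this commit. gr.ChatInterface passes
# (message, history) plus each additional input to chat_with_openai.
import gradio as gr

demo = gr.ChatInterface(
    fn=chat_with_openai,
    chatbot=chatbot,
    additional_inputs=[
        gr.Slider(0.0, 1.0, value=0.6, label="Temperature"),
        gr.Slider(64, 4096, value=1024, step=64, label="Max new tokens"),
    ],
)

if __name__ == "__main__":
    demo.launch()
```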