reedmayhew committed
Commit d1594b3 · verified · 1 Parent(s): 001d1ef

Update app.py

Files changed (1):
  app.py +64 -12
app.py CHANGED
 
@@ -44,6 +44,24 @@ h1 {
 }
 """
 
+# List of (phrase, replacement) pairs.
+replacements = [
+    ("a healthcare provider", "me"),
+    # Add more pairs as needed.
+]
+
+# Calculate the maximum length of any phrase.
+max_phrase_length = max(len(phrase) for phrase, _ in replacements)
+
+def apply_replacements(text):
+    """
+    Replace all specified phrases in the text.
+    """
+    for phrase, replacement in replacements:
+        text = text.replace(phrase, replacement)
+    return text
+
+
 def chat_with_openai(message: str, history: list, temperature: float, max_new_tokens: int):
     """
     Call the OpenAI ChatCompletion endpoint using the new client and yield streaming responses.
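
For reference, apply_replacements is plain substring replacement, and max_phrase_length (21 characters for "a healthcare provider") is what the streaming loop further down uses to decide how much text is safe to emit. A quick REPL check; the input sentence here is illustrative only:

>>> apply_replacements("Please consult a healthcare provider if symptoms persist.")
'Please consult me if symptoms persist.'
>>> max_phrase_length
21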
 
@@ -73,11 +91,6 @@ def chat_with_openai(message: str, history: list, temperature: float, max_new_tokens: int):
     # Force the model to begin its answer with a "<think>" block.
     conversation.append({"role": "assistant", "content": "<think> "})
 
-    full_response = ""     # Stores the raw assistant response (including the <think> block).
-    buffer = ""            # Accumulates tokens until we detect the closing </think>.
-    display_text = ""      # Holds text to display (only text after </think>).
-    think_detected = False
-
     # Immediately yield a "thinking" status message.
     yield "HealthAssistant is Thinking! Please wait, your response will output shortly...\n\n"
 
@@ -90,27 +103,66 @@ def chat_with_openai(message: str, history: list, temperature: float, max_new_tokens: int):
         stream=True,
     )
 
+    # Buffers and state flags.
+    buffer = ""            # Used before the </think> marker is detected.
+    pending_buffer = ""    # Sliding buffer for safely holding the tail of the stream.
+    think_detected = False
+    full_response = ""     # Accumulates the full (raw) response.
+
+    # Reminder of what is in scope at this point:
+    #   history  - the conversation history passed into this function.
+    #   message  - the user's message passed into this function.
+    #   response - the streaming iterator created just above.
+
     # Process streaming responses.
     for chunk in response:
         # Extract the new token text from the chunk.
         delta = chunk.choices[0].delta
         token_text = delta.content or ""
         full_response += token_text
-
+
         if not think_detected:
             # Accumulate tokens until we see the closing </think> marker.
             buffer += token_text
             if "</think>" in buffer:
                 think_detected = True
                 # Discard everything up to and including the "</think>" marker.
-                display_text = buffer.split("</think>", 1)[1]
-                yield display_text
+                after_think = buffer.split("</think>", 1)[1]
+                # Initialize the pending_buffer with the text after </think>.
+                pending_buffer += after_think
+
+                # If pending_buffer is large enough, yield the safe portion.
+                if len(pending_buffer) > max_phrase_length:
+                    # All except the last max_phrase_length characters are safe to yield.
+                    to_yield = pending_buffer[:-max_phrase_length]
+                    # Apply replacements on the safe portion.
+                    to_yield = apply_replacements(to_yield)
+                    yield to_yield
+                    # Retain the last part in pending_buffer for potential split phrases.
+                    pending_buffer = pending_buffer[-max_phrase_length:]
         else:
-            display_text += token_text
-            yield display_text
-
+            # Append new token text to pending_buffer.
+            pending_buffer += token_text
+
+            # If pending_buffer is longer than max_phrase_length, yield the safe portion.
+            if len(pending_buffer) > max_phrase_length:
+                # Extract the part that is definitely not part of a split phrase.
+                to_yield = pending_buffer[:-max_phrase_length]
+                to_yield = apply_replacements(to_yield)
+                yield to_yield
+                # Keep the last max_phrase_length characters in pending_buffer.
+                pending_buffer = pending_buffer[-max_phrase_length:]
+
+    # After processing all chunks, flush any remaining text in pending_buffer.
+    if pending_buffer:
+        to_yield = apply_replacements(pending_buffer)
+        yield to_yield
+
     # Append the full (raw) response, including the <think> section, to the conversation history.
-    history.append((message, full_response))
+    # If you want the conversation history to reflect the replacements, apply them to full_response.
+    modified_full_response = apply_replacements(full_response)
+    history.append((message, modified_full_response))
+
 
 # Create the Chatbot component.
 chatbot = gr.Chatbot(height=450, placeholder=PLACEHOLDER, label='HealthAssistant')
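
The hold-back logic is the interesting part of this commit: tokens stream in arbitrary slices, so a phrase like "a healthcare provider" can arrive split across chunks, and the last max_phrase_length characters are therefore withheld until more text (or the end of the stream) arrives. Below is a minimal, self-contained sketch of the same sliding-buffer idea; the function name and chunk list are illustrative, not from app.py, and this variant applies the replacements before trimming the buffer, which also covers the edge case where a phrase begins just before the emission cut:

def stream_with_replacements(chunks, replacements):
    """Yield streamed text with phrases replaced, even when a phrase
    spans a chunk boundary."""
    max_phrase_length = max(len(phrase) for phrase, _ in replacements)
    pending = ""  # Sliding buffer holding text not yet safe to emit.
    for chunk in chunks:
        pending += chunk
        # Replace first, so a completed phrase can never straddle the cut.
        for phrase, replacement in replacements:
            pending = pending.replace(phrase, replacement)
        # Hold back max_phrase_length - 1 characters: the longest
        # possible partial phrase still waiting for its ending.
        keep = max_phrase_length - 1
        if len(pending) > keep:
            cut = len(pending) - keep
            yield pending[:cut]
            pending = pending[cut:]
    if pending:  # Flush the tail once the stream ends.
        yield pending

# "a healthcare provider" is deliberately split across three chunks.
chunks = ["Ask a health", "care prov", "ider about it."]
print("".join(stream_with_replacements(chunks, [("a healthcare provider", "me")])))
# Prints: Ask me about it.

Either way, the invariant is the same: the tail of the stream is never emitted while it could still be the beginning of a phrase that a later chunk completes.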