Commit 02fa10d · verified · 1 Parent(s): 661882e
reedmayhew committed

Update app.py

Files changed (1):
  1. app.py  +21 -28
app.py CHANGED
@@ -114,11 +114,6 @@ def chat_with_openai(message: str, history: list, temperature: float, max_new_to
     think_detected = False
     full_response = ""  # Accumulates the full raw response (without replacements applied).
 
-    # These are assumed to be defined in your application.
-    history = []  # The conversation history.
-    message = "User message"  # The user’s message (or any identifier).
-    # 'response' is assumed to be an iterable of token chunks.
-
     # Process streaming responses.
     for chunk in response:
         # Extract the new token text from the current chunk.
@@ -127,40 +122,38 @@ def chat_with_openai(message: str, history: list, temperature: float, max_new_to
         full_response += token_text
 
         if not think_detected:
-            # Accumulate tokens until the </think> marker is detected.
+            # Accumulate tokens until we see the closing </think> marker.
             buffer += token_text
             if "</think>" in buffer:
                 think_detected = True
-                # Discard everything up to and including the </think> marker.
+                # Discard everything up to and including the "</think>" marker.
                 after_think = buffer.split("</think>", 1)[1]
-                # Start the pending buffer with the text after the marker.
                 pending_buffer += after_think
-
-                # If pending_buffer is large enough, extract the safe portion.
-                if len(pending_buffer) > max_phrase_length:
-                    safe_portion = pending_buffer[:-max_phrase_length]
-                    safe_portion = apply_replacements(safe_portion)
-                    display_text += safe_portion
-                    yield display_text
-                    # Retain only the last max_phrase_length characters in pending_buffer.
-                    pending_buffer = pending_buffer[-max_phrase_length:]
+                # Only flush if we have at least MIN_FLUSH_SIZE characters.
+                if len(pending_buffer) >= MIN_FLUSH_SIZE:
+                    # Flush all but the last max_phrase_length characters.
+                    safe_portion = pending_buffer[:-max_phrase_length] if len(pending_buffer) > max_phrase_length else ""
+                    if safe_portion:
+                        display_text += apply_replacements(safe_portion)
+                        yield display_text
+                    pending_buffer = pending_buffer[-max_phrase_length:]
         else:
-            # Already passed the </think> marker, so work with pending_buffer.
+            # After the </think> marker, add tokens to pending_buffer.
             pending_buffer += token_text
-            if len(pending_buffer) > max_phrase_length:
-                safe_portion = pending_buffer[:-max_phrase_length]
-                safe_portion = apply_replacements(safe_portion)
-                display_text += safe_portion
-                yield display_text
-                pending_buffer = pending_buffer[-max_phrase_length:]
+            if len(pending_buffer) >= MIN_FLUSH_SIZE:
+                safe_portion = pending_buffer[:-max_phrase_length] if len(pending_buffer) > max_phrase_length else ""
+                if safe_portion:
+                    display_text += apply_replacements(safe_portion)
+                    yield display_text
+                pending_buffer = pending_buffer[-max_phrase_length:]
 
-    # After processing all chunks, flush any remaining text in pending_buffer.
+    # After processing all tokens, flush any remaining text.
    if pending_buffer:
-        safe_portion = apply_replacements(pending_buffer)
-        display_text += safe_portion
+        safe_portion = pending_buffer  # flush whatever remains
+        display_text += apply_replacements(safe_portion)
        yield display_text
 
-    # Optionally, store the full response (including the <think> section) into the conversation history.
+    # Append the full (raw) response, including the <think> section, to the conversation history.
     # If you want the history to reflect the replacements, apply them here.
     modified_full_response = apply_replacements(full_response)
     history.append((message, modified_full_response))
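
For context, the hunk above implements a delayed-flush scheme: text after the </think> marker accumulates in pending_buffer, and once at least MIN_FLUSH_SIZE characters are pending, everything except the last max_phrase_length characters is run through apply_replacements and shown, so a phrase still arriving at a chunk boundary is not displayed before it can be rewritten. Below is a minimal, self-contained sketch of that scheme, not the app's actual code: the replacement table, the constant values, and the fake token stream are illustrative stand-ins, and the diff's two identical flush branches are folded into a single check.

# Minimal sketch of the delayed-flush streaming scheme (illustrative
# stand-ins for the app's real replacement table and constants).
REPLACEMENTS = {"DeepSeek": "AI"}                      # hypothetical phrase rewrites
MAX_PHRASE_LENGTH = max(len(p) for p in REPLACEMENTS)  # longest phrase we might rewrite
MIN_FLUSH_SIZE = 16                                    # wait for this much pending text

def apply_replacements(text: str) -> str:
    for old, new in REPLACEMENTS.items():
        text = text.replace(old, new)
    return text

def stream_display_text(token_stream):
    """Yield growing display text, holding back the tail of the pending
    text so a phrase split across chunks can still be rewritten."""
    buffer = ""           # text seen before </think>
    pending_buffer = ""   # post-</think> text not yet shown
    display_text = ""     # text already shown to the user
    think_detected = False

    for token_text in token_stream:
        if not think_detected:
            buffer += token_text
            if "</think>" in buffer:
                think_detected = True
                # Keep only what follows the </think> marker.
                pending_buffer += buffer.split("</think>", 1)[1]
        else:
            pending_buffer += token_text

        # Flush all but the last MAX_PHRASE_LENGTH characters once enough
        # text is pending; the held-back tail lets a phrase that is still
        # streaming in be rewritten before it is displayed.
        if think_detected and len(pending_buffer) >= MIN_FLUSH_SIZE:
            safe_portion = pending_buffer[:-MAX_PHRASE_LENGTH]
            if safe_portion:
                display_text += apply_replacements(safe_portion)
                yield display_text
            pending_buffer = pending_buffer[-MAX_PHRASE_LENGTH:]

    # End of stream: whatever remains is rewritten and shown.
    if pending_buffer:
        display_text += apply_replacements(pending_buffer)
        yield display_text

# Demo with a fake chunked response; "DeepSeek" arrives split across chunks.
chunks = ["reasoning...</think>Hello from Deep", "Seek, streaming to you."]
for text in stream_display_text(chunks):
    print(text)
# Last line printed: "Hello from AI, streaming to you."

Holding back max_phrase_length characters mirrors the commit's choice of tail size; raising MIN_FLUSH_SIZE trades a little display latency for fewer, larger UI updates.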