Spaces: Update app.py

app.py (CHANGED)
@@ -114,11 +114,6 @@ def chat_with_openai(message: str, history: list, temperature: float, max_new_to
     think_detected = False
     full_response = ""  # Accumulates the full raw response (without replacements applied).
 
-    # These are assumed to be defined in your application.
-    history = []  # The conversation history.
-    message = "User message"  # The user’s message (or any identifier).
-    # 'response' is assumed to be an iterable of token chunks.
-
     # Process streaming responses.
     for chunk in response:
         # Extract the new token text from the current chunk.
@@ -127,40 +122,38 @@ def chat_with_openai(message: str, history: list, temperature: float, max_new_to
         full_response += token_text
 
         if not think_detected:
-            # Accumulate tokens until the </think> marker
+            # Accumulate tokens until we see the closing </think> marker.
             buffer += token_text
             if "</think>" in buffer:
                 think_detected = True
-                # Discard everything up to and including the </think> marker.
+                # Discard everything up to and including the "</think>" marker.
                 after_think = buffer.split("</think>", 1)[1]
-                # Start the pending buffer with the text after the marker.
                 pending_buffer += after_think
-
-
-
-                safe_portion = pending_buffer[:-max_phrase_length]
-
-
-
-
-                pending_buffer = pending_buffer[-max_phrase_length:]
+                # Only flush if we have at least MIN_FLUSH_SIZE characters.
+                if len(pending_buffer) >= MIN_FLUSH_SIZE:
+                    # Flush all but the last max_phrase_length characters.
+                    safe_portion = pending_buffer[:-max_phrase_length] if len(pending_buffer) > max_phrase_length else ""
+                    if safe_portion:
+                        display_text += apply_replacements(safe_portion)
+                        yield display_text
+                    pending_buffer = pending_buffer[-max_phrase_length:]
         else:
-            #
+            # After the </think> marker, add tokens to pending_buffer.
             pending_buffer += token_text
-            if len(pending_buffer)
-                safe_portion = pending_buffer[:-max_phrase_length]
-
-
-
-
+            if len(pending_buffer) >= MIN_FLUSH_SIZE:
+                safe_portion = pending_buffer[:-max_phrase_length] if len(pending_buffer) > max_phrase_length else ""
+                if safe_portion:
+                    display_text += apply_replacements(safe_portion)
+                    yield display_text
+                pending_buffer = pending_buffer[-max_phrase_length:]
 
-    # After processing all
+    # After processing all tokens, flush any remaining text.
     if pending_buffer:
-        safe_portion =
-        display_text += safe_portion
+        safe_portion = pending_buffer  # flush whatever remains
+        display_text += apply_replacements(safe_portion)
         yield display_text
 
-    #
+    # Append the full (raw) response, including the <think> section, to the conversation history.
     # If you want the history to reflect the replacements, apply them here.
     modified_full_response = apply_replacements(full_response)
     history.append((message, modified_full_response))