reedmayhew committed on
Commit
40c24da
·
verified ·
1 Parent(s): d1594b3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -29
app.py CHANGED
@@ -103,63 +103,61 @@ def chat_with_openai(message: str, history: list, temperature: float, max_new_to
103
  stream=True,
104
  )
105
 
106
- # Buffers and state flags.
107
- buffer = "" # Used before the </think> marker is detected.
108
- pending_buffer = "" # Sliding buffer for safely holding the tail.
 
109
  think_detected = False
110
- full_response = "" # Accumulates the full (raw) response.
111
 
112
- # Suppose these are defined elsewhere in your code:
113
  history = [] # The conversation history.
114
- message = "User message" # The user's message, for example.
115
  # 'response' is assumed to be an iterable of token chunks.
116
 
117
  # Process streaming responses.
118
  for chunk in response:
119
- # Extract the new token text from the chunk.
120
  delta = chunk.choices[0].delta
121
  token_text = delta.content or ""
122
  full_response += token_text
123
 
124
  if not think_detected:
125
- # Accumulate tokens until we see the closing </think> marker.
126
  buffer += token_text
127
  if "</think>" in buffer:
128
  think_detected = True
129
- # Discard everything up to and including the "</think>" marker.
130
  after_think = buffer.split("</think>", 1)[1]
131
- # Initialize the pending_buffer with the text after </think>.
132
  pending_buffer += after_think
133
 
134
- # If pending_buffer is large enough, yield the safe portion.
135
  if len(pending_buffer) > max_phrase_length:
136
- # All except the last max_phrase_length characters are safe to yield.
137
- to_yield = pending_buffer[:-max_phrase_length]
138
- # Apply replacements on the safe portion.
139
- to_yield = apply_replacements(to_yield)
140
- yield to_yield
141
- # Retain the last part in pending_buffer for potential split phrases.
142
  pending_buffer = pending_buffer[-max_phrase_length:]
143
  else:
144
- # Append new token text to pending_buffer.
145
  pending_buffer += token_text
146
-
147
- # If pending_buffer is longer than max_phrase_length, yield the safe portion.
148
  if len(pending_buffer) > max_phrase_length:
149
- # Extract the part that is definitely not part of a split phrase.
150
- to_yield = pending_buffer[:-max_phrase_length]
151
- to_yield = apply_replacements(to_yield)
152
- yield to_yield
153
- # Keep the last max_phrase_length characters in pending_buffer.
154
  pending_buffer = pending_buffer[-max_phrase_length:]
155
 
156
  # After processing all chunks, flush any remaining text in pending_buffer.
157
  if pending_buffer:
158
- to_yield = apply_replacements(pending_buffer)
159
- yield to_yield
 
160
 
161
- # Append the full (raw) response, including the <think> section, to the conversation history.
162
- # If you want the conversation history to reflect the replacements, apply them to full_response.
163
  modified_full_response = apply_replacements(full_response)
164
  history.append((message, modified_full_response))
165
 
 
103
  stream=True,
104
  )
105
 
106
+ # Initialize buffers and state flags.
107
+ buffer = "" # Accumulates tokens until the </think> marker is found.
108
+ pending_buffer = "" # Holds the tail end of text that may contain a partial phrase.
109
+ display_text = "" # Cumulative text that has been finalized and yielded.
110
  think_detected = False
111
+ full_response = "" # Accumulates the full raw response (without replacements applied).
112
 
113
+ # These are assumed to be defined in your application.
114
  history = [] # The conversation history.
115
+ message = "User message" # The users message (or any identifier).
116
  # 'response' is assumed to be an iterable of token chunks.
117
 
118
  # Process streaming responses.
119
  for chunk in response:
120
+ # Extract the new token text from the current chunk.
121
  delta = chunk.choices[0].delta
122
  token_text = delta.content or ""
123
  full_response += token_text
124
 
125
  if not think_detected:
126
+ # Accumulate tokens until the </think> marker is detected.
127
  buffer += token_text
128
  if "</think>" in buffer:
129
  think_detected = True
130
+ # Discard everything up to and including the </think> marker.
131
  after_think = buffer.split("</think>", 1)[1]
132
+ # Start the pending buffer with the text after the marker.
133
  pending_buffer += after_think
134
 
135
+ # If pending_buffer is large enough, extract the safe portion.
136
  if len(pending_buffer) > max_phrase_length:
137
+ safe_portion = pending_buffer[:-max_phrase_length]
138
+ safe_portion = apply_replacements(safe_portion)
139
+ display_text += safe_portion
140
+ yield display_text
141
+ # Retain only the last max_phrase_length characters in pending_buffer.
 
142
  pending_buffer = pending_buffer[-max_phrase_length:]
143
  else:
144
+ # Already passed the </think> marker, so work with pending_buffer.
145
  pending_buffer += token_text
 
 
146
  if len(pending_buffer) > max_phrase_length:
147
+ safe_portion = pending_buffer[:-max_phrase_length]
148
+ safe_portion = apply_replacements(safe_portion)
149
+ display_text += safe_portion
150
+ yield display_text
 
151
  pending_buffer = pending_buffer[-max_phrase_length:]
152
 
153
  # After processing all chunks, flush any remaining text in pending_buffer.
154
  if pending_buffer:
155
+ safe_portion = apply_replacements(pending_buffer)
156
+ display_text += safe_portion
157
+ yield display_text
158
 
159
+ # Optionally, store the full response (including the <think> section) into the conversation history.
160
+ # If you want the history to reflect the replacements, apply them here.
161
  modified_full_response = apply_replacements(full_response)
162
  history.append((message, modified_full_response))
163