hadadrjt committed
Commit ce9d223 · Parent: d17e7ef

Reapply "ai: Append reasoning tag."

* 77e3d5f76d19fa7474b15d168425c13ddf0ad885

Updated with new code.

Needed for: https://huggingface.co/spaces/hadadrjt/api

Files changed (1):
  1. src/main/gradio.py +61 -27
src/main/gradio.py CHANGED
@@ -55,54 +55,88 @@ async def respond_async(multi, history, model_display, sess, custom_prompt, deep_search):
     if msg_input["text"]:
         inp += msg_input["text"]
 
-    # Append user input to chat history with placeholder AI response
-    history.append([inp, RESPONSES["RESPONSE_8"]]) # RESPONSE_8 is a placeholder text
+    # Append user input to chat history
+    history.append([inp, ""]) # placeholder
 
     # Yield updated history and disable input while AI is responding
     yield history, gr.update(interactive=False, submit_btn=False, stop_btn=True), sess
-
-    queue = asyncio.Queue() # Queue to hold streamed AI response chunks
-
+
+    # Create queue for streaming AI response chunks
+    queue = asyncio.Queue()
+
     async def background():
         """
-        Background async task to fetch streamed AI responses from the model.
-        Handles reasoning and content chunks separately.
-        Supports cancellation via session stop event.
+        This coroutine handles streaming responses from an AI model asynchronously.
+        It processes two types of streamed data separately: 'reasoning' chunks and 'content' chunks.
+        The function supports graceful cancellation if a stop event or cancel token is triggered in the session.
+
+        Reasoning text is accumulated until content streaming starts, after which reasoning is ignored.
+        Special tags <think> and </think> are managed to mark reasoning sections for UI display.
+        Content chunks are streamed and accumulated separately, with incremental UI updates.
+
+        When streaming ends, any open reasoning tags are closed properly.
+        Finally, the function signals completion by putting None into the queue and returns the full content response.
         """
-        reasoning = "" # Accumulate reasoning text
-        responses = "" # Accumulate content text
-        content_started = False # Flag to indicate content streaming started
-        ignore_reasoning = False # Flag to ignore reasoning after content starts
-
-        # Async iterate over streaming response chunks from AI model
+        reasoning = "" # String to accumulate reasoning text chunks
+        responses = "" # String to accumulate content text chunks
+        content_started = False # Flag to indicate if content streaming has begun
+        ignore_reasoning = False # Flag to ignore reasoning after content starts streaming
+        think_opened = False # Flag to track if reasoning <think> tag has been sent
+
+        # Asynchronously iterate over streamed response chunks from the AI model
         async for typ, chunk in chat_with_model_async(history, inp, model_display, sess, custom_prompt, deep_search):
-            # Break if user requested stop or cancellation flagged
+            # Break the loop if user requested stop or cancellation is flagged
            if sess.stop_event.is_set() or sess.cancel_token["cancelled"]:
                 break
-
+
             if typ == "reasoning":
-                # Append reasoning chunk unless ignoring reasoning after content start
+                # Append reasoning chunk unless ignoring reasoning after content started
                 if ignore_reasoning:
                     continue
-                reasoning += chunk
-                # Put formatted reasoning text into queue for UI update
+                # Handle opening <think> tag for reasoning
+                if chunk.strip() == "<think>":
+                    if not think_opened:
+                        think_opened = True # Mark that reasoning tag has been opened
+                    continue # Skip sending the tag itself to UI
+                if not think_opened:
+                    # If reasoning tag not yet opened, prepend it and mark as opened
+                    reasoning += "<think>\n" + chunk
+                    think_opened = True
+                else:
+                    # Append reasoning chunk normally
+                    reasoning += chunk
+                # Send current reasoning content to queue for UI update (without sending tag again)
                 await queue.put(("reasoning", reasoning))
-
+
             elif typ == "content":
                 if not content_started:
-                    # On first content chunk, clear reasoning and start content accumulation
+                    # On first content chunk, mark content started and ignore further reasoning
                     content_started = True
                     ignore_reasoning = True
+                    if think_opened:
+                        # Close reasoning tag before sending content
+                        reasoning += "\n</think>\n\n"
+                        await queue.put(("reasoning", reasoning)) # Update UI with closed reasoning
+                    else:
+                        # No reasoning was sent, clear reasoning display in UI
+                        await queue.put(("reasoning", ""))
+                    # Start accumulating content and send initial content to UI replacing placeholder
                     responses = chunk
-                    await queue.put(("reasoning", "")) # Clear reasoning display
-                    await queue.put(("replace", responses)) # Replace placeholder with content start
+                    await queue.put(("replace", responses))
                 else:
-                    # Append subsequent content chunks and update UI
+                    # Append subsequent content chunks and update UI incrementally
                     responses += chunk
                     await queue.put(("append", responses))
-
-        await queue.put(None) # Signal completion of streaming
-        return responses # Return final complete response text
+
+        # If stream ends without content, close reasoning tag if it was opened
+        if think_opened and not content_started:
+            reasoning += "\n</think>\n\n"
+            await queue.put(("reasoning", reasoning))
+
+        # Signal completion of streaming by putting None into the queue
+        await queue.put(None)
+        # Return the full accumulated content response
+        return responses
 
     bg_task = asyncio.create_task(background()) # Start background streaming task
     stop_task = asyncio.create_task(sess.stop_event.wait()) # Task to wait for stop event
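
For reference, the tag-handling logic added to `background()` can be exercised on its own, outside Gradio. The sketch below is a minimal reproduction under stated assumptions: `fake_stream()` is a hypothetical stand-in for `chat_with_model_async()` (whose real behavior, beyond yielding `(type, chunk)` pairs, is not shown in this diff), and the consumer loop in `main()` plays the role of the UI-update loop in `respond_async`. It demonstrates the three behaviors the new code enforces: the raw `<think>` chunk is never forwarded to the UI, a tag is synthesized when the model omits it, and the tag is always closed, whether content follows or the stream ends mid-reasoning.

```python
import asyncio

async def fake_stream():
    # Hypothetical stand-in for chat_with_model_async(): yields (type, chunk) pairs
    for item in [
        ("reasoning", "<think>"),    # tag arrives as its own chunk
        ("reasoning", "step one, "),
        ("reasoning", "step two"),
        ("content", "Hello"),
        ("content", ", world."),
    ]:
        yield item

async def background(queue):
    reasoning = ""
    responses = ""
    content_started = False
    ignore_reasoning = False
    think_opened = False
    async for typ, chunk in fake_stream():
        if typ == "reasoning":
            if ignore_reasoning:
                continue
            if chunk.strip() == "<think>":
                if not think_opened:
                    think_opened = True  # swallow the raw tag, remember we saw it
                continue
            if not think_opened:
                reasoning += "<think>\n" + chunk  # synthesize tag if model never sent one
                think_opened = True
            else:
                reasoning += chunk
            await queue.put(("reasoning", reasoning))
        elif typ == "content":
            if not content_started:
                content_started = True
                ignore_reasoning = True
                if think_opened:
                    reasoning += "\n</think>\n\n"  # close the tag before content starts
                    await queue.put(("reasoning", reasoning))
                else:
                    await queue.put(("reasoning", ""))  # nothing to show, clear display
                responses = chunk
                await queue.put(("replace", responses))
            else:
                responses += chunk
                await queue.put(("append", responses))
    if think_opened and not content_started:
        reasoning += "\n</think>\n\n"  # stream ended mid-reasoning: still close the tag
        await queue.put(("reasoning", reasoning))
    await queue.put(None)  # completion sentinel for the consumer
    return responses

async def main():
    queue = asyncio.Queue()
    task = asyncio.create_task(background(queue))
    while (item := await queue.get()) is not None:  # drain until sentinel
        print(item)  # each item would drive one UI update in the real app
    print("final:", await task)

asyncio.run(main())
```

The `None` sentinel is what lets the consumer distinguish "stream finished" from "queue momentarily empty"; the full response is then retrieved by awaiting the producer task, mirroring how `bg_task` is created at the end of the hunk above.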