Reapply "ai: Append reasoning tag."
Commit 77e3d5f76d19fa7474b15d168425c13ddf0ad885
Updated with new code.
Needed for: https://huggingface.co/spaces/hadadrjt/api
src/main/gradio.py  +61 -27
@@ -55,54 +55,88 @@ async def respond_async(multi, history, model_display, sess, custom_prompt, deep_search):
     if msg_input["text"]:
         inp += msg_input["text"]
 
-    # Append user input to chat history
-    history.append([inp,
+    # Append user input to chat history
+    history.append([inp, ""])  # placeholder
 
     # Yield updated history and disable input while AI is responding
     yield history, gr.update(interactive=False, submit_btn=False, stop_btn=True), sess
-
-
-
+
+    # Create queue for streaming AI response chunks
+    queue = asyncio.Queue()
+
     async def background():
         """
-
-
-
+        This coroutine handles streaming responses from an AI model asynchronously.
+        It processes two types of streamed data separately: 'reasoning' chunks and 'content' chunks.
+        The function supports graceful cancellation if a stop event or cancel token is triggered in the session.
+
+        Reasoning text is accumulated until content streaming starts, after which reasoning is ignored.
+        Special tags <think> and </think> are managed to mark reasoning sections for UI display.
+        Content chunks are streamed and accumulated separately, with incremental UI updates.
+
+        When streaming ends, any open reasoning tags are closed properly.
+        Finally, the function signals completion by putting None into the queue and returns the full content response.
         """
-        reasoning = "" #
-        responses = "" #
-        content_started = False # Flag to indicate content streaming
-        ignore_reasoning = False # Flag to ignore reasoning after content starts
-
-
+        reasoning = ""  # String to accumulate reasoning text chunks
+        responses = ""  # String to accumulate content text chunks
+        content_started = False  # Flag to indicate if content streaming has begun
+        ignore_reasoning = False  # Flag to ignore reasoning after content starts streaming
+        think_opened = False  # Flag to track if reasoning <think> tag has been sent
+
+        # Asynchronously iterate over streamed response chunks from the AI model
         async for typ, chunk in chat_with_model_async(history, inp, model_display, sess, custom_prompt, deep_search):
-            # Break if user requested stop or cancellation flagged
+            # Break the loop if user requested stop or cancellation is flagged
             if sess.stop_event.is_set() or sess.cancel_token["cancelled"]:
                 break
-
+
             if typ == "reasoning":
-                # Append reasoning chunk unless ignoring reasoning after content
+                # Append reasoning chunk unless ignoring reasoning after content started
                 if ignore_reasoning:
                     continue
-
-
+                # Handle opening <think> tag for reasoning
+                if chunk.strip() == "<think>":
+                    if not think_opened:
+                        think_opened = True  # Mark that reasoning tag has been opened
+                    continue  # Skip sending the tag itself to UI
+                if not think_opened:
+                    # If reasoning tag not yet opened, prepend it and mark as opened
+                    reasoning += "<think>\n" + chunk
+                    think_opened = True
+                else:
+                    # Append reasoning chunk normally
+                    reasoning += chunk
+                # Send current reasoning content to queue for UI update (without sending tag again)
                 await queue.put(("reasoning", reasoning))
-
+
             elif typ == "content":
                 if not content_started:
-                    # On first content chunk,
+                    # On first content chunk, mark content started and ignore further reasoning
                     content_started = True
                     ignore_reasoning = True
+                    if think_opened:
+                        # Close reasoning tag before sending content
+                        reasoning += "\n</think>\n\n"
+                        await queue.put(("reasoning", reasoning))  # Update UI with closed reasoning
+                    else:
+                        # No reasoning was sent, clear reasoning display in UI
+                        await queue.put(("reasoning", ""))
+                    # Start accumulating content and send initial content to UI replacing placeholder
                     responses = chunk
-                    await queue.put(("
-                    await queue.put(("replace", responses)) # Replace placeholder with content start
+                    await queue.put(("replace", responses))
                 else:
-                    # Append subsequent content chunks and update UI
+                    # Append subsequent content chunks and update UI incrementally
                     responses += chunk
                     await queue.put(("append", responses))
-
-
-
+
+        # If stream ends without content, close reasoning tag if it was opened
+        if think_opened and not content_started:
+            reasoning += "\n</think>\n\n"
+            await queue.put(("reasoning", reasoning))
+
+        # Signal completion of streaming by putting None into the queue
+        await queue.put(None)
+        # Return the full accumulated content response
+        return responses
 
     bg_task = asyncio.create_task(background())  # Start background streaming task
     stop_task = asyncio.create_task(sess.stop_event.wait())  # Task to wait for stop event
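For reference, the change hinges on one pattern: background() is a producer that pushes ("reasoning" | "replace" | "append", text) tuples into queue, with None as the completion sentinel, while the Gradio loop consumes them to update the chat. The consumer side is outside this hunk, so the following is only a minimal self-contained sketch of that shape, not the Space's actual code; fake_stream, producer, and consumer are made-up names standing in for chat_with_model_async and the real UI loop.

import asyncio

async def fake_stream():
    # Stand-in for chat_with_model_async(): yields (type, chunk) tuples.
    yield "reasoning", "Let me think. "
    yield "reasoning", "The user greets us."
    yield "content", "Hello"
    yield "content", ", world!"

async def producer(queue: asyncio.Queue) -> str:
    reasoning, responses = "", ""
    content_started = ignore_reasoning = think_opened = False
    async for typ, chunk in fake_stream():
        if typ == "reasoning":
            if ignore_reasoning:
                continue
            if chunk.strip() == "<think>":
                think_opened = True  # model sent the tag itself; don't forward it
                continue
            if not think_opened:
                reasoning += "<think>\n" + chunk  # open the tag ourselves
                think_opened = True
            else:
                reasoning += chunk
            await queue.put(("reasoning", reasoning))
        elif typ == "content":
            if not content_started:
                content_started = ignore_reasoning = True
                if think_opened:
                    reasoning += "\n</think>\n\n"  # close reasoning before content
                    await queue.put(("reasoning", reasoning))
                else:
                    await queue.put(("reasoning", ""))  # nothing to show
                responses = chunk
                await queue.put(("replace", responses))  # replace the "" placeholder
            else:
                responses += chunk
                await queue.put(("append", responses))
    if think_opened and not content_started:
        reasoning += "\n</think>\n\n"  # stream ended while still "thinking"
        await queue.put(("reasoning", reasoning))
    await queue.put(None)  # completion sentinel
    return responses

async def consumer(queue: asyncio.Queue) -> None:
    # Drain until the sentinel; each action maps to one UI update.
    while True:
        item = await queue.get()
        if item is None:
            break
        action, text = item
        print(f"[{action}] {text!r}")

async def main() -> None:
    queue = asyncio.Queue()
    task = asyncio.create_task(producer(queue))
    await consumer(queue)
    print("final:", repr(await task))

asyncio.run(main())

The None sentinel is what lets the consumer exit cleanly: a stop or cancel request only breaks the producer loop, which still falls through to queue.put(None), so the UI never blocks on a dead stream. The hunk's last two lines create bg_task and stop_task side by side, presumably so the handler can wait on both and react to a stop click even while the model is still streaming.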