hadadrjt committed
Commit ce9d223 · Parent: d17e7ef

Reapply "ai: Append reasoning tag."

* 77e3d5f76d19fa7474b15d168425c13ddf0ad885

Updated with new code.

Needed for: https://huggingface.co/spaces/hadadrjt/api

Files changed (1):
  1. src/main/gradio.py +61 -27
src/main/gradio.py CHANGED
@@ -55,54 +55,88 @@ async def respond_async(multi, history, model_display, sess, custom_prompt, deep_search):
     if msg_input["text"]:
         inp += msg_input["text"]
 
-    # Append user input to chat history with placeholder AI response
-    history.append([inp, RESPONSES["RESPONSE_8"]]) # RESPONSE_8 is a placeholder text
+    # Append user input to chat history
+    history.append([inp, ""]) # placeholder
 
     # Yield updated history and disable input while AI is responding
     yield history, gr.update(interactive=False, submit_btn=False, stop_btn=True), sess
-
-    queue = asyncio.Queue() # Queue to hold streamed AI response chunks
-
+
+    # Create queue for streaming AI response chunks
+    queue = asyncio.Queue()
+
     async def background():
         """
-        Background async task to fetch streamed AI responses from the model.
-        Handles reasoning and content chunks separately.
-        Supports cancellation via session stop event.
+        This coroutine handles streaming responses from an AI model asynchronously.
+        It processes two types of streamed data separately: 'reasoning' chunks and 'content' chunks.
+        The function supports graceful cancellation if a stop event or cancel token is triggered in the session.
+
+        Reasoning text is accumulated until content streaming starts, after which reasoning is ignored.
+        Special tags <think> and </think> are managed to mark reasoning sections for UI display.
+        Content chunks are streamed and accumulated separately, with incremental UI updates.
+
+        When streaming ends, any open reasoning tags are closed properly.
+        Finally, the function signals completion by putting None into the queue and returns the full content response.
         """
-        reasoning = "" # Accumulate reasoning text
-        responses = "" # Accumulate content text
-        content_started = False # Flag to indicate content streaming started
-        ignore_reasoning = False # Flag to ignore reasoning after content starts
-
-        # Async iterate over streaming response chunks from AI model
+        reasoning = "" # String to accumulate reasoning text chunks
+        responses = "" # String to accumulate content text chunks
+        content_started = False # Flag to indicate if content streaming has begun
+        ignore_reasoning = False # Flag to ignore reasoning after content starts streaming
+        think_opened = False # Flag to track if reasoning <think> tag has been sent
+
+        # Asynchronously iterate over streamed response chunks from the AI model
         async for typ, chunk in chat_with_model_async(history, inp, model_display, sess, custom_prompt, deep_search):
-            # Break if user requested stop or cancellation flagged
+            # Break the loop if user requested stop or cancellation is flagged
            if sess.stop_event.is_set() or sess.cancel_token["cancelled"]:
                 break
-
+
             if typ == "reasoning":
-                # Append reasoning chunk unless ignoring reasoning after content start
+                # Append reasoning chunk unless ignoring reasoning after content started
                 if ignore_reasoning:
                     continue
-                reasoning += chunk
-                # Put formatted reasoning text into queue for UI update
+                # Handle opening <think> tag for reasoning
+                if chunk.strip() == "<think>":
+                    if not think_opened:
+                        think_opened = True # Mark that reasoning tag has been opened
+                    continue # Skip sending the tag itself to UI
+                if not think_opened:
+                    # If reasoning tag not yet opened, prepend it and mark as opened
+                    reasoning += "<think>\n" + chunk
+                    think_opened = True
+                else:
+                    # Append reasoning chunk normally
+                    reasoning += chunk
+                # Send current reasoning content to queue for UI update (without sending tag again)
                 await queue.put(("reasoning", reasoning))
-
+
             elif typ == "content":
                 if not content_started:
-                    # On first content chunk, clear reasoning and start content accumulation
+                    # On first content chunk, mark content started and ignore further reasoning
                     content_started = True
                     ignore_reasoning = True
+                    if think_opened:
+                        # Close reasoning tag before sending content
+                        reasoning += "\n</think>\n\n"
+                        await queue.put(("reasoning", reasoning)) # Update UI with closed reasoning
+                    else:
+                        # No reasoning was sent, clear reasoning display in UI
+                        await queue.put(("reasoning", ""))
+                    # Start accumulating content and send initial content to UI replacing placeholder
                     responses = chunk
-                    await queue.put(("reasoning", "")) # Clear reasoning display
-                    await queue.put(("replace", responses)) # Replace placeholder with content start
+                    await queue.put(("replace", responses))
                 else:
-                    # Append subsequent content chunks and update UI
+                    # Append subsequent content chunks and update UI incrementally
                     responses += chunk
                     await queue.put(("append", responses))
-
-        await queue.put(None) # Signal completion of streaming
-        return responses # Return final complete response text
+
+        # If stream ends without content, close reasoning tag if it was opened
+        if think_opened and not content_started:
+            reasoning += "\n</think>\n\n"
+            await queue.put(("reasoning", reasoning))
+
+        # Signal completion of streaming by putting None into the queue
+        await queue.put(None)
+        # Return the full accumulated content response
+        return responses
 
     bg_task = asyncio.create_task(background()) # Start background streaming task
     stop_task = asyncio.create_task(sess.stop_event.wait()) # Task to wait for stop event
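
For reference, the tag-handling logic added to `background()` can be exercised on its own, outside Gradio. The sketch below is a minimal reproduction under stated assumptions: `fake_stream()` is a hypothetical stand-in for `chat_with_model_async()` (whose real behavior, beyond yielding `(type, chunk)` pairs, is not shown in this diff), and the consumer loop in `main()` plays the role of the UI-update loop in `respond_async`. It demonstrates the three behaviors the new code enforces: the raw `<think>` chunk is never forwarded to the UI, a tag is synthesized when the model omits it, and the tag is always closed, whether content follows or the stream ends mid-reasoning.

```python
import asyncio

async def fake_stream():
    # Hypothetical stand-in for chat_with_model_async(): yields (type, chunk) pairs
    for item in [
        ("reasoning", "<think>"),    # tag arrives as its own chunk
        ("reasoning", "step one, "),
        ("reasoning", "step two"),
        ("content", "Hello"),
        ("content", ", world."),
    ]:
        yield item

async def background(queue):
    reasoning = ""
    responses = ""
    content_started = False
    ignore_reasoning = False
    think_opened = False
    async for typ, chunk in fake_stream():
        if typ == "reasoning":
            if ignore_reasoning:
                continue
            if chunk.strip() == "<think>":
                if not think_opened:
                    think_opened = True  # swallow the raw tag, remember we saw it
                continue
            if not think_opened:
                reasoning += "<think>\n" + chunk  # synthesize tag if model never sent one
                think_opened = True
            else:
                reasoning += chunk
            await queue.put(("reasoning", reasoning))
        elif typ == "content":
            if not content_started:
                content_started = True
                ignore_reasoning = True
                if think_opened:
                    reasoning += "\n</think>\n\n"  # close the tag before content starts
                    await queue.put(("reasoning", reasoning))
                else:
                    await queue.put(("reasoning", ""))  # nothing to show, clear display
                responses = chunk
                await queue.put(("replace", responses))
            else:
                responses += chunk
                await queue.put(("append", responses))
    if think_opened and not content_started:
        reasoning += "\n</think>\n\n"  # stream ended mid-reasoning: still close the tag
        await queue.put(("reasoning", reasoning))
    await queue.put(None)  # completion sentinel for the consumer
    return responses

async def main():
    queue = asyncio.Queue()
    task = asyncio.create_task(background(queue))
    while (item := await queue.get()) is not None:  # drain until sentinel
        print(item)  # each item would drive one UI update in the real app
    print("final:", await task)

asyncio.run(main())
```

The `None` sentinel is what lets the consumer distinguish "stream finished" from "queue momentarily empty"; the full response is then retrieved by awaiting the producer task, mirroring how `bg_task` is created at the end of the hunk above.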