#
# SPDX-FileCopyrightText: Hadad <[email protected]>
# SPDX-License-Identifier: Apache-2.0
#
import codecs  # Encoding/decoding helpers for text data
import httpx  # Asynchronous HTTP client used to stream responses from the backend
import json  # Parse streamed JSON chunks into Python objects
from src.cores.session import marked_item  # Tracks and marks keys that fail repeatedly so problematic keys are avoided
from config import LINUX_SERVER_ERRORS, LINUX_SERVER_PROVIDER_KEYS_MARKED, LINUX_SERVER_PROVIDER_KEYS_ATTEMPTS, RESPONSES  # Error codes, key-marking state, retry limits, and predefined response strings
async def fetch_response_stream_async(host, key, model, msgs, cfg, sid, stop_event, cancel_token):
    """
    Stream AI-generated response chunks from a backend server as an async generator.

    Args:
        host: URL of the backend endpoint to POST to.
        key: API key sent as a Bearer token in the Authorization header.
        model: Identifier of the AI model to use.
        msgs: List of chat messages forming the prompt/conversation.
        cfg: Dict of extra request parameters merged into the JSON body
            (entries may override the fixed parameters).
        sid: Session ID string associating the request with a session.
        stop_event: Async event; when set, streaming stops immediately.
        cancel_token: Dict with a boolean 'cancelled' flag for cooperative abort.

    Yields:
        ("reasoning", text) tuples for auxiliary reasoning text and
        ("content", text) tuples for the main generated content.

    Behavior:
        - Tries the request twice with increasing timeouts (5s, then 10s).
        - If the server responds with a status code in LINUX_SERVER_ERRORS, the
          key is marked as problematic via marked_item and streaming ends.
        - Stops cleanly when the server sends the RESPONSES["RESPONSE_10"]
          end-of-stream sentinel, or when cancellation is requested.
        - Malformed JSON chunks are skipped; network errors fall through to the
          next attempt. If every attempt fails, the key is marked.
    """
    for timeout in (5, 10):
        try:
            # Fresh client per attempt so the timeout applies to the whole request.
            async with httpx.AsyncClient(timeout=timeout) as client:
                async with client.stream(
                    "POST",
                    host,
                    # Fixed parameters first; cfg entries may override them.
                    json={"model": model, "messages": msgs, "session_id": sid, "stream": True, **cfg},
                    headers={"Authorization": f"Bearer {key}"}  # Bearer token auth
                ) as response:
                    if response.status_code in LINUX_SERVER_ERRORS:
                        # Server-side error: flag this key so it is avoided later.
                        marked_item(key, LINUX_SERVER_PROVIDER_KEYS_MARKED, LINUX_SERVER_PROVIDER_KEYS_ATTEMPTS)
                        return
                    async for line in response.aiter_lines():
                        # Honor cancellation between chunks.
                        if stop_event.is_set() or cancel_token["cancelled"]:
                            return
                        # Skip keep-alive blanks and anything that is not an SSE data line.
                        if not line or not line.startswith("data: "):
                            continue
                        data = line[6:]  # Payload after the "data: " prefix
                        if data.strip() == RESPONSES["RESPONSE_10"]:
                            # Explicit end-of-stream sentinel from the server.
                            return
                        try:
                            j = json.loads(data)
                            if isinstance(j, dict) and j.get("choices"):
                                for ch in j["choices"]:
                                    delta = ch.get("delta", {})  # Incremental update part
                                    reasoning = delta.get("reasoning")
                                    if reasoning:
                                        # Decode literal backslash escapes (e.g. "\\n",
                                        # "\\u4e2d") without corrupting real non-ASCII
                                        # characters: the latin-1/backslashreplace
                                        # round-trip passes codepoints <= U+00FF through
                                        # unchanged and re-escapes the rest, so
                                        # unicode_escape restores everything.  (The
                                        # previous utf-8 round-trip produced mojibake
                                        # for any non-ASCII reasoning text.)
                                        decoded = reasoning.encode("latin-1", "backslashreplace").decode("unicode_escape")
                                        yield ("reasoning", decoded)
                                    content = delta.get("content")
                                    if content:
                                        yield ("content", content)
                        except Exception:
                            # Malformed JSON or unexpected shape: skip this chunk.
                            continue
                    # NOTE(review): a stream that ends WITHOUT the sentinel falls
                    # through here and is retried with the next timeout, which can
                    # duplicate already-yielded chunks and ultimately mark the key
                    # — confirm this is the intended semantics.
        except Exception:
            # Network error or timeout: fall through to the next (longer) timeout.
            continue
    # Every attempt failed: mark the key as problematic to avoid reusing it.
    marked_item(key, LINUX_SERVER_PROVIDER_KEYS_MARKED, LINUX_SERVER_PROVIDER_KEYS_ATTEMPTS)
    return