ai / src /cores /server.py
hadadrjt's picture
ai: Refactor the code.
d17e7ef
raw
history blame
7.14 kB
#
# SPDX-FileCopyrightText: Hadad <[email protected]>
# SPDX-License-Identifier: Apache-2.0
#
import codecs # Import codecs module for encoding and decoding operations, useful for handling text data
import httpx # Import httpx for making asynchronous HTTP requests to external servers or APIs
import json # Import json module to parse JSON formatted strings into Python objects and vice versa
from src.cores.session import marked_item # Import marked_item function to track and mark keys that fail repeatedly, helping to avoid using problematic keys
from config import LINUX_SERVER_ERRORS, LINUX_SERVER_PROVIDER_KEYS_MARKED, LINUX_SERVER_PROVIDER_KEYS_ATTEMPTS, RESPONSES # Import various constants used for error handling, key marking, retry attempts, and predefined responses
def _decode_escapes(text):
    """
    Decode literal backslash escape sequences (for example a two-character '\\n') found in text.
    Characters outside ASCII are preserved exactly. Encoding as UTF-8 and then decoding with
    'unicode_escape' would reinterpret every UTF-8 byte as a Latin-1 character and corrupt them.
    If the text contains no backslash, or the escapes are malformed, the text is returned unchanged.
    """
    # Without a backslash there is nothing to decode, so skip the round trip entirely
    if "\\" not in text:
        return text
    try:
        # Latin-1 with 'backslashreplace' keeps code points up to U+00FF as single bytes and turns
        # anything larger into \uXXXX / \UXXXXXXXX escapes, both of which 'unicode_escape' restores
        return codecs.decode(text.encode("latin-1", "backslashreplace"), "unicode_escape")
    except UnicodeDecodeError:
        # Malformed escapes (for example a trailing lone backslash at a chunk boundary) are passed
        # through untouched instead of discarding the whole chunk
        return text


def _parse_stream_chunk(data):
    """
    Parse one JSON payload taken from a 'data: ' line and return a list of (kind, text) tuples.
    The kind is either 'reasoning' or 'content'. Malformed JSON or unexpected shapes yield an empty
    list, or the parts collected before the problem was hit, so streaming can simply continue.
    """
    parts = []
    try:
        parsed = json.loads(data)
        # Only dictionaries carrying a non-empty 'choices' value hold text deltas
        if not isinstance(parsed, dict) or not parsed.get("choices"):
            return parts
        for choice in parsed["choices"]:
            if not isinstance(choice, dict):
                continue
            # A null or otherwise non-dictionary 'delta' carries no text, skip it safely
            delta = choice.get("delta")
            if not isinstance(delta, dict):
                continue
            reasoning = delta.get("reasoning")
            # Reasoning text has its literal escape sequences decoded before being emitted
            if isinstance(reasoning, str) and reasoning:
                parts.append(("reasoning", _decode_escapes(reasoning)))
            content = delta.get("content")
            # Main content text is emitted as received
            if content:
                parts.append(("content", content))
    except (ValueError, TypeError, AttributeError):
        # Deliberate best effort: a bad chunk must not abort the stream
        pass
    return parts


async def fetch_response_stream_async(host, key, model, msgs, cfg, sid, stop_event, cancel_token):
    """
    Asynchronous generator function that streams AI-generated responses from a backend server endpoint.
    Parameters:
    - host: The URL the POST request is sent to.
    - key: Authorization token sent as a Bearer token in the request header.
    - model: The model identifier placed in the request body.
    - msgs: The list of messages placed in the request body.
    - cfg: Mapping of additional request body fields, merged over the fixed ones. None is treated as empty.
    - sid: Session ID string placed in the request body as 'session_id'.
    - stop_event: Object whose is_set() method signals that streaming must stop.
    - cancel_token: Dictionary whose 'cancelled' entry aborts streaming when truthy.
    Yields:
    - Tuples of the form ('reasoning', text) or ('content', text), in the order they arrive.
    Behaviour:
    - The request is attempted up to two times, with timeouts of 5 and then 10 seconds.
    - If the status code is listed in LINUX_SERVER_ERRORS, the key is marked and the generator ends.
    - Only lines starting with 'data: ' are processed. A payload equal to RESPONSES["RESPONSE_10"] ends the stream.
    - Malformed chunks are skipped without interrupting the stream.
    - Once any chunk has been yielded the request is never re-sent, because a retry would replay the
      response from its beginning and duplicate text the caller has already received.
    - If every attempt ends without yielding anything, the key is marked before the generator ends.
    """
    # Remembers whether the caller has already received output, which makes a retry unsafe
    emitted = False
    # Loop over two timeout values to attempt the request with increasing timeout durations
    for timeout in (5, 10):
        try:
            # Create an asynchronous HTTP client with the specified timeout for the request
            async with httpx.AsyncClient(timeout=timeout) as client:
                # Open a streaming POST request with the JSON payload and authorization header
                async with client.stream(
                    "POST",
                    host,
                    # Fixed parameters first so that entries from cfg can extend or override them
                    json={"model": model, "messages": msgs, "session_id": sid, "stream": True, **(cfg or {})},
                    headers={"Authorization": f"Bearer {key}"},  # Use Bearer token authentication
                ) as response:
                    # A listed error status marks the key and ends the generator without retrying
                    if response.status_code in LINUX_SERVER_ERRORS:
                        marked_item(key, LINUX_SERVER_PROVIDER_KEYS_MARKED, LINUX_SERVER_PROVIDER_KEYS_ATTEMPTS)
                        return
                    # Iterate asynchronously over each line of the streamed response content
                    async for line in response.aiter_lines():
                        # If the stop event is set or cancellation is requested, stop streaming and exit
                        if stop_event.is_set() or cancel_token["cancelled"]:
                            return
                        # Skip empty lines and anything that is not a 'data: ' payload line
                        if not line or not line.startswith("data: "):
                            continue
                        data = line[6:]  # Extract the JSON string after 'data: '
                        # If the data matches the predefined end-of-response message, stop streaming
                        if data.strip() == RESPONSES["RESPONSE_10"]:
                            return
                        for part in _parse_stream_chunk(data):
                            emitted = True
                            yield part
        except Exception:
            # Network errors, timeouts and similar failures lead to the next attempt, but only
            # while nothing has been delivered yet
            if emitted:
                return
            continue
        else:
            # The stream ended without the end-of-response message; if output was delivered the
            # request succeeded, so do not re-send it and do not mark the key
            if emitted:
                return
    # If all attempts ended without any output, mark the key as problematic to avoid future use
    marked_item(key, LINUX_SERVER_PROVIDER_KEYS_MARKED, LINUX_SERVER_PROVIDER_KEYS_ATTEMPTS)
    # Return None explicitly when streaming ends or fails after retries
    return