#
# SPDX-FileCopyrightText: Hadad <[email protected]>
# SPDX-License-Identifier: Apache-2.0
#
import codecs  # Encoding/decoding helpers for text data
import httpx  # Asynchronous HTTP client used to stream responses from the backend
import json  # Parse streamed JSON chunks into Python objects
from src.cores.session import marked_item  # Tracks and marks keys that fail repeatedly so problematic keys are avoided
from config import LINUX_SERVER_ERRORS, LINUX_SERVER_PROVIDER_KEYS_MARKED, LINUX_SERVER_PROVIDER_KEYS_ATTEMPTS, RESPONSES  # Error codes, key-marking state, retry limits, and predefined response strings
async def fetch_response_stream_async(host, key, model, msgs, cfg, sid, stop_event, cancel_token):
    """
    Stream AI-generated response chunks from a backend server as an async generator.

    Args:
        host: URL of the backend endpoint to POST to.
        key: API key sent as a Bearer token in the Authorization header.
        model: Identifier of the AI model to use.
        msgs: List of chat messages forming the prompt/conversation.
        cfg: Dict of extra request parameters merged into the JSON body
            (entries may override the fixed parameters).
        sid: Session ID string associating the request with a session.
        stop_event: Async event; when set, streaming stops immediately.
        cancel_token: Dict with a boolean 'cancelled' flag for cooperative abort.

    Yields:
        ("reasoning", text) tuples for auxiliary reasoning text and
        ("content", text) tuples for the main generated content.

    Behavior:
        - Tries the request twice with increasing timeouts (5s, then 10s).
        - If the server responds with a status code in LINUX_SERVER_ERRORS, the
          key is marked as problematic via marked_item and streaming ends.
        - Stops cleanly when the server sends the RESPONSES["RESPONSE_10"]
          end-of-stream sentinel, or when cancellation is requested.
        - Malformed JSON chunks are skipped; network errors fall through to the
          next attempt. If every attempt fails, the key is marked.
    """
    for timeout in (5, 10):
        try:
            # Fresh client per attempt so the timeout applies to the whole request.
            async with httpx.AsyncClient(timeout=timeout) as client:
                async with client.stream(
                    "POST",
                    host,
                    # Fixed parameters first; cfg entries may override them.
                    json={"model": model, "messages": msgs, "session_id": sid, "stream": True, **cfg},
                    headers={"Authorization": f"Bearer {key}"}  # Bearer token auth
                ) as response:
                    if response.status_code in LINUX_SERVER_ERRORS:
                        # Server-side error: flag this key so it is avoided later.
                        marked_item(key, LINUX_SERVER_PROVIDER_KEYS_MARKED, LINUX_SERVER_PROVIDER_KEYS_ATTEMPTS)
                        return
                    async for line in response.aiter_lines():
                        # Honor cancellation between chunks.
                        if stop_event.is_set() or cancel_token["cancelled"]:
                            return
                        # Skip keep-alive blanks and anything that is not an SSE data line.
                        if not line or not line.startswith("data: "):
                            continue
                        data = line[6:]  # Payload after the "data: " prefix
                        if data.strip() == RESPONSES["RESPONSE_10"]:
                            # Explicit end-of-stream sentinel from the server.
                            return
                        try:
                            j = json.loads(data)
                            if isinstance(j, dict) and j.get("choices"):
                                for ch in j["choices"]:
                                    delta = ch.get("delta", {})  # Incremental update part
                                    reasoning = delta.get("reasoning")
                                    if reasoning:
                                        # Decode literal backslash escapes (e.g. "\\n",
                                        # "\\u4e2d") without corrupting real non-ASCII
                                        # characters: the latin-1/backslashreplace
                                        # round-trip passes codepoints <= U+00FF through
                                        # unchanged and re-escapes the rest, so
                                        # unicode_escape restores everything.  (The
                                        # previous utf-8 round-trip produced mojibake
                                        # for any non-ASCII reasoning text.)
                                        decoded = reasoning.encode("latin-1", "backslashreplace").decode("unicode_escape")
                                        yield ("reasoning", decoded)
                                    content = delta.get("content")
                                    if content:
                                        yield ("content", content)
                        except Exception:
                            # Malformed JSON or unexpected shape: skip this chunk.
                            continue
                    # NOTE(review): a stream that ends WITHOUT the sentinel falls
                    # through here and is retried with the next timeout, which can
                    # duplicate already-yielded chunks and ultimately mark the key
                    # — confirm this is the intended semantics.
        except Exception:
            # Network error or timeout: fall through to the next (longer) timeout.
            continue
    # Every attempt failed: mark the key as problematic to avoid reusing it.
    marked_item(key, LINUX_SERVER_PROVIDER_KEYS_MARKED, LINUX_SERVER_PROVIDER_KEYS_ATTEMPTS)
    return