#
# SPDX-FileCopyrightText: Hadad <[email protected]>
# SPDX-License-Identifier: Apache-2.0
#
import asyncio  # Asynchronous programming support
import httpx  # Async HTTP client used for the deep search request
import json  # JSON encoding and decoding
import random  # Shuffling host-key pairs for load balancing
import uuid  # Generating unique session identifiers
from datetime import datetime  # Current date and time for the system instructions

from config import *  # All configuration constants and variables (hosts, keys, mappings, responses)
from src.cores.server import fetch_response_stream_async  # Async function to fetch streamed AI responses
from src.cores.session import ensure_stop_event, get_model_key  # Session helper functions


async def chat_with_model_async(history, user_input, model_display, sess, custom_prompt, deep_search):
""" | |
Asynchronous function to handle interaction with an AI model and stream its responses. | |
Parameters: | |
- history: List of tuples containing previous conversation messages (user and assistant) | |
- user_input: The current input string from the user | |
- model_display: The display name of the AI model to use | |
- sess: Session object containing session state, stop event, and cancellation token | |
- custom_prompt: Optional custom system instructions to override default instructions | |
- deep_search: Boolean flag indicating whether to integrate deep search results into the instructions | |
This function prepares the message history and system instructions, optionally enriches the instructions | |
with deep search results if enabled, and attempts to fetch streamed responses from multiple backend | |
providers with fallback. It yields chunks of the response asynchronously for real-time UI updates. | |
""" | |
    # Ensure the session has a stop event initialized to control streaming cancellation
    ensure_stop_event(sess)
    # Clear any previous stop event state to allow a new streaming session
    sess.stop_event.clear()
    # Reset the cancellation token to indicate the session is active and not cancelled
    sess.cancel_token["cancelled"] = False
    # If provider keys or hosts are not configured, yield a predefined error response and exit
    if not LINUX_SERVER_PROVIDER_KEYS or not LINUX_SERVER_HOSTS:
        yield ("content", RESPONSES["RESPONSE_3"])  # Inform the user that no backend providers are available
        return
    # Assign a unique session ID if not already present to track conversation context
    if not hasattr(sess, "session_id") or not sess.session_id:
        sess.session_id = str(uuid.uuid4())
    # Determine the internal model key from the display name, falling back to the default if not found
    model_key = get_model_key(model_display, MODEL_MAPPING, DEFAULT_MODEL_KEY)
    # Retrieve model-specific configuration parameters, or use the default configuration
    cfg = MODEL_CONFIG.get(model_key, DEFAULT_CONFIG)
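    # Illustrative lookup (hypothetical names; the real entries live in config):
    #   MODEL_MAPPING = {"My Model (fast)": "my-model-fast"}
    #   get_model_key("My Model (fast)", MODEL_MAPPING, DEFAULT_MODEL_KEY) -> "my-model-fast"
    #   get_model_key("Unknown Model", MODEL_MAPPING, DEFAULT_MODEL_KEY)   -> DEFAULT_MODEL_KEY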
    # Initialize the list of messages that will be sent to the AI model
    msgs = []
    # Current date and time as a readable string for context in the instructions;
    # astimezone() attaches the local timezone so that %Z does not format as an empty string
    current_date = datetime.now().astimezone().strftime("%A, %B %d, %Y, %I:%M %p %Z")
    # Combine the internal AI instructions with the current date to form the full system instructions
    COMBINED_AI_INSTRUCTIONS = (
        INTERNAL_AI_INSTRUCTIONS
        + "\n\n\n"
        + f"Today is: {current_date}"
        + "\n\n\n"
    )
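    # Illustrative result: "<INTERNAL_AI_INSTRUCTIONS>\n\n\nToday is: Monday, June 02, 2025, 03:14 PM PDT\n\n\n"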
    # If deep search is enabled and the primary model is selected, prepend deep search instructions and results
    if deep_search and model_display == MODEL_CHOICES[0]:
        # Add the deep search instructions as a system message to guide the AI
        msgs.append({"role": "system", "content": DEEP_SEARCH_INSTRUCTIONS})
        try:
            # Create an asynchronous HTTP client for the deep search request
            async with httpx.AsyncClient() as client:
                # Define the payload with parameters for the deep search query
                payload = {
                    "query": user_input,
                    "topic": "general",
                    "search_depth": "basic",
                    "chunks_per_source": 5,
                    "max_results": 5,
                    "time_range": None,
                    "days": 7,
                    "include_answer": True,
                    "include_raw_content": False,
                    "include_images": False,
                    "include_image_descriptions": False,
                    "include_domains": [],
                    "exclude_domains": []
                }
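                # The field names mirror a Tavily-style search API (an assumption based on
                # the parameter names; the actual provider is whatever config points
                # DEEP_SEARCH_PROVIDER_HOST at)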
                # Send a POST request to the deep search provider with an authorization header and the JSON payload
                r = await client.post(
                    DEEP_SEARCH_PROVIDER_HOST,
                    headers={"Authorization": f"Bearer {DEEP_SEARCH_PROVIDER_KEY}"},
                    json=payload
                )
                # Raise on a non-2xx status so an error body is not injected as search results
                r.raise_for_status()
                # Parse the JSON response from the deep search provider
                sr_json = r.json()
                # Append the deep search results as a system message in JSON string form
                msgs.append({"role": "system", "content": json.dumps(sr_json)})
        except Exception:
            # If anything goes wrong during deep search, fail silently without interrupting the chat flow
            pass
        # Append the combined AI instructions after the deep search content to maintain context
        msgs.append({"role": "system", "content": COMBINED_AI_INSTRUCTIONS})
    # If deep search is not enabled but the primary model is selected, use only the combined AI instructions
    elif model_display == MODEL_CHOICES[0]:
        msgs.append({"role": "system", "content": COMBINED_AI_INSTRUCTIONS})
    # For other models, use the custom instructions if provided, otherwise fall back to the system
    # instructions mapping or the default instructions
    else:
        msgs.append({"role": "system", "content": custom_prompt or SYSTEM_PROMPT_MAPPING.get(model_key, SYSTEM_PROMPT_DEFAULT)})
    # Append the conversation history in turn order, interleaving each user message with its
    # assistant reply (appending all user messages and then all assistant messages, as before,
    # would scramble the conversation order)
    for u, a in history:
        msgs.append({"role": "user", "content": u})
        if a:  # Skip empty assistant replies (e.g. a turn that never completed)
            msgs.append({"role": "assistant", "content": a})
    # Append the current user input as the latest user message
    msgs.append({"role": "user", "content": user_input})
    # Build every combination of backend host and provider key for load balancing and fallback
    candidates = [(h, k) for h in LINUX_SERVER_HOSTS for k in LINUX_SERVER_PROVIDER_KEYS]
    # Randomly shuffle the host-key pairs to distribute load evenly and avoid bias
    random.shuffle(candidates)
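    # e.g. two hosts and two keys yield four (host, key) pairs in random order,
    # so retries spread across every available provider combination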
    # Try each host-key pair in turn until one produces a streamed response
    for h, k in candidates:
        # Call the async generator that fetches streamed response chunks from the backend
        stream_gen = fetch_response_stream_async(
            h, k, model_key, msgs, cfg, sess.session_id, sess.stop_event, sess.cancel_token
        )
        # Track whether any response chunks were received from this provider
        got_responses = False
        # Asynchronously iterate over each chunk yielded by the streaming generator
        async for chunk in stream_gen:
            # If the stop event is set or cancellation was requested, terminate streaming immediately
            if sess.stop_event.is_set() or sess.cancel_token["cancelled"]:
                return
            # Mark that at least one response chunk has been received
            got_responses = True
            # Yield the current chunk to the caller for incremental UI updates
            yield chunk
        # If this host-key pair produced any output, stop here: retrying another provider
        # would duplicate content that has already been streamed to the user
        if got_responses:
            return
    # If no provider produced any response, yield a fallback message indicating failure
    yield ("content", RESPONSES["RESPONSE_2"])