#
# SPDX-FileCopyrightText: Hadad <[email protected]>
# SPDX-License-Identifier: Apache-2.0
#
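"""
Asynchronous chat client for streaming AI responses.

Builds the message payload for a single chat turn, including the system
instructions, optional deep search context, and the prior conversation
history, then streams the model's reply from the configured backend
providers, shuffling host/key pairs for load balancing and falling back to
the next pair when one returns nothing.
"""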
import asyncio # Import asyncio for asynchronous programming capabilities
import httpx # Import httpx to perform asynchronous HTTP requests
import json # Import json to handle JSON encoding and decoding
import random # Import random to shuffle lists for load balancing
import uuid # Import uuid to generate unique session identifiers
from datetime import datetime # Import datetime to get current date and time information
from config import * # Import all configuration constants and variables from config module
from src.cores.server import fetch_response_stream_async # Import async function to fetch streamed AI responses
from src.cores.session import ensure_stop_event, get_model_key # Import session helper functions


async def chat_with_model_async(history, user_input, model_display, sess, custom_prompt, deep_search):
"""
Asynchronous function to handle interaction with an AI model and stream its responses.
Parameters:
- history: List of tuples containing previous conversation messages (user and assistant)
- user_input: The current input string from the user
- model_display: The display name of the AI model to use
- sess: Session object containing session state, stop event, and cancellation token
- custom_prompt: Optional custom system instructions to override default instructions
- deep_search: Boolean flag indicating whether to integrate deep search results into the instructions
This function prepares the message history and system instructions, optionally enriches the instructions
with deep search results if enabled, and attempts to fetch streamed responses from multiple backend
providers with fallback. It yields chunks of the response asynchronously for real-time UI updates.
"""
# Ensure the session has a stop event initialized to control streaming cancellation
ensure_stop_event(sess)
# Clear any previous stop event state to allow new streaming session
sess.stop_event.clear()
# Reset the cancellation token to indicate the session is active and not cancelled
sess.cancel_token["cancelled"] = False
# Check if provider keys and hosts are configured; if not, yield a predefined error response and exit
if not LINUX_SERVER_PROVIDER_KEYS or not LINUX_SERVER_HOSTS:
yield ("content", RESPONSES["RESPONSE_3"]) # Inform user no backend providers are available
return
# Assign a unique session ID if not already present to track conversation context
if not hasattr(sess, "session_id") or not sess.session_id:
sess.session_id = str(uuid.uuid4())
# Determine the internal model key based on the display name, falling back to default if not found
model_key = get_model_key(model_display, MODEL_MAPPING, DEFAULT_MODEL_KEY)
# Retrieve model-specific configuration parameters or use default configuration
cfg = MODEL_CONFIG.get(model_key, DEFAULT_CONFIG)
# Initialize a list to hold the messages that will be sent to the AI model
msgs = []
    # Obtain the current local date and time as an aware datetime so %Z resolves to a timezone name (it is empty for naive datetimes)
    current_date = datetime.now().astimezone().strftime("%A, %B %d, %Y, %I:%M %p %Z")
    # Combine the internal AI instructions with the current date to form the full system instructions
COMBINED_AI_INSTRUCTIONS = (
INTERNAL_AI_INSTRUCTIONS
+ "\n\n\n"
+ f"Today is: {current_date}"
+ "\n\n\n"
)
# If deep search is enabled and the primary model is selected, prepend deep search instructions and results
if deep_search and model_display == MODEL_CHOICES[0]:
# Add deep search instructions as a system message to guide the AI
msgs.append({"role": "system", "content": DEEP_SEARCH_INSTRUCTIONS})
try:
# Create an asynchronous HTTP client session for making the deep search request
async with httpx.AsyncClient() as client:
# Define the payload with parameters for the deep search query
payload = {
"query": user_input,
"topic": "general",
"search_depth": "basic",
"chunks_per_source": 5,
"max_results": 5,
"time_range": None,
"days": 7,
"include_answer": True,
"include_raw_content": False,
"include_images": False,
"include_image_descriptions": False,
"include_domains": [],
"exclude_domains": []
}
                # Send a POST request to the deep search provider with authorization header and JSON payload
                r = await client.post(
                    DEEP_SEARCH_PROVIDER_HOST,
                    headers={"Authorization": f"Bearer {DEEP_SEARCH_PROVIDER_KEY}"},
                    json=payload
                )
                # Raise on non-2xx replies so error bodies are never injected into the prompt
                r.raise_for_status()
                # Parse the JSON response from the deep search provider
                sr_json = r.json()
# Append the deep search results as a system message in JSON string format
msgs.append({"role": "system", "content": json.dumps(sr_json)})
except Exception:
# If any error occurs during deep search, fail silently without interrupting the chat flow
pass
# Append the combined AI instructions after the deep search content to maintain context
msgs.append({"role": "system", "content": COMBINED_AI_INSTRUCTIONS})
# If deep search is not enabled but the primary model is selected, use only the combined AI instructions
elif model_display == MODEL_CHOICES[0]:
msgs.append({"role": "system", "content": COMBINED_AI_INSTRUCTIONS})
    # For other models, use the custom instructions if provided, otherwise fall back to the per-model system prompt or the default
else:
msgs.append({"role": "system", "content": custom_prompt or SYSTEM_PROMPT_MAPPING.get(model_key, SYSTEM_PROMPT_DEFAULT)})
    # Replay the conversation history in its original turn order so the model sees a coherent dialogue
    for u, a in history:
        # Each history entry is a (user, assistant) tuple; the user turn is always present
        msgs.append({"role": "user", "content": u})
        # Include the assistant turn only when it is non-empty
        if a:
            msgs.append({"role": "assistant", "content": a})
# Append the current user input as the latest user message
msgs.append({"role": "user", "content": user_input})
# Create a list of all possible combinations of backend hosts and provider keys for load balancing and fallback
candidates = [(h, k) for h in LINUX_SERVER_HOSTS for k in LINUX_SERVER_PROVIDER_KEYS]
# Randomly shuffle the list of host-key pairs to distribute load evenly and avoid bias
random.shuffle(candidates)
# Iterate over each host and key pair to attempt fetching a streamed response
for h, k in candidates:
# Call the async generator function to fetch streamed response chunks from the backend
stream_gen = fetch_response_stream_async(
h, k, model_key, msgs, cfg, sess.session_id, sess.stop_event, sess.cancel_token
)
# Flag to track if any response chunks were received from this provider
got_responses = False
# Asynchronously iterate over each chunk yielded by the streaming generator
async for chunk in stream_gen:
# If the stop event is set or cancellation requested, terminate streaming immediately
if sess.stop_event.is_set() or sess.cancel_token["cancelled"]:
return
# Mark that at least one response chunk has been received
got_responses = True
# Yield the current chunk to the caller for incremental UI update or processing
yield chunk
# If any responses were received from this host-key pair, stop trying others and return
if got_responses:
return
# If no responses were received from any provider, yield a fallback message indicating failure
yield ("content", RESPONSES["RESPONSE_2"])