#
# SPDX-FileCopyrightText: Hadad
# SPDX-License-Identifier: Apache-2.0
#

import asyncio  # Import asyncio for asynchronous programming capabilities
import httpx  # Import httpx to perform asynchronous HTTP requests
import json  # Import json to handle JSON encoding and decoding
import random  # Import random to shuffle lists for load balancing
import uuid  # Import uuid to generate unique session identifiers
from config import *  # Import all configuration constants and variables from the config module
from src.cores.server import fetch_response_stream_async  # Import the async function that fetches streamed AI responses
from src.cores.session import ensure_stop_event, get_model_key  # Import session helper functions
from datetime import datetime  # Import datetime to get the current date and time

async def chat_with_model_async(history, user_input, model_display, sess, custom_prompt, deep_search):
    """
    Asynchronous function to handle interaction with an AI model and stream its responses.

    Parameters:
    - history: List of tuples containing previous conversation messages (user and assistant)
    - user_input: The current input string from the user
    - model_display: The display name of the AI model to use
    - sess: Session object containing session state, stop event, and cancellation token
    - custom_prompt: Optional custom system instructions that override the default instructions
    - deep_search: Boolean flag indicating whether to integrate deep search results into the instructions

    This function prepares the message history and system instructions, optionally enriches
    the instructions with deep search results if enabled, and attempts to fetch streamed
    responses from multiple backend providers with fallback. It yields chunks of the
    response asynchronously for real-time UI updates.
    """
    # Ensure the session has a stop event initialized to control streaming cancellation
    ensure_stop_event(sess)
    # Clear any previous stop event state to allow a new streaming session
    sess.stop_event.clear()
    # Reset the cancellation token to indicate the session is active and not cancelled
    sess.cancel_token["cancelled"] = False
    # Check that provider keys and hosts are configured; if not, yield a predefined error response and exit
    if not LINUX_SERVER_PROVIDER_KEYS or not LINUX_SERVER_HOSTS:
        yield ("content", RESPONSES["RESPONSE_3"])  # Inform the user that no backend providers are available
        return
    # Assign a unique session ID if one is not already present to track conversation context
    if not hasattr(sess, "session_id") or not sess.session_id:
        sess.session_id = str(uuid.uuid4())
    # Determine the internal model key from the display name, falling back to the default if not found
    model_key = get_model_key(model_display, MODEL_MAPPING, DEFAULT_MODEL_KEY)
    # Retrieve the model-specific configuration parameters or use the default configuration
    cfg = MODEL_CONFIG.get(model_key, DEFAULT_CONFIG)
    # Initialize a list to hold the messages that will be sent to the AI model
    msgs = []
    # Get the current date and time formatted as a readable string for context in the instructions
    current_date = datetime.now().strftime("%A, %B %d, %Y, %I:%M %p %Z")
    # Combine the internal AI instructions with the current date to form the full system instructions
    COMBINED_AI_INSTRUCTIONS = (
        INTERNAL_AI_INSTRUCTIONS
        + "\n\n\n"
        + f"Today is: {current_date}"
        + "\n\n\n"
    )
    # If deep search is enabled and the primary model is selected, prepend deep search instructions and results
    if deep_search and model_display == MODEL_CHOICES[0]:
        # Add the deep search instructions as a system message to guide the AI
        msgs.append({"role": "system", "content": DEEP_SEARCH_INSTRUCTIONS})
"system", "content": DEEP_SEARCH_INSTRUCTIONS}) try: # Create an asynchronous HTTP client session for making the deep search request async with httpx.AsyncClient() as client: # Define the payload with parameters for the deep search query payload = { "query": user_input, "topic": "general", "search_depth": "basic", "chunks_per_source": 5, "max_results": 5, "time_range": None, "days": 7, "include_answer": True, "include_raw_content": False, "include_images": False, "include_image_descriptions": False, "include_domains": [], "exclude_domains": [] } # Send a POST request to the deep search provider with authorization header and JSON payload r = await client.post( DEEP_SEARCH_PROVIDER_HOST, headers={"Authorization": f"Bearer {DEEP_SEARCH_PROVIDER_KEY}"}, json=payload ) # Parse the JSON response from the deep search provider sr_json = r.json() # Append the deep search results as a system message in JSON string format msgs.append({"role": "system", "content": json.dumps(sr_json)}) except Exception: # If any error occurs during deep search, fail silently without interrupting the chat flow pass # Append the combined AI instructions after the deep search content to maintain context msgs.append({"role": "system", "content": COMBINED_AI_INSTRUCTIONS}) # If deep search is not enabled but the primary model is selected, use only the combined AI instructions elif model_display == MODEL_CHOICES[0]: msgs.append({"role": "system", "content": COMBINED_AI_INSTRUCTIONS}) # For other models, use a custom instructions if provided, otherwise default to the system instructions mapping or default instructions else: msgs.append({"role": "system", "content": custom_prompt or SYSTEM_PROMPT_MAPPING.get(model_key, SYSTEM_PROMPT_DEFAULT)}) # Append the conversation history to the message list, alternating user and assistant messages # First add all user messages from history msgs.extend([{"role": "user", "content": u} for u, _ in history]) # Then add all assistant messages from history that are not empty msgs.extend([{"role": "assistant", "content": a} for _, a in history if a]) # Append the current user input as the latest user message msgs.append({"role": "user", "content": user_input}) # Create a list of all possible combinations of backend hosts and provider keys for load balancing and fallback candidates = [(h, k) for h in LINUX_SERVER_HOSTS for k in LINUX_SERVER_PROVIDER_KEYS] # Randomly shuffle the list of host-key pairs to distribute load evenly and avoid bias random.shuffle(candidates) # Iterate over each host and key pair to attempt fetching a streamed response for h, k in candidates: # Call the async generator function to fetch streamed response chunks from the backend stream_gen = fetch_response_stream_async( h, k, model_key, msgs, cfg, sess.session_id, sess.stop_event, sess.cancel_token ) # Flag to track if any response chunks were received from this provider got_responses = False # Asynchronously iterate over each chunk yielded by the streaming generator async for chunk in stream_gen: # If the stop event is set or cancellation requested, terminate streaming immediately if sess.stop_event.is_set() or sess.cancel_token["cancelled"]: return # Mark that at least one response chunk has been received got_responses = True # Yield the current chunk to the caller for incremental UI update or processing yield chunk # If any responses were received from this host-key pair, stop trying others and return if got_responses: return # If no responses were received from any provider, yield a fallback message indicating 
failure yield ("content", RESPONSES["RESPONSE_2"])