#
# SPDX-FileCopyrightText: Hadad <[email protected]>
# SPDX-License-Identifier: Apache-2.0
#
import asyncio  # Asynchronous programming support
import httpx  # Async HTTP client used for the deep search request
import json  # JSON encoding and decoding
import random  # Shuffling host-key pairs for load balancing
import uuid  # Generating unique session identifiers
from datetime import datetime  # Current date and time for the system instructions

from config import *  # All configuration constants and variables (hosts, keys, mappings, responses)
from src.cores.server import fetch_response_stream_async  # Async function to fetch streamed AI responses
from src.cores.session import ensure_stop_event, get_model_key  # Session helper functions


async def chat_with_model_async(history, user_input, model_display, sess, custom_prompt, deep_search):
""" | |
Asynchronous function to handle interaction with an AI model and stream its responses. | |
Parameters: | |
- history: List of tuples containing previous conversation messages (user and assistant) | |
- user_input: The current input string from the user | |
- model_display: The display name of the AI model to use | |
- sess: Session object containing session state, stop event, and cancellation token | |
- custom_prompt: Optional custom system instructions to override default instructions | |
- deep_search: Boolean flag indicating whether to integrate deep search results into the instructions | |
This function prepares the message history and system instructions, optionally enriches the instructions | |
with deep search results if enabled, and attempts to fetch streamed responses from multiple backend | |
providers with fallback. It yields chunks of the response asynchronously for real-time UI updates. | |
""" | |
    # Ensure the session has a stop event initialized to control streaming cancellation
    ensure_stop_event(sess)
    # Clear any previous stop event state to allow a new streaming session
    sess.stop_event.clear()
    # Reset the cancellation token to indicate the session is active and not cancelled
    sess.cancel_token["cancelled"] = False
    # If provider keys or hosts are not configured, yield a predefined error response and exit
    if not LINUX_SERVER_PROVIDER_KEYS or not LINUX_SERVER_HOSTS:
        yield ("content", RESPONSES["RESPONSE_3"])  # Inform the user that no backend providers are available
        return
    # Assign a unique session ID if not already present to track conversation context
    if not hasattr(sess, "session_id") or not sess.session_id:
        sess.session_id = str(uuid.uuid4())
    # Determine the internal model key from the display name, falling back to the default if not found
    model_key = get_model_key(model_display, MODEL_MAPPING, DEFAULT_MODEL_KEY)
    # Retrieve model-specific configuration parameters, or use the default configuration
    cfg = MODEL_CONFIG.get(model_key, DEFAULT_CONFIG)
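    # Illustrative lookup (hypothetical names; the real entries live in config):
    #   MODEL_MAPPING = {"My Model (fast)": "my-model-fast"}
    #   get_model_key("My Model (fast)", MODEL_MAPPING, DEFAULT_MODEL_KEY) -> "my-model-fast"
    #   get_model_key("Unknown Model", MODEL_MAPPING, DEFAULT_MODEL_KEY)   -> DEFAULT_MODEL_KEY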
    # Initialize the list of messages that will be sent to the AI model
    msgs = []
    # Current date and time as a readable string for context in the instructions;
    # astimezone() attaches the local timezone so that %Z does not format as an empty string
    current_date = datetime.now().astimezone().strftime("%A, %B %d, %Y, %I:%M %p %Z")
    # Combine the internal AI instructions with the current date to form the full system instructions
    COMBINED_AI_INSTRUCTIONS = (
        INTERNAL_AI_INSTRUCTIONS
        + "\n\n\n"
        + f"Today is: {current_date}"
        + "\n\n\n"
    )
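    # Illustrative result: "<INTERNAL_AI_INSTRUCTIONS>\n\n\nToday is: Monday, June 02, 2025, 03:14 PM PDT\n\n\n"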
    # If deep search is enabled and the primary model is selected, prepend deep search instructions and results
    if deep_search and model_display == MODEL_CHOICES[0]:
        # Add the deep search instructions as a system message to guide the AI
        msgs.append({"role": "system", "content": DEEP_SEARCH_INSTRUCTIONS})
        try:
            # Create an asynchronous HTTP client for the deep search request
            async with httpx.AsyncClient() as client:
                # Define the payload with parameters for the deep search query
                payload = {
                    "query": user_input,
                    "topic": "general",
                    "search_depth": "basic",
                    "chunks_per_source": 5,
                    "max_results": 5,
                    "time_range": None,
                    "days": 7,
                    "include_answer": True,
                    "include_raw_content": False,
                    "include_images": False,
                    "include_image_descriptions": False,
                    "include_domains": [],
                    "exclude_domains": []
                }
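                # The field names mirror a Tavily-style search API (an assumption based on
                # the parameter names; the actual provider is whatever config points
                # DEEP_SEARCH_PROVIDER_HOST at)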
                # Send a POST request to the deep search provider with an authorization header and the JSON payload
                r = await client.post(
                    DEEP_SEARCH_PROVIDER_HOST,
                    headers={"Authorization": f"Bearer {DEEP_SEARCH_PROVIDER_KEY}"},
                    json=payload
                )
                # Raise on a non-2xx status so an error body is not injected as search results
                r.raise_for_status()
                # Parse the JSON response from the deep search provider
                sr_json = r.json()
                # Append the deep search results as a system message in JSON string form
                msgs.append({"role": "system", "content": json.dumps(sr_json)})
        except Exception:
            # If anything goes wrong during deep search, fail silently without interrupting the chat flow
            pass
        # Append the combined AI instructions after the deep search content to maintain context
        msgs.append({"role": "system", "content": COMBINED_AI_INSTRUCTIONS})
    # If deep search is not enabled but the primary model is selected, use only the combined AI instructions
    elif model_display == MODEL_CHOICES[0]:
        msgs.append({"role": "system", "content": COMBINED_AI_INSTRUCTIONS})
    # For other models, use the custom instructions if provided, otherwise fall back to the system
    # instructions mapping or the default instructions
    else:
        msgs.append({"role": "system", "content": custom_prompt or SYSTEM_PROMPT_MAPPING.get(model_key, SYSTEM_PROMPT_DEFAULT)})
    # Append the conversation history in turn order, interleaving each user message with its
    # assistant reply (appending all user messages and then all assistant messages, as before,
    # would scramble the conversation order)
    for u, a in history:
        msgs.append({"role": "user", "content": u})
        if a:  # Skip empty assistant replies (e.g. a turn that never completed)
            msgs.append({"role": "assistant", "content": a})
    # Append the current user input as the latest user message
    msgs.append({"role": "user", "content": user_input})
    # Build every combination of backend host and provider key for load balancing and fallback
    candidates = [(h, k) for h in LINUX_SERVER_HOSTS for k in LINUX_SERVER_PROVIDER_KEYS]
    # Randomly shuffle the host-key pairs to distribute load evenly and avoid bias
    random.shuffle(candidates)
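    # e.g. two hosts and two keys yield four (host, key) pairs in random order,
    # so retries spread across every available provider combination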
    # Try each host-key pair in turn until one produces a streamed response
    for h, k in candidates:
        # Call the async generator that fetches streamed response chunks from the backend
        stream_gen = fetch_response_stream_async(
            h, k, model_key, msgs, cfg, sess.session_id, sess.stop_event, sess.cancel_token
        )
        # Track whether any response chunks were received from this provider
        got_responses = False
        # Asynchronously iterate over each chunk yielded by the streaming generator
        async for chunk in stream_gen:
            # If the stop event is set or cancellation was requested, terminate streaming immediately
            if sess.stop_event.is_set() or sess.cancel_token["cancelled"]:
                return
            # Mark that at least one response chunk has been received
            got_responses = True
            # Yield the current chunk to the caller for incremental UI updates
            yield chunk
        # If this host-key pair produced any output, stop here: retrying another provider
        # would duplicate content that has already been streamed to the user
        if got_responses:
            return
    # If no provider produced any response, yield a fallback message indicating failure
    yield ("content", RESPONSES["RESPONSE_2"])