#
# SPDX-FileCopyrightText: Hadad <[email protected]>
# SPDX-License-Identifier: Apache-2.0
#

import asyncio  # Import asyncio for asynchronous programming capabilities
import httpx  # Import httpx to perform asynchronous HTTP requests
import json  # Import json to handle JSON encoding and decoding
import random  # Import random to shuffle lists for load balancing
import uuid  # Import uuid to generate unique session identifiers

from config import *  # Import all configuration constants and variables from config module
from src.cores.server import fetch_response_stream_async  # Import async function to fetch streamed AI responses
from src.cores.session import ensure_stop_event, get_model_key  # Import session helper functions
from datetime import datetime  # Import datetime to get current date and time information

async def chat_with_model_async(history, user_input, model_display, sess, custom_prompt, deep_search):
    """
    Asynchronous function to handle interaction with an AI model and stream its responses.

    Parameters:
    - history: List of tuples containing previous conversation messages (user and assistant)
    - user_input: The current input string from the user
    - model_display: The display name of the AI model to use
    - sess: Session object containing session state, stop event, and cancellation token
    - custom_prompt: Optional custom system instructions to override default instructions
    - deep_search: Boolean flag indicating whether to integrate deep search results into the instructions

    This function prepares the message history and system instructions, optionally enriches the instructions
    with deep search results if enabled, and attempts to fetch streamed responses from multiple backend
    providers with fallback. It yields chunks of the response asynchronously for real-time UI updates.
    """

    # Ensure the session has a stop event initialized to control streaming cancellation
    ensure_stop_event(sess)

    # Clear any previous stop event state to allow new streaming session
    sess.stop_event.clear()

    # Reset the cancellation token to indicate the session is active and not cancelled
    sess.cancel_token["cancelled"] = False

    # Check if provider keys and hosts are configured; if not, yield a predefined error response and exit
    if not LINUX_SERVER_PROVIDER_KEYS or not LINUX_SERVER_HOSTS:
        yield ("content", RESPONSES["RESPONSE_3"])  # Inform user no backend providers are available
        return

    # Assign a unique session ID if not already present to track conversation context
    if not hasattr(sess, "session_id") or not sess.session_id:
        sess.session_id = str(uuid.uuid4())

    # Determine the internal model key based on the display name, falling back to default if not found
    model_key = get_model_key(model_display, MODEL_MAPPING, DEFAULT_MODEL_KEY)

    # Retrieve model-specific configuration parameters or use default configuration
    cfg = MODEL_CONFIG.get(model_key, DEFAULT_CONFIG)

    # Initialize a list to hold the messages that will be sent to the AI model
    msgs = []

    # Obtain the current local date and time formatted as a readable string for context in instructions
    # (astimezone() makes the datetime timezone-aware so the %Z directive renders a zone name instead of an empty string)
    current_date = datetime.now().astimezone().strftime("%A, %B %d, %Y, %I:%M %p %Z")

    # Combine the internal AI instructions with the current date to form the combined system instructions
    COMBINED_AI_INSTRUCTIONS = (
        INTERNAL_AI_INSTRUCTIONS
        + "\n\n\n"
        + f"Today is: {current_date}"
        + "\n\n\n"
    )

    # If deep search is enabled and the primary model is selected, prepend deep search instructions and results
    if deep_search and model_display == MODEL_CHOICES[0]:
        # Add deep search instructions as a system message to guide the AI
        msgs.append({"role": "system", "content": DEEP_SEARCH_INSTRUCTIONS})
        try:
            # Create an asynchronous HTTP client session for making the deep search request
            async with httpx.AsyncClient() as client:
                # Define the payload with parameters for the deep search query
                payload = {
                    "query": user_input,
                    "topic": "general",
                    "search_depth": "basic",
                    "chunks_per_source": 5,
                    "max_results": 5,
                    "time_range": None,
                    "days": 7,
                    "include_answer": True,
                    "include_raw_content": False,
                    "include_images": False,
                    "include_image_descriptions": False,
                    "include_domains": [],
                    "exclude_domains": []
                }
                # Send a POST request to the deep search provider with authorization header and JSON payload
                r = await client.post(
                    DEEP_SEARCH_PROVIDER_HOST,
                    headers={"Authorization": f"Bearer {DEEP_SEARCH_PROVIDER_KEY}"},
                    json=payload
                )
                # Raise for HTTP error statuses so failed searches fall through to the silent except below
                r.raise_for_status()
                # Parse the JSON response from the deep search provider
                sr_json = r.json()
                # Append the deep search results as a system message in JSON string format
                msgs.append({"role": "system", "content": json.dumps(sr_json)})
        except Exception:
            # If any error occurs during deep search, fail silently without interrupting the chat flow
            pass
        # Append the combined AI instructions after the deep search content to maintain context
        msgs.append({"role": "system", "content": COMBINED_AI_INSTRUCTIONS})

    # If deep search is not enabled but the primary model is selected, use only the combined AI instructions
    elif model_display == MODEL_CHOICES[0]:
        msgs.append({"role": "system", "content": COMBINED_AI_INSTRUCTIONS})

    # For other models, use custom instructions if provided, otherwise fall back to the system instructions mapping or the default instructions
    else:
        msgs.append({"role": "system", "content": custom_prompt or SYSTEM_PROMPT_MAPPING.get(model_key, SYSTEM_PROMPT_DEFAULT)})

    # Append the conversation history to the message list, preserving the original turn order
    # Each history entry contributes a user message followed by its assistant reply, if present
    for u, a in history:
        msgs.append({"role": "user", "content": u})
        if a:
            msgs.append({"role": "assistant", "content": a})

    # Append the current user input as the latest user message
    msgs.append({"role": "user", "content": user_input})

    # Create a list of all possible combinations of backend hosts and provider keys for load balancing and fallback
    candidates = [(h, k) for h in LINUX_SERVER_HOSTS for k in LINUX_SERVER_PROVIDER_KEYS]

    # Randomly shuffle the list of host-key pairs to distribute load evenly and avoid bias
    random.shuffle(candidates)

    # Iterate over each host and key pair to attempt fetching a streamed response
    for h, k in candidates:
        # Call the async generator function to fetch streamed response chunks from the backend
        stream_gen = fetch_response_stream_async(
            h, k, model_key, msgs, cfg, sess.session_id, sess.stop_event, sess.cancel_token
        )

        # Flag to track if any response chunks were received from this provider
        got_responses = False

        # Asynchronously iterate over each chunk yielded by the streaming generator
        async for chunk in stream_gen:
            # If the stop event is set or cancellation requested, terminate streaming immediately
            if sess.stop_event.is_set() or sess.cancel_token["cancelled"]:
                return

            # Mark that at least one response chunk has been received
            got_responses = True

            # Yield the current chunk to the caller for incremental UI update or processing
            yield chunk

        # If any responses were received from this host-key pair, stop trying others and return
        if got_responses:
            return

    # If no responses were received from any provider, yield a fallback message indicating failure
    yield ("content", RESPONSES["RESPONSE_2"])