import requests
from typing import List, Dict, Any, Optional, Union

from .exceptions import UnauthorizedError


class ChatClient:
    """Client for the ``/chat/completions`` endpoint of a LiteLLM proxy server."""

    def __init__(self, base_url: str, api_key: Optional[str] = None):
        """
        Initialize the ChatClient.

        Args:
            base_url (str): The base URL of the LiteLLM proxy server
                (e.g., "http://localhost:8000"). Trailing slashes are stripped.
            api_key (Optional[str]): API key for authentication. If provided,
                it will be sent as a Bearer token.
        """
        # Strip trailing slashes so endpoint paths can be appended with a
        # single "/" without producing "//" in the final URL.
        self._base_url = base_url.rstrip("/")
        self._api_key = api_key

    def _get_headers(self) -> Dict[str, str]:
        """
        Get the headers for API requests, including authorization if api_key is set.

        Returns:
            Dict[str, str]: Headers to use for API requests. Always contains
                ``Content-Type: application/json``; ``Authorization`` is added
                only when a non-empty API key was configured.
        """
        headers = {"Content-Type": "application/json"}
        if self._api_key:
            headers["Authorization"] = f"Bearer {self._api_key}"
        return headers

    def completions(
        self,
        model: str,
        messages: List[Dict[str, str]],
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        n: Optional[int] = None,
        max_tokens: Optional[int] = None,
        presence_penalty: Optional[float] = None,
        frequency_penalty: Optional[float] = None,
        user: Optional[str] = None,
        return_request: bool = False,
    ) -> Union[Dict[str, Any], requests.Request]:
        """
        Create a chat completion.

        Optional parameters left as ``None`` are omitted from the request body.
        Values are forwarded as given; no range validation is done here.

        Args:
            model (str): The model to use for completion
            messages (List[Dict[str, str]]): The messages to generate a completion for
            temperature (Optional[float]): Sampling temperature between 0 and 2
            top_p (Optional[float]): Nucleus sampling parameter between 0 and 1
            n (Optional[int]): Number of completions to generate
            max_tokens (Optional[int]): Maximum number of tokens to generate
            presence_penalty (Optional[float]): Presence penalty between -2.0 and 2.0
            frequency_penalty (Optional[float]): Frequency penalty between -2.0 and 2.0
            user (Optional[str]): Unique identifier for the end user
            return_request (bool): If True, returns the unsent ``requests.Request``
                object (not yet prepared) instead of executing it

        Returns:
            Union[Dict[str, Any], requests.Request]: Either the decoded JSON
                completion response from the server, or the unsent request
                object if return_request is True

        Raises:
            UnauthorizedError: If the request fails with a 401 status code
            requests.exceptions.RequestException: If the request fails with any other error
        """
        url = f"{self._base_url}/chat/completions"

        # Required fields first, then only the optional ones the caller set.
        data: Dict[str, Any] = {"model": model, "messages": messages}
        optional_fields = {
            "temperature": temperature,
            "top_p": top_p,
            "n": n,
            "max_tokens": max_tokens,
            "presence_penalty": presence_penalty,
            "frequency_penalty": frequency_penalty,
            "user": user,
        }
        data.update(
            {key: value for key, value in optional_fields.items() if value is not None}
        )

        request = requests.Request("POST", url, headers=self._get_headers(), json=data)

        if return_request:
            return request

        # Use the session as a context manager so its connection pool is
        # released even when the request raises.
        with requests.Session() as session:
            try:
                response = session.send(request.prepare())
                response.raise_for_status()
                return response.json()
            except requests.exceptions.HTTPError as e:
                if e.response.status_code == 401:
                    # Keep the HTTP error as the explicit cause.
                    raise UnauthorizedError(e) from e
                raise