""" Nvidia NIM endpoint: https://docs.api.nvidia.com/nim/reference/databricks-dbrx-instruct-infer This is OpenAI compatible This file only contains param mapping logic API calling is done using the OpenAI SDK with an api_base """ from typing import Optional, Union from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig class NvidiaNimConfig(OpenAIGPTConfig): """ Reference: https://docs.api.nvidia.com/nim/reference/databricks-dbrx-instruct-infer The class `NvidiaNimConfig` provides configuration for the Nvidia NIM's Chat Completions API interface. Below are the parameters: """ temperature: Optional[int] = None top_p: Optional[int] = None frequency_penalty: Optional[int] = None presence_penalty: Optional[int] = None max_tokens: Optional[int] = None stop: Optional[Union[str, list]] = None def __init__( self, temperature: Optional[int] = None, top_p: Optional[int] = None, frequency_penalty: Optional[int] = None, presence_penalty: Optional[int] = None, max_tokens: Optional[int] = None, stop: Optional[Union[str, list]] = None, ) -> None: locals_ = locals().copy() for key, value in locals_.items(): if key != "self" and value is not None: setattr(self.__class__, key, value) @classmethod def get_config(cls): return super().get_config() def get_supported_openai_params(self, model: str) -> list: """ Get the supported OpenAI params for the given model Updated on July 5th, 2024 - based on https://docs.api.nvidia.com/nim/reference """ if model in [ "google/recurrentgemma-2b", "google/gemma-2-27b-it", "google/gemma-2-9b-it", "gemma-2-9b-it", ]: return ["stream", "temperature", "top_p", "max_tokens", "stop", "seed"] elif model == "nvidia/nemotron-4-340b-instruct": return [ "stream", "temperature", "top_p", "max_tokens", "max_completion_tokens", ] elif model == "nvidia/nemotron-4-340b-reward": return [ "stream", ] elif model in ["google/codegemma-1.1-7b"]: # most params - but no 'seed' :( return [ "stream", "temperature", "top_p", "frequency_penalty", "presence_penalty", "max_tokens", "max_completion_tokens", "stop", ] else: # DEFAULT Case - The vast majority of Nvidia NIM Models lie here # "upstage/solar-10.7b-instruct", # "snowflake/arctic", # "seallms/seallm-7b-v2.5", # "nvidia/llama3-chatqa-1.5-8b", # "nvidia/llama3-chatqa-1.5-70b", # "mistralai/mistral-large", # "mistralai/mixtral-8x22b-instruct-v0.1", # "mistralai/mixtral-8x7b-instruct-v0.1", # "mistralai/mistral-7b-instruct-v0.3", # "mistralai/mistral-7b-instruct-v0.2", # "mistralai/codestral-22b-instruct-v0.1", # "microsoft/phi-3-small-8k-instruct", # "microsoft/phi-3-small-128k-instruct", # "microsoft/phi-3-mini-4k-instruct", # "microsoft/phi-3-mini-128k-instruct", # "microsoft/phi-3-medium-4k-instruct", # "microsoft/phi-3-medium-128k-instruct", # "meta/llama3-70b-instruct", # "meta/llama3-8b-instruct", # "meta/llama2-70b", # "meta/codellama-70b", return [ "stream", "temperature", "top_p", "frequency_penalty", "presence_penalty", "max_tokens", "max_completion_tokens", "stop", "seed", ] def map_openai_params( self, non_default_params: dict, optional_params: dict, model: str, drop_params: bool, ) -> dict: supported_openai_params = self.get_supported_openai_params(model=model) for param, value in non_default_params.items(): if param == "max_completion_tokens": optional_params["max_tokens"] = value elif param in supported_openai_params: optional_params[param] = value return optional_params