""" | |
Nvidia NIM endpoint: https://docs.api.nvidia.com/nim/reference/databricks-dbrx-instruct-infer | |
This is OpenAI compatible | |
This file only contains param mapping logic | |
API calling is done using the OpenAI SDK with an api_base | |
""" | |

from typing import Optional, Union

from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig


class NvidiaNimConfig(OpenAIGPTConfig):
    """
    Reference: https://docs.api.nvidia.com/nim/reference/databricks-dbrx-instruct-infer

    The class `NvidiaNimConfig` provides configuration for Nvidia NIM's Chat
    Completions API interface. The supported parameters are listed below.
    """
    temperature: Optional[float] = None
    top_p: Optional[float] = None
    frequency_penalty: Optional[float] = None
    presence_penalty: Optional[float] = None
    max_tokens: Optional[int] = None
    stop: Optional[Union[str, list]] = None
    def __init__(
        self,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        frequency_penalty: Optional[float] = None,
        presence_penalty: Optional[float] = None,
        max_tokens: Optional[int] = None,
        stop: Optional[Union[str, list]] = None,
    ) -> None:
        locals_ = locals().copy()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                # Store the values as class-level attributes so they act as
                # shared defaults, picked up later by `get_config()`.
                setattr(self.__class__, key, value)
    @classmethod
    def get_config(cls):
        return super().get_config()
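
    # Illustrative sketch (assumes the standard behavior of the inherited
    # `get_config()`): values stored on the class by `__init__` are reported
    # back as the provider config, e.g.
    #
    #   NvidiaNimConfig(max_tokens=512)
    #   NvidiaNimConfig.get_config()  # includes {"max_tokens": 512}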
    def get_supported_openai_params(self, model: str) -> list:
        """
        Get the supported OpenAI params for the given model.

        Updated on July 5th, 2024 - based on https://docs.api.nvidia.com/nim/reference
        """
        if model in [
            "google/recurrentgemma-2b",
            "google/gemma-2-27b-it",
            "google/gemma-2-9b-it",
            "gemma-2-9b-it",
        ]:
            return ["stream", "temperature", "top_p", "max_tokens", "stop", "seed"]
        elif model == "nvidia/nemotron-4-340b-instruct":
            return [
                "stream",
                "temperature",
                "top_p",
                "max_tokens",
                "max_completion_tokens",
            ]
        elif model == "nvidia/nemotron-4-340b-reward":
            return [
                "stream",
            ]
        elif model in ["google/codegemma-1.1-7b"]:
            # most params - but no 'seed' :(
            return [
                "stream",
                "temperature",
                "top_p",
                "frequency_penalty",
                "presence_penalty",
                "max_tokens",
                "max_completion_tokens",
                "stop",
            ]
        else:
            # DEFAULT case - the vast majority of Nvidia NIM models fall here, e.g.:
            # "upstage/solar-10.7b-instruct",
            # "snowflake/arctic",
            # "seallms/seallm-7b-v2.5",
            # "nvidia/llama3-chatqa-1.5-8b",
            # "nvidia/llama3-chatqa-1.5-70b",
            # "mistralai/mistral-large",
            # "mistralai/mixtral-8x22b-instruct-v0.1",
            # "mistralai/mixtral-8x7b-instruct-v0.1",
            # "mistralai/mistral-7b-instruct-v0.3",
            # "mistralai/mistral-7b-instruct-v0.2",
            # "mistralai/codestral-22b-instruct-v0.1",
            # "microsoft/phi-3-small-8k-instruct",
            # "microsoft/phi-3-small-128k-instruct",
            # "microsoft/phi-3-mini-4k-instruct",
            # "microsoft/phi-3-mini-128k-instruct",
            # "microsoft/phi-3-medium-4k-instruct",
            # "microsoft/phi-3-medium-128k-instruct",
            # "meta/llama3-70b-instruct",
            # "meta/llama3-8b-instruct",
            # "meta/llama2-70b",
            # "meta/codellama-70b",
            return [
                "stream",
                "temperature",
                "top_p",
                "frequency_penalty",
                "presence_penalty",
                "max_tokens",
                "max_completion_tokens",
                "stop",
                "seed",
            ]
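
    # For example (values follow directly from the branches above):
    #
    #   NvidiaNimConfig().get_supported_openai_params("google/gemma-2-9b-it")
    #   # -> ["stream", "temperature", "top_p", "max_tokens", "stop", "seed"]
    #   NvidiaNimConfig().get_supported_openai_params("nvidia/nemotron-4-340b-reward")
    #   # -> ["stream"]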
    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        supported_openai_params = self.get_supported_openai_params(model=model)
        for param, value in non_default_params.items():
            if param == "max_completion_tokens":
                # Nvidia NIM expects `max_tokens`; the newer OpenAI
                # `max_completion_tokens` name is translated unconditionally,
                # regardless of the supported-param list.
                optional_params["max_tokens"] = value
            elif param in supported_openai_params:
                optional_params[param] = value
        return optional_params
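
# Minimal smoke test (an editorial sketch, not part of the original module):
# shows the translation of `max_completion_tokens` and the dropping of an
# unsupported param for a model whose supported list excludes "seed".
if __name__ == "__main__":
    config = NvidiaNimConfig()
    mapped = config.map_openai_params(
        non_default_params={"max_completion_tokens": 256, "seed": 42},
        optional_params={},
        model="nvidia/nemotron-4-340b-instruct",
        drop_params=True,
    )
    # "max_completion_tokens" is renamed to "max_tokens"; "seed" is silently
    # dropped because it is not in this model's supported-param list.
    assert mapped == {"max_tokens": 256}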