"""Azure OpenAI chat-model wrappers and the factory that selects one by name."""

from typing import Literal, cast

from openai import AzureOpenAI

from neollm.llm.abstract_llm import AbstractLLM
from neollm.llm.gpt.abstract_gpt import AbstractGPT
from neollm.types import (
    APIPricing,
    ClientSettings,
    LLMSettings,
    Messages,
    Response,
    StreamResponse,
)
from neollm.utils.utils import cprint, ensure_env_var, suport_unrecomended_env_var

# Backward compatibility for deprecated environment variable names.
# NOTE(review): presumably each call makes the value stored under `old_key`
# available under `new_key`; confirm against neollm.utils.utils.
suport_unrecomended_env_var(old_key="AZURE_API_BASE", new_key="AZURE_OPENAI_ENDPOINT")
suport_unrecomended_env_var(old_key="AZURE_API_VERSION", new_key="OPENAI_API_VERSION")
# Old keys without the "0613" date suffix
suport_unrecomended_env_var(old_key="AZURE_ENGINE_GPT35", new_key="AZURE_ENGINE_GPT35T_0613")
suport_unrecomended_env_var(old_key="AZURE_ENGINE_GPT35_16k", new_key="AZURE_ENGINE_GPT35T_16K_0613")
suport_unrecomended_env_var(old_key="AZURE_ENGINE_GPT4", new_key="AZURE_ENGINE_GPT4_0613")
suport_unrecomended_env_var(old_key="AZURE_ENGINE_GPT4_32k", new_key="AZURE_ENGINE_GPT4_32K_0613")
# Old keys missing the "T" (turbo) marker
suport_unrecomended_env_var(old_key="AZURE_ENGINE_GPT35_0613", new_key="AZURE_ENGINE_GPT35T_0613")
suport_unrecomended_env_var(old_key="AZURE_ENGINE_GPT35_16K_0613", new_key="AZURE_ENGINE_GPT35T_16K_0613")

# Pricing: https://azure.microsoft.com/en-us/pricing/details/cognitive-services/openai-service/

SUPPORTED_MODELS = Literal[
    "gpt-4o-2024-05-13",
    "gpt-4-turbo-2024-04-09",
    "gpt-3.5-turbo-0125",
    "gpt-4-turbo-0125",
    "gpt-3.5-turbo-1106",
    "gpt-4-turbo-1106",
    "gpt-4v-turbo-1106",
    "gpt-3.5-turbo-0613",
    "gpt-3.5-turbo-16k-0613",
    "gpt-4-0613",
    "gpt-4-32k-0613",
]


def get_azure_llm(model_name: SUPPORTED_MODELS | str, client_settings: ClientSettings) -> AbstractLLM:
    """Return the Azure LLM wrapper that corresponds to ``model_name``.

    Resolution steps:

    1. The Azure spelling ``gpt-35-turbo`` is rewritten to ``gpt-3.5-turbo``.
    2. A name without a date suffix (e.g. ``gpt-4``) is mapped to a dated
       default; a warning and the applied mapping are printed.
    3. A name listed in ``SUPPORTED_MODELS`` yields the matching wrapper.
    4. Any other name is treated as a fine-tuned deployment and wrapped in
       ``AzureGPT35FT`` with ``model_name`` used as the engine name.

    Args:
        model_name: Model identifier, with or without a date suffix, or the
            name of a fine-tuned deployment.
        client_settings: Settings passed to the wrapper's constructor.

    Returns:
        A newly constructed wrapper instance for the resolved model.
    """
    # Normalize notation
    model_name = model_name.replace("gpt-35-turbo", "gpt-3.5-turbo")

    # Append the default date suffix to undated names
    replace_map_for_nodate: dict[str, SUPPORTED_MODELS] = {
        "gpt-4o": "gpt-4o-2024-05-13",
        "gpt-3.5-turbo": "gpt-3.5-turbo-0613",
        "gpt-3.5-turbo-16k": "gpt-3.5-turbo-16k-0613",
        "gpt-4": "gpt-4-0613",
        "gpt-4-32k": "gpt-4-32k-0613",
        "gpt-4-turbo": "gpt-4-turbo-1106",
        "gpt-4v-turbo": "gpt-4v-turbo-1106",
    }
    if model_name in replace_map_for_nodate:
        cprint("WARNING: model_nameに日付を指定してください", color="yellow", background=True)
        print(f"model_name: {model_name} -> {replace_map_for_nodate[model_name]}")
        model_name = replace_map_for_nodate[model_name]

    # Map names to classes, not instances, so that only the requested model is
    # constructed instead of building every wrapper on each call.
    supported_model_map: dict[SUPPORTED_MODELS, type[AzureLLM]] = {
        "gpt-4o-2024-05-13": AzureGPT4O_20240513,
        "gpt-4-turbo-2024-04-09": AzureGPT4T_20240409,
        "gpt-3.5-turbo-0125": AzureGPT35T_0125,
        "gpt-4-turbo-0125": AzureGPT4T_0125,
        "gpt-3.5-turbo-1106": AzureGPT35T_1106,
        "gpt-4-turbo-1106": AzureGPT4T_1106,
        "gpt-4v-turbo-1106": AzureGPT4VT_1106,
        "gpt-3.5-turbo-0613": AzureGPT35T_0613,
        "gpt-3.5-turbo-16k-0613": AzureGPT35T16k_0613,
        "gpt-4-0613": AzureGPT4_0613,
        "gpt-4-32k-0613": AzureGPT432k_0613,
    }

    # Standard models
    llm_class = supported_model_map.get(cast(SUPPORTED_MODELS, model_name))
    if llm_class is not None:
        return llm_class(client_settings)

    # Fine-tuned models
    return AzureGPT35FT(model_name, client_settings)


class AzureLLM(AbstractGPT):
    """Common base for Azure-hosted chat models.

    Subclasses set ``_engine_name_env_key`` to the name of the environment
    variable whose value is used as the ``model`` argument of the API call.
    """

    _engine_name_env_key: str | None = None

    @property
    def client(self) -> AzureOpenAI:
        """Build an ``AzureOpenAI`` client from ``self.client_settings``.

        A new client object is created on every access.
        """
        # Options noted by the original author as accepted via client_settings:
        #   api_key: str | None = (None,)
        #   timeout: httpx.Timeout(timeout=600.0, connect=5.0)
        #   max_retries: int = 2
        return AzureOpenAI(**self.client_settings)

    @property
    def engine(self) -> str:
        """Engine name resolved through ``ensure_env_var``.

        NOTE(review): presumably fails when the variable is unset; confirm
        against neollm.utils.utils.
        """
        return ensure_env_var(self._engine_name_env_key)

    def generate(self, messages: Messages, llm_settings: LLMSettings) -> Response:
        """Run a non-streaming chat completion.

        Args:
            messages: Conversation, converted with
                ``_convert_to_platform_messages`` before sending.
            llm_settings: Extra keyword arguments forwarded to
                ``chat.completions.create``.

        Returns:
            The API result converted with ``_convert_to_response``.
        """
        openai_response = self.client.chat.completions.create(
            model=self.engine,
            messages=self._convert_to_platform_messages(messages),
            stream=False,
            **llm_settings,
        )
        return self._convert_to_response(openai_response)

    def generate_stream(self, messages: Messages, llm_settings: LLMSettings) -> StreamResponse:
        """Run a streaming chat completion.

        Args:
            messages: Conversation, converted with
                ``_convert_to_platform_messages`` before sending.
            llm_settings: Extra keyword arguments forwarded to
                ``chat.completions.create``.

        Returns:
            The API stream converted with ``_convert_to_streamresponse``.
        """
        platform_stream_response = self.client.chat.completions.create(
            model=self.engine,
            messages=self._convert_to_platform_messages(messages),
            stream=True,
            **llm_settings,
        )
        return self._convert_to_streamresponse(platform_stream_response)


# omni 2024-05-13 --------------------------------------------------------------------------------------------
class AzureGPT4O_20240513(AzureLLM):
    dollar_per_ktoken = APIPricing(input=0.005, output=0.015)  # 30x/45x
    model: str = "gpt-4o-2024-05-13"
    _engine_name_env_key: str = "AZURE_ENGINE_GPT4O_20240513"
    context_window: int = 128_000


# 2024-04-09 --------------------------------------------------------------------------------------------
class AzureGPT4T_20240409(AzureLLM):
    dollar_per_ktoken = APIPricing(input=0.01, output=0.03)
    model: str = "gpt-4-turbo-2024-04-09"
    _engine_name_env_key: str = "AZURE_ENGINE_GPT4T_20240409"
    context_window: int = 128_000


# 0125 --------------------------------------------------------------------------------------------
class AzureGPT35T_0125(AzureLLM):
    dollar_per_ktoken = APIPricing(input=0.0005, output=0.0015)
    model: str = "gpt-3.5-turbo-0125"
    _engine_name_env_key: str = "AZURE_ENGINE_GPT35T_0125"
    context_window: int = 16_385


class AzureGPT4T_0125(AzureLLM):
    dollar_per_ktoken = APIPricing(input=0.01, output=0.03)
    model: str = "gpt-4-turbo-0125"
    _engine_name_env_key: str = "AZURE_ENGINE_GPT4T_0125"
    context_window: int = 128_000


# 1106 --------------------------------------------------------------------------------------------
class AzureGPT35T_1106(AzureLLM):
    dollar_per_ktoken = APIPricing(input=0.001, output=0.002)
    model: str = "gpt-3.5-turbo-1106"
    _engine_name_env_key: str = "AZURE_ENGINE_GPT35T_1106"
    context_window: int = 16_385


class AzureGPT4VT_1106(AzureLLM):
    dollar_per_ktoken = APIPricing(input=0.01, output=0.03)  # 10x/15x
    model: str = "gpt-4-1106-vision-preview"
    _engine_name_env_key: str = "AZURE_ENGINE_GPT4VT_1106"
    context_window: int = 128_000


class AzureGPT4T_1106(AzureLLM):
    dollar_per_ktoken = APIPricing(input=0.01, output=0.03)
    model: str = "gpt-4-turbo-1106"
    _engine_name_env_key: str = "AZURE_ENGINE_GPT4T_1106"
    context_window: int = 128_000


# FT --------------------------------------------------------------------------------------------
class AzureGPT35FT(AzureLLM):
    """Fine-tuned GPT-3.5 deployment whose engine name is given explicitly."""

    dollar_per_ktoken = APIPricing(input=0.0005, output=0.0015)  # 1x + session uptime
    model: str = "gpt-3.5-turbo-ft"
    context_window: int = 4_096

    def __init__(self, model_name: str, client_setting: ClientSettings) -> None:
        """Store ``model_name`` as the engine name.

        Args:
            model_name: Value returned by ``engine`` and sent as ``model``.
            client_setting: Settings forwarded to the base-class constructor.
        """
        super().__init__(client_setting)
        self._engine = model_name

    @property
    def engine(self) -> str:
        """Engine name supplied at construction; no environment lookup."""
        return self._engine


# 0613 --------------------------------------------------------------------------------------------
class AzureGPT35T_0613(AzureLLM):
    dollar_per_ktoken = APIPricing(input=0.0015, output=0.002)
    model: str = "gpt-3.5-turbo-0613"
    _engine_name_env_key: str = "AZURE_ENGINE_GPT35T_0613"
    context_window: int = 4_096


class AzureGPT35T16k_0613(AzureLLM):
    dollar_per_ktoken = APIPricing(input=0.003, output=0.004)  # 2x
    model: str = "gpt-3.5-turbo-16k-0613"
    _engine_name_env_key: str = "AZURE_ENGINE_GPT35T_16K_0613"
    context_window: int = 16_385


class AzureGPT4_0613(AzureLLM):
    dollar_per_ktoken = APIPricing(input=0.03, output=0.06)  # 20x/30x
    model: str = "gpt-4-0613"
    _engine_name_env_key: str = "AZURE_ENGINE_GPT4_0613"
    context_window: int = 8_192


class AzureGPT432k_0613(AzureLLM):
    dollar_per_ktoken = APIPricing(input=0.06, output=0.12)  # 40x/60x
    model: str = "gpt-4-32k-0613"
    _engine_name_env_key: str = "AZURE_ENGINE_GPT4_32K_0613"
    context_window: int = 32_768