from enum import Enum
from typing import Optional, Dict, List, Union, Literal, Any

from openai.types.chat import (
    ChatCompletionMessageParam,
    ChatCompletionToolChoiceOptionParam,
)
from openai.types.chat.completion_create_params import FunctionCall, ResponseFormat
from openai.types.create_embedding_response import Usage
from pydantic import BaseModel


class Role(str, Enum):
    USER = "user"
    ASSISTANT = "assistant"
    SYSTEM = "system"
    FUNCTION = "function"
    TOOL = "tool"


class ErrorResponse(BaseModel):
    object: str = "error"
    message: str
    code: int


class ChatCompletionCreateParams(BaseModel):
    messages: List[ChatCompletionMessageParam]
    """A list of messages comprising the conversation so far.

    [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models).
    """

    model: str
    """ID of the model to use.

    See the
    [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility)
    table for details on which models work with the Chat API.
    """

    frequency_penalty: Optional[float] = 0.
    """Number between -2.0 and 2.0.

    Positive values penalize new tokens based on their existing frequency in the
    text so far, decreasing the model's likelihood to repeat the same line verbatim.

    [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details)
    """

    function_call: Optional[FunctionCall] = None
    """Deprecated in favor of `tool_choice`.

    Controls which (if any) function is called by the model. `none` means the model
    will not call a function and instead generates a message. `auto` means the model
    can pick between generating a message or calling a function. Specifying a
    particular function via `{"name": "my_function"}` forces the model to call that
    function.

    `none` is the default when no functions are present. `auto` is the default if
    functions are present.
    """

    functions: Optional[List] = None
    """Deprecated in favor of `tools`.

    A list of functions the model may generate JSON inputs for.
    """

    logit_bias: Optional[Dict[str, int]] = None
    """Modify the likelihood of specified tokens appearing in the completion.

    Accepts a JSON object that maps tokens (specified by their token ID in the
    tokenizer) to an associated bias value from -100 to 100. Mathematically, the
    bias is added to the logits generated by the model prior to sampling. The exact
    effect will vary per model, but values between -1 and 1 should decrease or
    increase likelihood of selection; values like -100 or 100 should result in a ban
    or exclusive selection of the relevant token.
    """

    max_tokens: Optional[int] = None
    """The maximum number of [tokens](/tokenizer) to generate in the chat completion.

    The total length of input tokens and generated tokens is limited by the model's
    context length.
    [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
    for counting tokens.
    """

    n: Optional[int] = 1
    """How many chat completion choices to generate for each input message."""

    presence_penalty: Optional[float] = 0.
    """Number between -2.0 and 2.0.

    Positive values penalize new tokens based on whether they appear in the text so
    far, increasing the model's likelihood to talk about new topics.

    [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details)
    """

    response_format: Optional[ResponseFormat] = None
    """An object specifying the format that the model must output.

    Used to enable JSON mode.
    """

    seed: Optional[int] = None
    """This feature is in Beta.

    If specified, our system will make a best effort to sample deterministically,
    such that repeated requests with the same `seed` and parameters should return
    the same result.

    Determinism is not guaranteed, and you should refer to the `system_fingerprint`
    response parameter to monitor changes in the backend.
    """

    stop: Optional[Union[str, List[str]]] = None
    """Up to 4 sequences where the API will stop generating further tokens."""

    temperature: Optional[float] = 0.9
    """What sampling temperature to use, between 0 and 2.

    Higher values like 0.8 will make the output more random, while lower values like
    0.2 will make it more focused and deterministic.

    We generally recommend altering this or `top_p` but not both.
    """

    tool_choice: Optional[ChatCompletionToolChoiceOptionParam] = None
    """
    Controls which (if any) function is called by the model. `none` means the model
    will not call a function and instead generates a message. `auto` means the model
    can pick between generating a message or calling a function. Specifying a
    particular function via
    `{"type": "function", "function": {"name": "my_function"}}` forces the model to
    call that function.

    `none` is the default when no functions are present. `auto` is the default if
    functions are present.
    """

    tools: Optional[List] = None
    """A list of tools the model may call.

    Currently, only functions are supported as a tool. Use this to provide a list of
    functions the model may generate JSON inputs for.
    """

    top_p: Optional[float] = 1.0
    """
    An alternative to sampling with temperature, called nucleus sampling, where the
    model considers the results of the tokens with top_p probability mass. So 0.1
    means only the tokens comprising the top 10% probability mass are considered.

    We generally recommend altering this or `temperature` but not both.
    """

    user: Optional[str] = None
    """
    A unique identifier representing your end-user, which can help OpenAI to monitor
    and detect abuse.
    [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids).
    """

    stream: Optional[bool] = False
    """If set, partial message deltas will be sent, like in ChatGPT.

    Tokens will be sent as data-only
    [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
    as they become available, with the stream terminated by a `data: [DONE]` message.
    [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
    """

    # Additional parameters
    repetition_penalty: Optional[float] = 1.03
    """The parameter for repetition penalty. 1.0 means no penalty.

    See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
    """

    typical_p: Optional[float] = None
    """Typical decoding mass.

    See [Typical Decoding for Natural Language Generation](https://arxiv.org/abs/2202.00666)
    for more information.
    """

    watermark: Optional[bool] = False
    """Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226)."""

    best_of: Optional[int] = 1

    ignore_eos: Optional[bool] = False

    use_beam_search: Optional[bool] = False

    stop_token_ids: Optional[List[int]] = None

    skip_special_tokens: Optional[bool] = True

    spaces_between_special_tokens: Optional[bool] = True

    min_p: Optional[float] = 0.0


class CompletionCreateParams(BaseModel):
    model: str
    """ID of the model to use.

    You can use the
    [List models](https://platform.openai.com/docs/api-reference/models/list) API to
    see all of your available models, or see our
    [Model overview](https://platform.openai.com/docs/models/overview) for
    descriptions of them.
    """

    prompt: Union[str, List[str], List[int], List[List[int]], None]
    """
    The prompt(s) to generate completions for, encoded as a string, array of
    strings, array of tokens, or array of token arrays.

    Note that <|endoftext|> is the document separator that the model sees during
    training, so if a prompt is not specified the model will generate as if from the
    beginning of a new document.
    """

    best_of: Optional[int] = 1
    """
    Generates `best_of` completions server-side and returns the "best" (the one with
    the highest log probability per token). Results cannot be streamed.

    When used with `n`, `best_of` controls the number of candidate completions and
    `n` specifies how many to return – `best_of` must be greater than `n`.

    **Note:** Because this parameter generates many completions, it can quickly
    consume your token quota. Use carefully and ensure that you have reasonable
    settings for `max_tokens` and `stop`.
    """

    echo: Optional[bool] = False
    """Echo back the prompt in addition to the completion"""

    frequency_penalty: Optional[float] = 0.
    """Number between -2.0 and 2.0.

    Positive values penalize new tokens based on their existing frequency in the
    text so far, decreasing the model's likelihood to repeat the same line verbatim.

    [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details)
    """

    logit_bias: Optional[Dict[str, int]] = None
    """Modify the likelihood of specified tokens appearing in the completion.

    Accepts a JSON object that maps tokens (specified by their token ID in the GPT
    tokenizer) to an associated bias value from -100 to 100. You can use this
    [tokenizer tool](/tokenizer?view=bpe) (which works for both GPT-2 and GPT-3) to
    convert text to token IDs. Mathematically, the bias is added to the logits
    generated by the model prior to sampling. The exact effect will vary per model,
    but values between -1 and 1 should decrease or increase likelihood of selection;
    values like -100 or 100 should result in a ban or exclusive selection of the
    relevant token.

    As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
    from being generated.
    """

    logprobs: Optional[int] = None
    """
    Include the log probabilities on the `logprobs` most likely tokens, as well as
    the chosen tokens. For example, if `logprobs` is 5, the API will return a list
    of the 5 most likely tokens. The API will always return the `logprob` of the
    sampled token, so there may be up to `logprobs+1` elements in the response.

    The maximum value for `logprobs` is 5.
    """

    max_tokens: Optional[int] = 16
    """The maximum number of [tokens](/tokenizer) to generate in the completion.

    The token count of your prompt plus `max_tokens` cannot exceed the model's
    context length.
    [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
    for counting tokens.
    """

    n: Optional[int] = 1
    """How many completions to generate for each prompt.

    **Note:** Because this parameter generates many completions, it can quickly
    consume your token quota. Use carefully and ensure that you have reasonable
    settings for `max_tokens` and `stop`.
    """

    presence_penalty: Optional[float] = 0.
    """Number between -2.0 and 2.0.

    Positive values penalize new tokens based on whether they appear in the text so
    far, increasing the model's likelihood to talk about new topics.

    [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details)
    """

    seed: Optional[int] = None
    """
    If specified, our system will make a best effort to sample deterministically,
    such that repeated requests with the same `seed` and parameters should return
    the same result.

    Determinism is not guaranteed, and you should refer to the `system_fingerprint`
    response parameter to monitor changes in the backend.
    """

    stop: Optional[Union[str, List[str]]] = None
    """Up to 4 sequences where the API will stop generating further tokens.

    The returned text will not contain the stop sequence.
    """

    suffix: Optional[str] = None
    """The suffix that comes after a completion of inserted text."""

    temperature: Optional[float] = 1.
    """What sampling temperature to use, between 0 and 2.

    Higher values like 0.8 will make the output more random, while lower values like
    0.2 will make it more focused and deterministic.

    We generally recommend altering this or `top_p` but not both.
    """

    top_p: Optional[float] = 1.
    """
    An alternative to sampling with temperature, called nucleus sampling, where the
    model considers the results of the tokens with top_p probability mass. So 0.1
    means only the tokens comprising the top 10% probability mass are considered.

    We generally recommend altering this or `temperature` but not both.
    """

    user: Optional[str] = None
    """
    A unique identifier representing your end-user, which can help OpenAI to monitor
    and detect abuse.
    [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids).
    """

    stream: Optional[bool] = False
    """If set, partial message deltas will be sent, like in ChatGPT.

    Tokens will be sent as data-only
    [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
    as they become available, with the stream terminated by a `data: [DONE]` message.
    [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
    """

    # Additional parameters
    repetition_penalty: Optional[float] = 1.03
    """The parameter for repetition penalty. 1.0 means no penalty.

    See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
    """

    typical_p: Optional[float] = None
    """Typical decoding mass.

    See [Typical Decoding for Natural Language Generation](https://arxiv.org/abs/2202.00666)
    for more information.
    """

    watermark: Optional[bool] = False
    """Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226)."""

    ignore_eos: Optional[bool] = False

    use_beam_search: Optional[bool] = False

    stop_token_ids: Optional[List[int]] = None

    skip_special_tokens: Optional[bool] = True

    spaces_between_special_tokens: Optional[bool] = True

    min_p: Optional[float] = 0.0


class EmbeddingCreateParams(BaseModel):
    input: Union[str, List[str], List[int], List[List[int]]]
    """Input text to embed, encoded as a string or array of tokens.

    To embed multiple inputs in a single request, pass an array of strings or array
    of token arrays. The input must not exceed the max input tokens for the model
    (8192 tokens for `text-embedding-ada-002`) and cannot be an empty string.
    [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
    for counting tokens.
    """

    model: str
    """ID of the model to use.

    You can use the
    [List models](https://platform.openai.com/docs/api-reference/models/list) API to
    see all of your available models, or see our
    [Model overview](https://platform.openai.com/docs/models/overview) for
    descriptions of them.
    """

    encoding_format: Literal["float", "base64"] = "float"
    """The format to return the embeddings in.

    Can be either `float` or [`base64`](https://pypi.org/project/pybase64/).
    """

    user: Optional[str] = None
    """
    A unique identifier representing your end-user, which can help OpenAI to monitor
    and detect abuse.
    [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids).
    """


class Embedding(BaseModel):
    embedding: Any
    """The embedding vector, which is a list of floats.

    The length of the vector depends on the model as listed in the
    [embedding guide](https://platform.openai.com/docs/guides/embeddings).
    """

    index: int
    """The index of the embedding in the list of embeddings."""

    object: Literal["embedding"]
    """The object type, which is always "embedding"."""


class CreateEmbeddingResponse(BaseModel):
    data: List[Embedding]
    """The list of embeddings generated by the model."""

    model: str
    """The name of the model used to generate the embedding."""

    object: Literal["list"]
    """The object type, which is always "list"."""

    usage: Usage
    """The usage information for the request."""
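

# Illustrative usage sketch (not part of the schema definitions): the snippet below
# shows how an incoming request payload might be validated against the models above
# before being handed to a serving backend. The model IDs and the `__main__` guard
# are assumptions for demonstration only.
if __name__ == "__main__":
    chat_request = ChatCompletionCreateParams(
        model="my-chat-model",  # hypothetical model ID
        messages=[{"role": Role.USER.value, "content": "Hello!"}],
        temperature=0.7,
        max_tokens=64,
    )
    # Serialize back to JSON, e.g. for logging or forwarding to a backend
    # (pydantic v2; use .json() on pydantic v1).
    print(chat_request.model_dump_json(indent=2))

    embedding_request = EmbeddingCreateParams(
        input="The quick brown fox",
        model="my-embedding-model",  # hypothetical model ID
    )
    print(embedding_request.model_dump_json(indent=2))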