import { ChatCompletionRequestMessage } from "openai"
import { GPTTokens } from "gpt-tokens"

import { openai } from "./openai.mts"
import { runModerationCheck } from "./runModerationCheck.mts"
import { getUserContent } from "./getUserContent.mts"
import { getTextPrompt } from "./getTextPrompt.mts"

export const createChatCompletion = async (
  messages: ChatCompletionRequestMessage[],
  model = "gpt-4"
): Promise<string> => {
  // this is the part added by the user, and the one we need to check against the moderation API
  const userContent = getUserContent(messages)

  const check = await runModerationCheck(userContent)

  if (check.flagged) {
    console.error("Thoughtcrime: content flagged by the AI police", {
      userContent,
      moderationResult: check,
    })
    return "Thoughtcrime: content flagged by the AI police"
  }

  const rawPrompt = getTextPrompt(messages)

  // for doc: https://www.npmjs.com/package/gpt-tokens
  const usageInfo = new GPTTokens({
    // Plus enjoy a 25% cost reduction for input tokens on GPT-3.5 Turbo (0.0015 per 1K input tokens)
    plus: false,
    model: "gpt-4",
    messages: messages as any,
  })

  console.table({
    "Tokens prompt": usageInfo.promptUsedTokens,
    "Tokens completion": usageInfo.completionUsedTokens,
    "Tokens total": usageInfo.usedTokens,
  })

  // Price USD: 0.000298
  console.log("Price USD: ", usageInfo.usedUSD)

  // const tokenLimit = 4000

  const maxTokens = 4000 - usageInfo.promptUsedTokens

  console.log("maxTokens:", maxTokens)
  /*
  console.log("settings:", {
    tokenLimit,
    promptLength: rawPrompt.length,
    promptTokenLength: rawPrompt.length / 1.9,
    maxTokens
  })

  console.log("createChatCompletion(): raw prompt length:", rawPrompt.length)

  console.log(
    `createChatCompletion(): requesting ${maxTokens} of the ${tokenLimit} tokens available`
  )
  */

  console.log("query:", {
    model,
    // messages,
    user: "Anonymous User",
    temperature: 0.7,
    max_tokens: maxTokens,
    // stop: preset.stop?.length ? preset.stop : undefined,
  })

  const response = await openai.createChatCompletion({
    model,
    messages,
    // TODO use the Hugging Face Login username here
    user: "Anonymous User",
    temperature: 0.7,
    // cap the completion to whatever fits in the remaining context window
    max_tokens: maxTokens,
    // stop: preset.stop?.length ? preset.stop : undefined,
  })

  const { choices } = response.data

  if (!choices.length) {
    console.log("createChatCompletion(): no choice found in the LLM response..")
    return ""
  }

  const firstChoice = choices[0]

  if (firstChoice?.message?.role !== "assistant") {
    console.log(
      "createChatCompletion(): something went wrong, the model imagined the user response?!"
    )
    return ""
  }

  console.log("createChatCompletion(): response", firstChoice.message.content)

  return firstChoice.message.content || ""
}
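
// Example usage (a minimal sketch, not part of this module): the file name
// "./createChatCompletion.mts" is assumed from the exported function name, and
// the message objects follow the openai v3 ChatCompletionRequestMessage shape.
// It also assumes the API key consumed by ./openai.mts is configured.
//
//   import { createChatCompletion } from "./createChatCompletion.mts"
//
//   const reply = await createChatCompletion([
//     { role: "system", content: "You are a helpful assistant." },
//     { role: "user", content: "Write a haiku about the sea." },
//   ])
//
//   console.log(reply) // empty string if the request was flagged or the response was unusable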