const crypto = require('crypto');
const Keyv = require('keyv');
const {
  encoding_for_model: encodingForModel,
  get_encoding: getEncoding,
} = require('@dqbd/tiktoken');
const { fetchEventSource } = require('@waylaidwanderer/fetch-event-source');
const { Agent, ProxyAgent } = require('undici');
const BaseClient = require('./BaseClient');

const CHATGPT_MODEL = 'gpt-3.5-turbo';
const tokenizersCache = {};
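// Note: `tokenizersCache` memoizes tiktoken tokenizers by the `encoding` string passed to
// `getTokenizer`, so each encoder is only constructed once per process.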

class ChatGPTClient extends BaseClient {
  constructor(apiKey, options = {}, cacheOptions = {}) {
    super(apiKey, options, cacheOptions);
    cacheOptions.namespace = cacheOptions.namespace || 'chatgpt';
    this.conversationsCache = new Keyv(cacheOptions);
    this.setOptions(options);
  }
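
  // Merges new options into the existing ones unless `replaceOptions` is set. Nested
  // `modelOptions` are merged manually first because a shallow spread would overwrite the
  // whole object. For example (an illustrative call, not from this file):
  //   client.setOptions({ modelOptions: { temperature: 0.2 } });
  // keeps the previously configured model and only changes the temperature.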
  setOptions(options) {
    if (this.options && !this.options.replaceOptions) {
      // nested options aren't spread properly, so we need to do this manually
      this.options.modelOptions = {
        ...this.options.modelOptions,
        ...options.modelOptions,
      };
      delete options.modelOptions;
      // now we can merge options
      this.options = {
        ...this.options,
        ...options,
      };
    } else {
      this.options = options;
    }

    if (this.options.openaiApiKey) {
      this.apiKey = this.options.openaiApiKey;
    }

    const modelOptions = this.options.modelOptions || {};
    this.modelOptions = {
      ...modelOptions,
      // set some good defaults (check for undefined in some cases because they may be 0)
      model: modelOptions.model || CHATGPT_MODEL,
      temperature: typeof modelOptions.temperature === 'undefined' ? 0.8 : modelOptions.temperature,
      top_p: typeof modelOptions.top_p === 'undefined' ? 1 : modelOptions.top_p,
      presence_penalty:
        typeof modelOptions.presence_penalty === 'undefined' ? 1 : modelOptions.presence_penalty,
      stop: modelOptions.stop,
    };

    this.isChatGptModel = this.modelOptions.model.startsWith('gpt-');
    const { isChatGptModel } = this;
    this.isUnofficialChatGptModel =
      this.modelOptions.model.startsWith('text-chat') ||
      this.modelOptions.model.startsWith('text-davinci-002-render');
    const { isUnofficialChatGptModel } = this;

    // Davinci models have a max context length of 4097 tokens.
    this.maxContextTokens = this.options.maxContextTokens || (isChatGptModel ? 4095 : 4097);
    // I decided to reserve 1024 tokens for the response.
    // The max prompt tokens is determined by the max context tokens minus the max response tokens.
    // Earlier messages will be dropped until the prompt is within the limit.
    this.maxResponseTokens = this.modelOptions.max_tokens || 1024;
    this.maxPromptTokens =
      this.options.maxPromptTokens || this.maxContextTokens - this.maxResponseTokens;

    if (this.maxPromptTokens + this.maxResponseTokens > this.maxContextTokens) {
      throw new Error(
        `maxPromptTokens + max_tokens (${this.maxPromptTokens} + ${this.maxResponseTokens} = ${
          this.maxPromptTokens + this.maxResponseTokens
        }) must be less than or equal to maxContextTokens (${this.maxContextTokens})`,
      );
    }

    this.userLabel = this.options.userLabel || 'User';
    this.chatGptLabel = this.options.chatGptLabel || 'ChatGPT';

    if (isChatGptModel) {
      // Use these faux tokens to help the AI understand the context since we are building the chat log ourselves.
      // Trying to use "<|im_start|>" causes the AI to still generate "<" or "<|" at the end sometimes for some reason,
      // without tripping the stop sequences, so I'm using "||>" instead.
      this.startToken = '||>';
      this.endToken = '';
      this.gptEncoder = this.constructor.getTokenizer('cl100k_base');
    } else if (isUnofficialChatGptModel) {
      this.startToken = '<|im_start|>';
      this.endToken = '<|im_end|>';
      this.gptEncoder = this.constructor.getTokenizer('text-davinci-003', true, {
        '<|im_start|>': 100264,
        '<|im_end|>': 100265,
      });
    } else {
      // Previously I was trying to use "<|endoftext|>" but there seems to be some bug with OpenAI's token counting
      // system that causes only the first "<|endoftext|>" to be counted as 1 token, and the rest are not treated
      // as a single token. So we're using this instead.
      this.startToken = '||>';
      this.endToken = '';
      try {
        this.gptEncoder = this.constructor.getTokenizer(this.modelOptions.model, true);
      } catch {
        this.gptEncoder = this.constructor.getTokenizer('text-davinci-003', true);
      }
    }

    if (!this.modelOptions.stop) {
      const stopTokens = [this.startToken];
      if (this.endToken && this.endToken !== this.startToken) {
        stopTokens.push(this.endToken);
      }
      stopTokens.push(`\n${this.userLabel}:`);
      stopTokens.push('<|diff_marker|>');
      // I chose not to do one for `chatGptLabel` because I've never seen it happen
      this.modelOptions.stop = stopTokens;
    }

    if (this.options.reverseProxyUrl) {
      this.completionsUrl = this.options.reverseProxyUrl;
    } else if (isChatGptModel) {
      this.completionsUrl = 'https://api.openai.com/v1/chat/completions';
    } else {
      this.completionsUrl = 'https://api.openai.com/v1/completions';
    }

    return this;
  }
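
  // Returns a cached tiktoken tokenizer. When `isModelName` is true, `encoding` is treated as
  // a model name (e.g. 'text-davinci-003') and resolved via `encoding_for_model`; otherwise it
  // is an encoding name (e.g. 'cl100k_base') resolved via `get_encoding`.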
  static getTokenizer(encoding, isModelName = false, extendSpecialTokens = {}) {
    if (tokenizersCache[encoding]) {
      return tokenizersCache[encoding];
    }
    let tokenizer;
    if (isModelName) {
      tokenizer = encodingForModel(encoding, extendSpecialTokens);
    } else {
      tokenizer = getEncoding(encoding, extendSpecialTokens);
    }
    tokenizersCache[encoding] = tokenizer;
    return tokenizer;
  }
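
  // Sends the request to the completions endpoint. For chat models, `input` is an array of
  // message payloads; for legacy completion models it is a prompt string. When `onProgress`
  // is a function, the request is streamed over SSE and each parsed chunk (or the '[DONE]'
  // sentinel) is passed to the callback; otherwise the parsed JSON response body is returned.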
  async getCompletion(input, onProgress, abortController = null) {
    if (!abortController) {
      abortController = new AbortController();
    }
    const modelOptions = { ...this.modelOptions };
    if (typeof onProgress === 'function') {
      modelOptions.stream = true;
    }
    if (this.isChatGptModel) {
      modelOptions.messages = input;
    } else {
      modelOptions.prompt = input;
    }

    const { debug } = this.options;
    const url = this.completionsUrl;
    if (debug) {
      console.debug();
      console.debug(url);
      console.debug(modelOptions);
      console.debug();
    }

    const opts = {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify(modelOptions),
      dispatcher: new Agent({
        bodyTimeout: 0,
        headersTimeout: 0,
      }),
    };

    if (this.apiKey && this.options.azure) {
      opts.headers['api-key'] = this.apiKey;
    } else if (this.apiKey) {
      opts.headers.Authorization = `Bearer ${this.apiKey}`;
    }

    if (this.options.headers) {
      opts.headers = { ...opts.headers, ...this.options.headers };
    }

    if (this.options.proxy) {
      opts.dispatcher = new ProxyAgent(this.options.proxy);
    }

    if (modelOptions.stream) {
      // eslint-disable-next-line no-async-promise-executor
      return new Promise(async (resolve, reject) => {
        try {
          let done = false;
          await fetchEventSource(url, {
            ...opts,
            signal: abortController.signal,
            async onopen(response) {
              if (response.status === 200) {
                return;
              }
              if (debug) {
                console.debug(response);
              }
              let error;
              try {
                const body = await response.text();
                error = new Error(`Failed to send message. HTTP ${response.status} - ${body}`);
                error.status = response.status;
                error.json = JSON.parse(body);
              } catch {
                error = error || new Error(`Failed to send message. HTTP ${response.status}`);
              }
              throw error;
            },
            onclose() {
              if (debug) {
                console.debug('Server closed the connection unexpectedly, returning...');
              }
              // workaround for private API not sending [DONE] event
              if (!done) {
                onProgress('[DONE]');
                abortController.abort();
                resolve();
              }
            },
            onerror(err) {
              if (debug) {
                console.debug(err);
              }
              // rethrow to stop the operation
              throw err;
            },
            onmessage(message) {
              if (debug) {
                console.debug(message);
              }
              if (!message.data || message.event === 'ping') {
                return;
              }
              if (message.data === '[DONE]') {
                onProgress('[DONE]');
                abortController.abort();
                resolve();
                done = true;
                return;
              }
              onProgress(JSON.parse(message.data));
            },
          });
        } catch (err) {
          reject(err);
        }
      });
    }

    const response = await fetch(url, {
      ...opts,
      signal: abortController.signal,
    });
    if (response.status !== 200) {
      const body = await response.text();
      const error = new Error(`Failed to send message. HTTP ${response.status} - ${body}`);
      error.status = response.status;
      try {
        error.json = JSON.parse(body);
      } catch {
        error.body = body;
      }
      throw error;
    }
    return response.json();
  }
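
  // Generates a short conversation title by running a second, deterministic (temperature 0)
  // chat completion over the first user/assistant exchange, then stripping punctuation from
  // the result.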
  async generateTitle(userMessage, botMessage) {
    const instructionsPayload = {
      role: 'system',
      content: `Write an extremely concise subtitle for this conversation with no more than a few words. All words should be capitalized. Exclude punctuation.
||>Message:
${userMessage.message}
||>Response:
${botMessage.message}
||>Title:`,
    };

    const titleGenClientOptions = JSON.parse(JSON.stringify(this.options));
    titleGenClientOptions.modelOptions = {
      model: 'gpt-3.5-turbo',
      temperature: 0,
      presence_penalty: 0,
      frequency_penalty: 0,
    };
    const titleGenClient = new ChatGPTClient(this.apiKey, titleGenClientOptions);
    const result = await titleGenClient.getCompletion([instructionsPayload], null);
    // remove any non-alphanumeric characters, replace multiple spaces with 1, and then trim
    return result.choices[0].message.content
      .replace(/[^a-zA-Z0-9' ]/g, '')
      .replace(/\s+/g, ' ')
      .trim();
  }
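
  // Orchestrates a full conversational turn: loads (or creates) the conversation from the
  // cache, appends the user message, builds a token-budgeted prompt, calls the model
  // (streaming if `opts.onProgress` is provided), appends the reply, optionally titles a new
  // conversation, and persists everything back to the cache.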
  async sendMessage(message, opts = {}) {
    if (opts.clientOptions && typeof opts.clientOptions === 'object') {
      this.setOptions(opts.clientOptions);
    }

    const conversationId = opts.conversationId || crypto.randomUUID();
    const parentMessageId = opts.parentMessageId || crypto.randomUUID();

    let conversation =
      typeof opts.conversation === 'object'
        ? opts.conversation
        : await this.conversationsCache.get(conversationId);

    let isNewConversation = false;
    if (!conversation) {
      conversation = {
        messages: [],
        createdAt: Date.now(),
      };
      isNewConversation = true;
    }

    const shouldGenerateTitle = opts.shouldGenerateTitle && isNewConversation;

    const userMessage = {
      id: crypto.randomUUID(),
      parentMessageId,
      role: 'User',
      message,
    };
    conversation.messages.push(userMessage);

    // Doing it this way instead of having each message be a separate element in the array seems to be more reliable,
    // especially when it comes to keeping the AI in character. It also seems to improve coherency and context retention.
    const { prompt: payload, context } = await this.buildPrompt(
      conversation.messages,
      userMessage.id,
      {
        isChatGptModel: this.isChatGptModel,
        promptPrefix: opts.promptPrefix,
      },
    );

    if (this.options.keepNecessaryMessagesOnly) {
      conversation.messages = context;
    }

    let reply = '';
    let result = null;
    if (typeof opts.onProgress === 'function') {
      await this.getCompletion(
        payload,
        (progressMessage) => {
          if (progressMessage === '[DONE]') {
            return;
          }
          const token = this.isChatGptModel
            ? progressMessage.choices[0].delta.content
            : progressMessage.choices[0].text;
          // first event's delta content is always undefined
          if (!token) {
            return;
          }
          if (this.options.debug) {
            console.debug(token);
          }
          if (token === this.endToken) {
            return;
          }
          opts.onProgress(token);
          reply += token;
        },
        opts.abortController || new AbortController(),
      );
    } else {
      result = await this.getCompletion(
        payload,
        null,
        opts.abortController || new AbortController(),
      );
      if (this.options.debug) {
        console.debug(JSON.stringify(result));
      }
      if (this.isChatGptModel) {
        reply = result.choices[0].message.content;
      } else {
        reply = result.choices[0].text.replace(this.endToken, '');
      }
    }

    // avoids some rendering issues when using the CLI app
    if (this.options.debug) {
      console.debug();
    }

    reply = reply.trim();

    const replyMessage = {
      id: crypto.randomUUID(),
      parentMessageId: userMessage.id,
      role: 'ChatGPT',
      message: reply,
    };
    conversation.messages.push(replyMessage);

    const returnData = {
      response: replyMessage.message,
      conversationId,
      parentMessageId: replyMessage.parentMessageId,
      messageId: replyMessage.id,
      details: result || {},
    };

    if (shouldGenerateTitle) {
      conversation.title = await this.generateTitle(userMessage, replyMessage);
      returnData.title = conversation.title;
    }

    await this.conversationsCache.set(conversationId, conversation);

    if (this.options.returnConversation) {
      returnData.conversation = conversation;
    }

    return returnData;
  }
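
  // Builds the prompt from the conversation history, walking backwards from the target
  // message and dropping the oldest messages once `maxPromptTokens` would be exceeded.
  // For chat models it returns an array of message payloads; for completion models, a
  // single formatted string.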
  async buildPrompt(messages, parentMessageId, { isChatGptModel = false, promptPrefix = null }) {
    const orderedMessages = this.constructor.getMessagesForConversation(messages, parentMessageId);

    promptPrefix = (promptPrefix || this.options.promptPrefix || '').trim();
    if (promptPrefix) {
      // If the prompt prefix doesn't end with the end token, add it.
      if (!promptPrefix.endsWith(`${this.endToken}`)) {
        promptPrefix = `${promptPrefix.trim()}${this.endToken}\n\n`;
      }
      promptPrefix = `${this.startToken}Instructions:\n${promptPrefix}`;
    } else {
      const currentDateString = new Date().toLocaleDateString('en-us', {
        year: 'numeric',
        month: 'long',
        day: 'numeric',
      });
      promptPrefix = `${this.startToken}Instructions:\nYou are ChatGPT, a large language model trained by OpenAI. Respond conversationally.\nCurrent date: ${currentDateString}${this.endToken}\n\n`;
    }

    const promptSuffix = `${this.startToken}${this.chatGptLabel}:\n`; // Prompt ChatGPT to respond.

    const instructionsPayload = {
      role: 'system',
      name: 'instructions',
      content: promptPrefix,
    };

    const messagePayload = {
      role: 'system',
      content: promptSuffix,
    };

    let currentTokenCount;
    if (isChatGptModel) {
      currentTokenCount =
        this.getTokenCountForMessage(instructionsPayload) +
        this.getTokenCountForMessage(messagePayload);
    } else {
      currentTokenCount = this.getTokenCount(`${promptPrefix}${promptSuffix}`);
    }
    let promptBody = '';
    const maxTokenCount = this.maxPromptTokens;

    const context = [];

    // Iterate backwards through the messages, adding them to the prompt until we reach the max token count.
    // Do this within a recursive async function so that it doesn't block the event loop for too long.
    const buildPromptBody = async () => {
      if (currentTokenCount < maxTokenCount && orderedMessages.length > 0) {
        const message = orderedMessages.pop();
        const roleLabel =
          message?.isCreatedByUser || message?.role?.toLowerCase() === 'user'
            ? this.userLabel
            : this.chatGptLabel;
        const messageString = `${this.startToken}${roleLabel}:\n${
          message?.text ?? message?.message
        }${this.endToken}\n`;
        let newPromptBody;
        if (promptBody || isChatGptModel) {
          newPromptBody = `${messageString}${promptBody}`;
        } else {
          // Always insert prompt prefix before the last user message, if not gpt-3.5-turbo.
          // This makes the AI obey the prompt instructions better, which is important for custom instructions.
          // After a bunch of testing, it doesn't seem to cause the AI any confusion, even if you ask it things
          // like "what's the last thing I wrote?".
          newPromptBody = `${promptPrefix}${messageString}${promptBody}`;
        }

        context.unshift(message);

        const tokenCountForMessage = this.getTokenCount(messageString);
        const newTokenCount = currentTokenCount + tokenCountForMessage;
        if (newTokenCount > maxTokenCount) {
          if (promptBody) {
            // This message would put us over the token limit, so don't add it.
            return false;
          }
          // This is the first message, so we can't add it. Just throw an error.
          throw new Error(
            `Prompt is too long. Max token count is ${maxTokenCount}, but prompt is ${newTokenCount} tokens long.`,
          );
        }
        promptBody = newPromptBody;
        currentTokenCount = newTokenCount;
        // wait for next tick to avoid blocking the event loop
        await new Promise((resolve) => setImmediate(resolve));
        return buildPromptBody();
      }
      return true;
    };

    await buildPromptBody();

    const prompt = `${promptBody}${promptSuffix}`;
    if (isChatGptModel) {
      messagePayload.content = prompt;
      // Add 2 tokens for metadata after all messages have been counted.
      currentTokenCount += 2;
    }
    // Use up to `this.maxContextTokens` tokens (prompt + response), but try to leave
    // `this.maxResponseTokens` tokens for the response.
    this.modelOptions.max_tokens = Math.min(
      this.maxContextTokens - currentTokenCount,
      this.maxResponseTokens,
    );

    if (this.options.debug) {
      console.debug(`Prompt : ${prompt}`);
    }

    if (isChatGptModel) {
      return { prompt: [instructionsPayload, messagePayload], context };
    }
    return { prompt, context };
  }
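
  // Counts the tokens in a string. Passing 'all' as the allowed-special-tokens argument lets
  // tiktoken encode special tokens (e.g. '<|im_start|>') instead of throwing on them.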
  getTokenCount(text) {
    return this.gptEncoder.encode(text, 'all').length;
  }

  /**
   * Algorithm adapted from "6. Counting tokens for chat API calls" of
   * https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
   *
   * An additional 2 tokens need to be added for metadata after all messages have been counted.
   *
   * @param {Object} message - a chat message payload whose string properties (e.g. `role`, `name`, `content`) are counted
   */
  getTokenCountForMessage(message) {
    let tokensPerMessage;
    let nameAdjustment;
    if (this.modelOptions.model.startsWith('gpt-4')) {
      tokensPerMessage = 3;
      nameAdjustment = 1;
    } else {
      tokensPerMessage = 4;
      nameAdjustment = -1;
    }

    // Map each property of the message to the number of tokens it contains
    const propertyTokenCounts = Object.entries(message).map(([key, value]) => {
      // Count the number of tokens in the property value
      const numTokens = this.getTokenCount(value);

      // Adjust by `nameAdjustment` tokens if the property key is 'name'
      const adjustment = key === 'name' ? nameAdjustment : 0;
      return numTokens + adjustment;
    });

    // Sum the number of tokens in all properties and add `tokensPerMessage` for metadata
    return propertyTokenCounts.reduce((a, b) => a + b, tokensPerMessage);
  }
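
  // Worked example: with a gpt-3.5 model (tokensPerMessage = 4, nameAdjustment = -1), the
  // payload { role: 'system', name: 'instructions', content: '...' } counts as
  // 4 + tokens('system') + (tokens('instructions') - 1) + tokens('...').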
}

module.exports = ChatGPTClient;
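
/*
Example usage (a minimal sketch; the environment variable, model choice, and option values
below are illustrative assumptions, not requirements of this module):

  const ChatGPTClient = require('./ChatGPTClient');

  const client = new ChatGPTClient(
    process.env.OPENAI_API_KEY,
    { modelOptions: { model: 'gpt-3.5-turbo' } },
    { namespace: 'chatgpt' },
  );

  const first = await client.sendMessage('Hello!', {
    shouldGenerateTitle: true,
    onProgress: (token) => process.stdout.write(token),
  });

  // Continue the same conversation by threading the returned IDs.
  const followUp = await client.sendMessage('Tell me more.', {
    conversationId: first.conversationId,
    parentMessageId: first.messageId,
  });
*/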