sillytavern / src /endpoints /backends /text-completions.js
Nocigar's picture
Upload 72 files
1307964 verified
const express = require('express');
const fetch = require('node-fetch').default;
const _ = require('lodash');
const Readable = require('stream').Readable;
const { jsonParser } = require('../../express-common');
const { TEXTGEN_TYPES, TOGETHERAI_KEYS, OLLAMA_KEYS, INFERMATICAI_KEYS, OPENROUTER_KEYS, VLLM_KEYS, DREAMGEN_KEYS, FEATHERLESS_KEYS } = require('../../constants');
const { forwardFetchResponse, trimV1, getConfigValue } = require('../../util');
const { setAdditionalHeaders } = require('../../additional-headers');
const router = express.Router();
/**
* Special boy's steaming routine. Wrap this abomination into proper SSE stream.
* @param {import('node-fetch').Response} jsonStream JSON stream
* @param {import('express').Request} request Express request
* @param {import('express').Response} response Express response
* @returns {Promise<any>} Nothing valuable
*/
async function parseOllamaStream(jsonStream, request, response) {
try {
let partialData = '';
jsonStream.body.on('data', (data) => {
const chunk = data.toString();
partialData += chunk;
while (true) {
let json;
try {
json = JSON.parse(partialData);
} catch (e) {
break;
}
const text = json.response || '';
const chunk = { choices: [{ text }] };
response.write(`data: ${JSON.stringify(chunk)}\n\n`);
partialData = '';
}
});
request.socket.on('close', function () {
if (jsonStream.body instanceof Readable) jsonStream.body.destroy();
response.end();
});
jsonStream.body.on('end', () => {
console.log('Streaming request finished');
response.write('data: [DONE]\n\n');
response.end();
});
} catch (error) {
console.log('Error forwarding streaming response:', error);
if (!response.headersSent) {
return response.status(500).send({ error: true });
} else {
return response.end();
}
}
}
/**
* Abort KoboldCpp generation request.
* @param {string} url Server base URL
* @returns {Promise<void>} Promise resolving when we are done
*/
async function abortKoboldCppRequest(url) {
try {
console.log('Aborting Kobold generation...');
const abortResponse = await fetch(`${url}/api/extra/abort`, {
method: 'POST',
});
if (!abortResponse.ok) {
console.log('Error sending abort request to Kobold:', abortResponse.status, abortResponse.statusText);
}
} catch (error) {
console.log(error);
}
}
//************** Ooba/OpenAI text completions API
router.post('/status', jsonParser, async function (request, response) {
if (!request.body) return response.sendStatus(400);
try {
if (request.body.api_server.indexOf('localhost') !== -1) {
request.body.api_server = request.body.api_server.replace('localhost', '127.0.0.1');
}
console.log('Trying to connect to API:', request.body);
const baseUrl = trimV1(request.body.api_server);
const args = {
headers: { 'Content-Type': 'application/json' },
};
setAdditionalHeaders(request, args, baseUrl);
const apiType = request.body.api_type;
let url = baseUrl;
let result = '';
if (request.body.legacy_api) {
url += '/v1/model';
} else {
switch (apiType) {
case TEXTGEN_TYPES.OOBA:
case TEXTGEN_TYPES.VLLM:
case TEXTGEN_TYPES.APHRODITE:
case TEXTGEN_TYPES.KOBOLDCPP:
case TEXTGEN_TYPES.LLAMACPP:
case TEXTGEN_TYPES.INFERMATICAI:
case TEXTGEN_TYPES.OPENROUTER:
url += '/v1/models';
break;
case TEXTGEN_TYPES.DREAMGEN:
url += '/api/openai/v1/models';
break;
case TEXTGEN_TYPES.MANCER:
url += '/oai/v1/models';
break;
case TEXTGEN_TYPES.TABBY:
url += '/v1/model/list';
break;
case TEXTGEN_TYPES.TOGETHERAI:
url += '/api/models?&info';
break;
case TEXTGEN_TYPES.OLLAMA:
url += '/api/tags';
break;
case TEXTGEN_TYPES.FEATHERLESS:
url += '/v1/models';
break;
case TEXTGEN_TYPES.HUGGINGFACE:
url += '/info';
break;
}
}
const modelsReply = await fetch(url, args);
if (!modelsReply.ok) {
console.log('Models endpoint is offline.');
return response.status(400);
}
let data = await modelsReply.json();
if (request.body.legacy_api) {
console.log('Legacy API response:', data);
return response.send({ result: data?.result });
}
// Rewrap to OAI-like response
if (apiType === TEXTGEN_TYPES.TOGETHERAI && Array.isArray(data)) {
data = { data: data.map(x => ({ id: x.name, ...x })) };
}
if (apiType === TEXTGEN_TYPES.OLLAMA && Array.isArray(data.models)) {
data = { data: data.models.map(x => ({ id: x.name, ...x })) };
}
if (apiType === TEXTGEN_TYPES.HUGGINGFACE) {
data = { data: [] };
}
if (!Array.isArray(data.data)) {
console.log('Models response is not an array.');
return response.status(400);
}
const modelIds = data.data.map(x => x.id);
console.log('Models available:', modelIds);
// Set result to the first model ID
result = modelIds[0] || 'Valid';
if (apiType === TEXTGEN_TYPES.OOBA) {
try {
const modelInfoUrl = baseUrl + '/v1/internal/model/info';
const modelInfoReply = await fetch(modelInfoUrl, args);
if (modelInfoReply.ok) {
const modelInfo = await modelInfoReply.json();
console.log('Ooba model info:', modelInfo);
const modelName = modelInfo?.model_name;
result = modelName || result;
}
} catch (error) {
console.error(`Failed to get Ooba model info: ${error}`);
}
} else if (apiType === TEXTGEN_TYPES.TABBY) {
try {
const modelInfoUrl = baseUrl + '/v1/model';
const modelInfoReply = await fetch(modelInfoUrl, args);
if (modelInfoReply.ok) {
const modelInfo = await modelInfoReply.json();
console.log('Tabby model info:', modelInfo);
const modelName = modelInfo?.id;
result = modelName || result;
} else {
// TabbyAPI returns an error 400 if a model isn't loaded
result = 'None';
}
} catch (error) {
console.error(`Failed to get TabbyAPI model info: ${error}`);
}
}
return response.send({ result, data: data.data });
} catch (error) {
console.error(error);
return response.status(500);
}
});
router.post('/generate', jsonParser, async function (request, response) {
if (!request.body) return response.sendStatus(400);
try {
if (request.body.api_server.indexOf('localhost') !== -1) {
request.body.api_server = request.body.api_server.replace('localhost', '127.0.0.1');
}
const apiType = request.body.api_type;
const baseUrl = request.body.api_server;
console.log(request.body);
const controller = new AbortController();
request.socket.removeAllListeners('close');
request.socket.on('close', async function () {
if (request.body.api_type === TEXTGEN_TYPES.KOBOLDCPP && !response.writableEnded) {
await abortKoboldCppRequest(trimV1(baseUrl));
}
controller.abort();
});
let url = trimV1(baseUrl);
if (request.body.legacy_api) {
url += '/v1/generate';
} else {
switch (request.body.api_type) {
case TEXTGEN_TYPES.VLLM:
case TEXTGEN_TYPES.FEATHERLESS:
case TEXTGEN_TYPES.APHRODITE:
case TEXTGEN_TYPES.OOBA:
case TEXTGEN_TYPES.TABBY:
case TEXTGEN_TYPES.KOBOLDCPP:
case TEXTGEN_TYPES.TOGETHERAI:
case TEXTGEN_TYPES.INFERMATICAI:
case TEXTGEN_TYPES.HUGGINGFACE:
url += '/v1/completions';
break;
case TEXTGEN_TYPES.DREAMGEN:
url += '/api/openai/v1/completions';
break;
case TEXTGEN_TYPES.MANCER:
url += '/oai/v1/completions';
break;
case TEXTGEN_TYPES.LLAMACPP:
url += '/completion';
break;
case TEXTGEN_TYPES.OLLAMA:
url += '/api/generate';
break;
case TEXTGEN_TYPES.OPENROUTER:
url += '/v1/chat/completions';
break;
}
}
const args = {
method: 'POST',
body: JSON.stringify(request.body),
headers: { 'Content-Type': 'application/json' },
signal: controller.signal,
timeout: 0,
};
setAdditionalHeaders(request, args, baseUrl);
if (request.body.api_type === TEXTGEN_TYPES.TOGETHERAI) {
request.body = _.pickBy(request.body, (_, key) => TOGETHERAI_KEYS.includes(key));
args.body = JSON.stringify(request.body);
}
if (request.body.api_type === TEXTGEN_TYPES.INFERMATICAI) {
request.body = _.pickBy(request.body, (_, key) => INFERMATICAI_KEYS.includes(key));
args.body = JSON.stringify(request.body);
}
if (request.body.api_type === TEXTGEN_TYPES.FEATHERLESS) {
request.body = _.pickBy(request.body, (_, key) => FEATHERLESS_KEYS.includes(key));
args.body = JSON.stringify(request.body);
}
if (request.body.api_type === TEXTGEN_TYPES.DREAMGEN) {
request.body = _.pickBy(request.body, (_, key) => DREAMGEN_KEYS.includes(key));
// NOTE: DreamGen sometimes get confused by the unusual formatting in the character cards.
request.body.stop?.push('### User', '## User');
args.body = JSON.stringify(request.body);
}
if (request.body.api_type === TEXTGEN_TYPES.OPENROUTER) {
if (Array.isArray(request.body.provider) && request.body.provider.length > 0) {
request.body.provider = {
allow_fallbacks: request.body.allow_fallbacks ?? true,
order: request.body.provider,
};
} else {
delete request.body.provider;
}
request.body = _.pickBy(request.body, (_, key) => OPENROUTER_KEYS.includes(key));
args.body = JSON.stringify(request.body);
}
if (request.body.api_type === TEXTGEN_TYPES.VLLM) {
request.body = _.pickBy(request.body, (_, key) => VLLM_KEYS.includes(key));
args.body = JSON.stringify(request.body);
}
if (request.body.api_type === TEXTGEN_TYPES.OLLAMA) {
const keepAlive = getConfigValue('ollama.keepAlive', -1);
args.body = JSON.stringify({
model: request.body.model,
prompt: request.body.prompt,
stream: request.body.stream ?? false,
keep_alive: keepAlive,
raw: true,
options: _.pickBy(request.body, (_, key) => OLLAMA_KEYS.includes(key)),
});
}
if (request.body.api_type === TEXTGEN_TYPES.OLLAMA && request.body.stream) {
const stream = await fetch(url, args);
parseOllamaStream(stream, request, response);
} else if (request.body.stream) {
const completionsStream = await fetch(url, args);
// Pipe remote SSE stream to Express response
forwardFetchResponse(completionsStream, response);
}
else {
const completionsReply = await fetch(url, args);
if (completionsReply.ok) {
const data = await completionsReply.json();
console.log('Endpoint response:', data);
// Wrap legacy response to OAI completions format
if (request.body.legacy_api) {
const text = data?.results[0]?.text;
data['choices'] = [{ text }];
}
// Map InfermaticAI response to OAI completions format
if (apiType === TEXTGEN_TYPES.INFERMATICAI) {
data['choices'] = (data?.choices || []).map(choice => ({ text: choice?.message?.content || choice.text, logprobs: choice?.logprobs, index: choice?.index }));
}
return response.send(data);
} else {
const text = await completionsReply.text();
const errorBody = { error: true, status: completionsReply.status, response: text };
if (!response.headersSent) {
return response.send(errorBody);
}
return response.end();
}
}
} catch (error) {
let value = { error: true, status: error?.status, response: error?.statusText };
console.log('Endpoint error:', error);
if (!response.headersSent) {
return response.send(value);
}
return response.end();
}
});
const ollama = express.Router();
ollama.post('/download', jsonParser, async function (request, response) {
try {
if (!request.body.name || !request.body.api_server) return response.sendStatus(400);
const name = request.body.name;
const url = String(request.body.api_server).replace(/\/$/, '');
const fetchResponse = await fetch(`${url}/api/pull`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
name: name,
stream: false,
}),
timeout: 0,
});
if (!fetchResponse.ok) {
console.log('Download error:', fetchResponse.status, fetchResponse.statusText);
return response.status(fetchResponse.status).send({ error: true });
}
return response.send({ ok: true });
} catch (error) {
console.error(error);
return response.status(500);
}
});
ollama.post('/caption-image', jsonParser, async function (request, response) {
try {
if (!request.body.server_url || !request.body.model) {
return response.sendStatus(400);
}
console.log('Ollama caption request:', request.body);
const baseUrl = trimV1(request.body.server_url);
const fetchResponse = await fetch(`${baseUrl}/api/generate`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
model: request.body.model,
prompt: request.body.prompt,
images: [request.body.image],
stream: false,
}),
timeout: 0,
});
if (!fetchResponse.ok) {
console.log('Ollama caption error:', fetchResponse.status, fetchResponse.statusText);
return response.status(500).send({ error: true });
}
const data = await fetchResponse.json();
console.log('Ollama caption response:', data);
const caption = data?.response || '';
if (!caption) {
console.log('Ollama caption is empty.');
return response.status(500).send({ error: true });
}
return response.send({ caption });
} catch (error) {
console.error(error);
return response.status(500);
}
});
const llamacpp = express.Router();
llamacpp.post('/caption-image', jsonParser, async function (request, response) {
try {
if (!request.body.server_url) {
return response.sendStatus(400);
}
console.log('LlamaCpp caption request:', request.body);
const baseUrl = trimV1(request.body.server_url);
const fetchResponse = await fetch(`${baseUrl}/completion`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
timeout: 0,
body: JSON.stringify({
prompt: `USER:[img-1]${String(request.body.prompt).trim()}\nASSISTANT:`,
image_data: [{ data: request.body.image, id: 1 }],
temperature: 0.1,
stream: false,
stop: ['USER:', '</s>'],
}),
});
if (!fetchResponse.ok) {
console.log('LlamaCpp caption error:', fetchResponse.status, fetchResponse.statusText);
return response.status(500).send({ error: true });
}
const data = await fetchResponse.json();
console.log('LlamaCpp caption response:', data);
const caption = data?.content || '';
if (!caption) {
console.log('LlamaCpp caption is empty.');
return response.status(500).send({ error: true });
}
return response.send({ caption });
} catch (error) {
console.error(error);
return response.status(500);
}
});
llamacpp.post('/props', jsonParser, async function (request, response) {
try {
if (!request.body.server_url) {
return response.sendStatus(400);
}
console.log('LlamaCpp props request:', request.body);
const baseUrl = trimV1(request.body.server_url);
const fetchResponse = await fetch(`${baseUrl}/props`, {
method: 'GET',
timeout: 0,
});
if (!fetchResponse.ok) {
console.log('LlamaCpp props error:', fetchResponse.status, fetchResponse.statusText);
return response.status(500).send({ error: true });
}
const data = await fetchResponse.json();
console.log('LlamaCpp props response:', data);
return response.send(data);
} catch (error) {
console.error(error);
return response.status(500);
}
});
llamacpp.post('/slots', jsonParser, async function (request, response) {
try {
if (!request.body.server_url) {
return response.sendStatus(400);
}
if (!/^(erase|info|restore|save)$/.test(request.body.action)) {
return response.sendStatus(400);
}
console.log('LlamaCpp slots request:', request.body);
const baseUrl = trimV1(request.body.server_url);
let fetchResponse;
if (request.body.action === 'info') {
fetchResponse = await fetch(`${baseUrl}/slots`, {
method: 'GET',
timeout: 0,
});
} else {
if (!/^\d+$/.test(request.body.id_slot)) {
return response.sendStatus(400);
}
if (request.body.action !== 'erase' && !request.body.filename) {
return response.sendStatus(400);
}
fetchResponse = await fetch(`${baseUrl}/slots/${request.body.id_slot}?action=${request.body.action}`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
timeout: 0,
body: JSON.stringify({
filename: request.body.action !== 'erase' ? `${request.body.filename}` : undefined,
}),
});
}
if (!fetchResponse.ok) {
console.log('LlamaCpp slots error:', fetchResponse.status, fetchResponse.statusText);
return response.status(500).send({ error: true });
}
const data = await fetchResponse.json();
console.log('LlamaCpp slots response:', data);
return response.send(data);
} catch (error) {
console.error(error);
return response.status(500);
}
});
const tabby = express.Router();
tabby.post('/download', jsonParser, async function (request, response) {
try {
const baseUrl = String(request.body.api_server).replace(/\/$/, '');
const args = {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(request.body),
timeout: 0,
};
setAdditionalHeaders(request, args, baseUrl);
// Check key permissions
const permissionResponse = await fetch(`${baseUrl}/v1/auth/permission`, {
headers: args.headers,
});
if (permissionResponse.ok) {
const permissionJson = await permissionResponse.json();
if (permissionJson['permission'] !== 'admin') {
return response.status(403).send({ error: true });
}
} else {
console.log('API Permission error:', permissionResponse.status, permissionResponse.statusText);
return response.status(permissionResponse.status).send({ error: true });
}
const fetchResponse = await fetch(`${baseUrl}/v1/download`, args);
if (!fetchResponse.ok) {
console.log('Download error:', fetchResponse.status, fetchResponse.statusText);
return response.status(fetchResponse.status).send({ error: true });
}
return response.send({ ok: true });
} catch (error) {
console.error(error);
return response.status(500);
}
});
router.use('/ollama', ollama);
router.use('/llamacpp', llamacpp);
router.use('/tabby', tabby);
module.exports = { router };