Spaces:
Runtime error
Runtime error
File size: 4,462 Bytes
833d7c1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 |
const paramDefaults = {
stream: true,
n_predict: 500,
temperature: 0.2,
stop: ["</s>"]
};
let generation_settings = null;
// Completes the prompt as a generator. Recommended for most use cases.
//
// Example:
//
// import { llama } from '/completion.js'
//
// const request = llama("Tell me a joke", {n_predict: 800})
// for await (const chunk of request) {
// document.write(chunk.data.content)
// }
//
export async function* llama(prompt, params = {}, config = {}) {
let controller = config.controller;
if (!controller) {
controller = new AbortController();
}
const completionParams = { ...paramDefaults, ...params, prompt };
const response = await fetch("/completion", {
method: 'POST',
body: JSON.stringify(completionParams),
headers: {
'Connection': 'keep-alive',
'Content-Type': 'application/json',
'Accept': 'text/event-stream'
},
signal: controller.signal,
});
const reader = response.body.getReader();
const decoder = new TextDecoder();
let content = "";
try {
let cont = true;
while (cont) {
const result = await reader.read();
if (result.done) {
break;
}
// sse answers in the form multiple lines of: value\n with data always present as a key. in our case we
// mainly care about the data: key here, which we expect as json
const text = decoder.decode(result.value);
// parse all sse events and add them to result
const regex = /^(\S+):\s(.*)$/gm;
for (const match of text.matchAll(regex)) {
result[match[1]] = match[2]
}
// since we know this is llama.cpp, let's just decode the json in data
result.data = JSON.parse(result.data);
content += result.data.content;
// yield
yield result;
// if we got a stop token from server, we will break here
if (result.data.stop) {
if (result.data.generation_settings) {
generation_settings = result.data.generation_settings;
}
break;
}
}
} catch (e) {
if (e.name !== 'AbortError') {
console.error("llama error: ", e);
}
throw e;
}
finally {
controller.abort();
}
return content;
}
// Call llama, return an event target that you can subcribe to
//
// Example:
//
// import { llamaEventTarget } from '/completion.js'
//
// const conn = llamaEventTarget(prompt)
// conn.addEventListener("message", (chunk) => {
// document.write(chunk.detail.content)
// })
//
export const llamaEventTarget = (prompt, params = {}, config = {}) => {
const eventTarget = new EventTarget();
(async () => {
let content = "";
for await (const chunk of llama(prompt, params, config)) {
if (chunk.data) {
content += chunk.data.content;
eventTarget.dispatchEvent(new CustomEvent("message", { detail: chunk.data }));
}
if (chunk.data.generation_settings) {
eventTarget.dispatchEvent(new CustomEvent("generation_settings", { detail: chunk.data.generation_settings }));
}
if (chunk.data.timings) {
eventTarget.dispatchEvent(new CustomEvent("timings", { detail: chunk.data.timings }));
}
}
eventTarget.dispatchEvent(new CustomEvent("done", { detail: { content } }));
})();
return eventTarget;
}
// Call llama, return a promise that resolves to the completed text. This does not support streaming
//
// Example:
//
// llamaPromise(prompt).then((content) => {
// document.write(content)
// })
//
// or
//
// const content = await llamaPromise(prompt)
// document.write(content)
//
export const llamaPromise = (prompt, params = {}, config = {}) => {
return new Promise(async (resolve, reject) => {
let content = "";
try {
for await (const chunk of llama(prompt, params, config)) {
content += chunk.data.content;
}
resolve(content);
} catch (error) {
reject(error);
}
});
};
/**
* (deprecated)
*/
export const llamaComplete = async (params, controller, callback) => {
for await (const chunk of llama(params.prompt, params, { controller })) {
callback(chunk);
}
}
// Get the model info from the server. This is useful for getting the context window and so on.
export const llamaModelInfo = async () => {
if (!generation_settings) {
generation_settings = await fetch("/model.json").then(r => r.json());
}
return generation_settings;
}
|