File size: 1,502 Bytes
624088c c32ec0d 624088c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
"use server"
import { HfInference } from "@huggingface/inference"
// Hugging Face client, constructed with the token read from HF_API_TOKEN.
const hfi = new HfInference(process.env.HF_API_TOKEN)

// URL of the inference endpoint; an unset variable falls back to the empty string.
const inferenceEndpointUrl = process.env.HF_INFERENCE_ENDPOINT_URL || ""

// Client used by the rest of this module for all generation requests.
const hf = hfi.endpoint(inferenceEndpointUrl)
/**
 * Control / chat-template markers that must never reach the caller.
 * Generation stops as soon as one of them shows up in the streamed text.
 */
const STOP_MARKERS: readonly string[] = [
  "</s>",
  "<s>",
  "[INST]",
  "[/INST]",
  "<SYS>",
  "</SYS>",
  "<|end|>",
  "<|assistant|>",
]

/**
 * Returns the index of the earliest stop marker found in `text`,
 * or -1 when `text` contains none of them.
 */
function findFirstStopMarker(text: string): number {
  let earliest = -1
  for (const marker of STOP_MARKERS) {
    const at = text.indexOf(marker)
    if (at !== -1 && (earliest === -1 || at < earliest)) {
      earliest = at
    }
  }
  return earliest
}

/**
 * Streams a text completion for `inputs` from the module-level `hf` client
 * and returns the generated text.
 *
 * Tokens are echoed to stdout as they arrive. Streaming stops at the first
 * stop marker (see `STOP_MARKERS`); the marker and anything that follows it
 * in the same chunk are dropped from the result.
 *
 * @param inputs - Prompt sent to the model as-is.
 * @returns The text generated so far. If the stream fails, the error is
 *   logged and whatever was received before the failure is returned
 *   (possibly the empty string); this function does not reject for
 *   streaming errors.
 */
export async function predict(inputs: string): Promise<string> {
  console.log(`predict: `, inputs)

  let instructions = ""
  try {
    for await (const output of hf.textGenerationStream({
      inputs,
      parameters: {
        do_sample: true,
        // hard limit for max_new_tokens is 1512
        max_new_tokens: 330, // 1150,
        return_full_text: false,
      }
    })) {
      instructions += output.token.text
      process.stdout.write(output.token.text)

      // A marker may be split across several tokens, so look at the whole
      // accumulated text rather than only the latest token.
      const stopAt = findFirstStopMarker(instructions)
      if (stopAt !== -1) {
        // Cut at the marker instead of merely deleting it: text emitted
        // after the marker in the same chunk is not part of the answer.
        instructions = instructions.slice(0, stopAt)
        break
      }
    }
  } catch (err) {
    // Best effort: keep whatever was streamed before the failure.
    console.error(`error during generation: ${err}`)
  }

  // Clean up garbage the LLM might have given us: collapse doubled double-quotes.
  return instructions.split('""').join('"')
}