import {throttle} from "throttle-debounce";
import {Post, Topic} from "@/contexts/topics";
import {iso8601ToFrench, frenchToIso8601, getCurrentTimeIso8601} from "./dates";
import {Settings} from "@/contexts/settings";
import {LogAction} from "@/contexts/log";
import {streamAPI as beamStreamAPI} from "@/utils/beam";

// Patterns used to pull the individual fields back out of the raw token stream.
const titleRegex = /Sujet\s+:\s+"(.+?)"?<\|eot_id\|>/;
const userRegex = /<\|im_pseudo\|>([^<]+)<\|end_pseudo\|>/;
const dateRegex = /<\|im_date\|>([^<]+)<\|end_date\|>/;
const contentRegex = /<\|begin_of_post\|>([\s\S]+)(?:<\|end_of_post\|>)?$/;

/** Marker that terminates every post in the token stream. */
const END_OF_POST = "<|end_of_post|>";

/** Minimum delay, in milliseconds, between two intermediate `feed` calls while streaming. */
const FEED_INTERVAL_MS = 250;

/** A streaming backend: yields chunks of generated text for a given prompt. */
type TokenFetcher = (prompt: string, settings: Settings, log: LogAction) => AsyncIterable<string>;

/**
 * Generates a brand new topic and streams it to `feed` as it is produced.
 *
 * @param settings Generation settings; `settings.apiType` selects the backend.
 * @param log Log action forwarded to the backend.
 * @param topicId Identifier given to every topic passed to `feed`.
 * @param feed Called with the topic parsed so far (throttled), then once with the final topic.
 * @throws Error if the API type is not supported, or if the final buffer cannot be parsed.
 */
export async function feedTopic(
    settings: Settings,
    log: LogAction,
    topicId: string,
    feed: (topic: Topic) => void
): Promise<void> {
    // A new topic starts from an empty prompt.
    await streamTopic(settings, log, topicId, "", feed);
}

/**
 * Generates follow-up posts for an existing topic and streams the result to `feed`.
 *
 * The existing topic is serialized into a prompt; the parse buffer starts with that
 * prompt, so `feed` receives the whole topic (existing posts plus generated ones).
 *
 * @param settings Generation settings; `settings.apiType` selects the backend.
 * @param log Log action forwarded to the backend.
 * @param topic Topic to continue; must contain at least one post.
 * @param feed Called with the topic parsed so far (throttled), then once with the final topic.
 * @throws Error if the topic has no post, if the API type is not supported, or if the
 *         final buffer cannot be parsed.
 */
export async function feedPosts(
    settings: Settings,
    log: LogAction,
    topic: Topic,
    feed: (topic: Topic) => void // topic with posts added
): Promise<void> {
    // TODO: to avoid an overly long context, when the topic exceeds a certain number of
    // posts, only use the first 3 and the last 3 posts to build the context.
    const context = tokenizeTopic(topic);
    await streamTopic(settings, log, topic.id, context, feed);
}

/** Returns the streaming backend matching `settings.apiType`, or throws if there is none. */
function resolveFetcher(settings: Settings): TokenFetcher {
    if (settings.apiType === "beam") {
        return beamStreamAPI;
    }
    // Fail with an explicit message instead of calling an unassigned fetcher.
    throw new Error(`Type d'API non pris en charge : ${String(settings.apiType)}`);
}

/**
 * Streams tokens for `prompt`, feeding throttled intermediate topics and then the final one.
 * The parse buffer is seeded with `prompt` so previously existing posts are part of the result.
 */
async function streamTopic(
    settings: Settings,
    log: LogAction,
    topicId: string,
    prompt: string,
    feed: (topic: Topic) => void
): Promise<void> {
    const fetcher = resolveFetcher(settings);

    const throttledFeed = throttle(FEED_INTERVAL_MS, (text: string) => {
        try {
            feed(tokensToTopic(topicId, text));
        } catch {
            // Best effort: an intermediate buffer may not be parseable yet. The final,
            // unthrottled feed below reports any real error.
        }
    }, {noLeading: true, noTrailing: false, debounceMode: false});

    let buffer = prompt;
    try {
        for await (const tokens of fetcher(prompt, settings, log)) {
            buffer += tokens;
            throttledFeed(buffer);
        }
    } finally {
        // Always drop the pending trailing call, even when the stream fails, so no stale
        // intermediate topic is fed after this function has settled.
        throttledFeed.cancel();
    }

    feed(tokensToTopic(topicId, buffer));
}

/**
 * Parses a raw token buffer into a topic.
 *
 * Only posts terminated by the end-of-post marker are parsed; whatever follows the last
 * marker (nothing, or a post still being generated) is ignored.
 *
 * @throws Error if the title cannot be found in the first post, or if a post is malformed.
 */
function tokensToTopic(id: string, tokens: string): Topic {
    const topic: Topic = {
        id: id,
        title: "",
        posts: [],
    };

    // The last chunk is either empty or an unfinished post, so drop it.
    const completedPosts = tokens.split(END_OF_POST).slice(0, -1);

    for (const postTokens of completedPosts) {
        // The title is carried by the header that precedes the first post.
        if (topic.posts.length < 1) {
            const titleMatch = postTokens.match(titleRegex);
            if (!titleMatch) throw new Error("Impossible de trouver le titre du sujet");
            topic.title = titleMatch[1];
        }

        topic.posts.push(...tokensToPosts(postTokens));
    }

    return topic;
}

/**
 * Parses every post found in `tokens` (posts are separated by the end-of-post marker).
 *
 * NOTE(review): `generationDate` is set from `getCurrentTimeIso8601()` for every parsed
 * post. Since `feedPosts` re-parses the already existing posts from the context, their
 * `generationDate` is re-stamped on every feed — confirm this is intended.
 *
 * @throws Error if a non-empty chunk lacks an author, a date or a content.
 */
function tokensToPosts(tokens: string): Post[] {
    const posts: Post[] = [];

    for (const postTokens of tokens.split(END_OF_POST)) {
        // TODO: remove the last chunk instead of skipping empty ones, because the last
        // chunk can be an incomplete post.
        if (postTokens.length < 1) {
            continue;
        }

        const userMatch = postTokens.match(userRegex);
        if (!userMatch) throw new Error("Impossible de trouver le nom de l'auteur du message");

        const dateMatch = postTokens.match(dateRegex);
        if (!dateMatch) throw new Error("Impossible de trouver la date du message");

        const contentMatch = postTokens.match(contentRegex);
        if (!contentMatch) throw new Error("Impossible de trouver le contenu du message");

        posts.push({
            user: userMatch[1],
            date: frenchToIso8601(dateMatch[1]),
            generationDate: getCurrentTimeIso8601(),
            content: contentMatch[1],
        });
    }

    return posts;
}

/**
 * Serializes a topic (subject header followed by all its posts) into the prompt format.
 *
 * @throws Error if the topic has no post.
 */
function tokenizeTopic(topic: Topic): string {
    if (topic.posts.length === 0) {
        throw new Error("Topic must have at least one post")
    }

    // The author of the first post is the topic's original poster.
    const poster = topic.posts[0].user;
    const tokenizedPosts = topic.posts.map(post => tokenizePost(post, poster)).join("");

    const lines = [
        "<|start_header_id|><|sujet|><|end_header_id|>",
        "",
        `Sujet : "${topic.title}"`,
    ];

    return lines.join("\n") + tokenizedPosts;
}

/**
 * Serializes a single post into the prompt format.
 *
 * @param post Post to serialize.
 * @param poster User name of the topic's original poster; selects the role tag of the post.
 */
function tokenizePost(post: Post, poster: string): string {
    const lines = [
        `<|eot_id|><|start_header_id|><|${post.user === poster ? "autheur" : "khey"}|>`,
        "<|end_header_id|>",
        "",
        `<|im_pseudo|>${post.user}<|end_pseudo|>`,
        `<|im_date|>Le ${iso8601ToFrench(post.date)}<|end_date|>`,
        "",
        `<|begin_of_post|>${post.content}<|end_of_post|>`
    ];

    return lines.join("\n");
}