|
import {throttle} from "throttle-debounce"; |
|
import {Post, Topic} from "@/contexts/topics"; |
|
import {iso8601ToFrench, frenchToIso8601, getCurrentTimeIso8601} from "./dates" |
|
import {Settings} from "@/contexts/settings"; |
|
import {LogAction} from "@/contexts/log"; |
|
import {streamAPI as beamStreamAPI} from "@/utils/beam"; |
|
|
|
// Topic title from the header chunk: `Sujet : "<title>"` up to the end-of-turn
// marker. The closing quote is optional so a partially streamed title matches.
const titleRegex = /Sujet\s+:\s+"(.+?)"?<\|eot_id\|>/;

// Author pseudo between its delimiter tokens.
const userRegex = /<\|im_pseudo\|>([^<]+)<\|end_pseudo\|>/;

// French-formatted post date between its delimiter tokens.
const dateRegex = /<\|im_date\|>([^<]+)<\|end_date\|>/;

// Post body from <|begin_of_post|> to end of chunk. NOTE(review): the optional
// <|end_of_post|> group can never strip the tag (the greedy [\s\S]+ would
// swallow it first); in practice callers split on <|end_of_post|> beforehand,
// so the tag is already gone when this regex runs.
const contentRegex = /<\|begin_of_post\|>([\s\S]+)(?:<\|end_of_post\|>)?$/;
|
|
|
export async function feedTopic( |
|
settings: Settings, |
|
log: LogAction, |
|
topicId: string, |
|
feed: (topic: Topic) => void |
|
): Promise<void> { |
|
|
|
let fetcher: (prompt: string, settings: Settings, log: LogAction) => AsyncGenerator<string>; |
|
|
|
if (settings.apiType === "beam") { |
|
fetcher = beamStreamAPI; |
|
} |
|
|
|
const throttledTokensToTopic = throttle(250, (buffer: string) => { |
|
try { |
|
|
|
feed(tokensToTopic(topicId, buffer)); |
|
|
|
} catch (e) { |
|
|
|
} |
|
}, {noLeading: true, noTrailing: false, debounceMode: false}); |
|
|
|
let buffer = ""; |
|
for await (const tokens of fetcher("", settings, log)) { |
|
|
|
buffer += tokens; |
|
throttledTokensToTopic(buffer); |
|
} |
|
|
|
throttledTokensToTopic.cancel(); |
|
|
|
|
|
|
|
feed(tokensToTopic(topicId, buffer)); |
|
} |
|
|
|
export async function feedPosts( |
|
settings: Settings, |
|
log: LogAction, |
|
topic: Topic, |
|
feed: (topic: Topic) => void |
|
): Promise<void> { |
|
|
|
|
|
const context = tokenizeTopic(topic); |
|
|
|
let fetcher: (prompt: string, settings: Settings, log: LogAction) => AsyncGenerator<string>; |
|
|
|
if (settings.apiType === "beam") { |
|
fetcher = beamStreamAPI; |
|
} |
|
|
|
const throttledTokensToTopic = throttle(250, (buffer: string) => { |
|
try { |
|
|
|
feed(tokensToTopic(topic.id, buffer)); |
|
|
|
} catch (e) { |
|
|
|
} |
|
}, {noLeading: true, noTrailing: false, debounceMode: false}); |
|
|
|
let buffer = context; |
|
for await (const tokens of fetcher(context, settings, log)) { |
|
|
|
buffer += tokens; |
|
throttledTokensToTopic(buffer); |
|
} |
|
|
|
throttledTokensToTopic.cancel(); |
|
|
|
feed(tokensToTopic(topic.id, buffer)); |
|
} |
|
|
|
function tokensToTopic(id: string, tokens: string): Topic { |
|
const topic: Topic = { |
|
id: id, |
|
title: "", |
|
posts: [], |
|
}; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for(const postTokens of tokens.split("<|end_of_post|>").slice(0, -1)) { |
|
|
|
|
|
|
|
|
|
if(topic.posts.length < 1) { |
|
const titleMatch = postTokens.match(titleRegex); |
|
if(!titleMatch) throw new Error("Impossible de trouver le titre du sujet"); |
|
|
|
|
|
topic.title = titleMatch[1]; |
|
} |
|
|
|
|
|
topic.posts = topic.posts.concat(tokensToPosts(postTokens)); |
|
} |
|
|
|
return topic; |
|
} |
|
|
|
function tokensToPosts(tokens: string): Post[] { |
|
const posts: Post[] = []; |
|
|
|
for(const postTokens of tokens.split("<|end_of_post|>")) { |
|
|
|
|
|
if(postTokens.length < 1) { |
|
continue; |
|
} |
|
|
|
|
|
|
|
|
|
const userMatch = postTokens.match(userRegex); |
|
if(!userMatch) throw new Error("Impossible de trouver le nom de l'auteur du message"); |
|
|
|
|
|
const dateMatch = postTokens.match(dateRegex); |
|
if(!dateMatch) throw new Error("Impossible de trouver la date du message"); |
|
|
|
|
|
const contentMatch = postTokens.match(contentRegex); |
|
if(!contentMatch) throw new Error("Impossible de trouver le contenu du message"); |
|
|
|
|
|
posts.push({ |
|
user: userMatch[1], |
|
date: frenchToIso8601(dateMatch[1]), |
|
generationDate: getCurrentTimeIso8601(), |
|
content: contentMatch[1], |
|
}); |
|
} |
|
|
|
return posts; |
|
} |
|
|
|
|
|
function tokenizeTopic(topic: Topic): string { |
|
if (topic.posts.length === 0) { |
|
throw new Error("Topic must have at least one post") |
|
} |
|
|
|
const tokenizedPosts = topic.posts.map(post => tokenizePost(post, topic.posts[0].user)).flat().join(""); |
|
|
|
|
|
|
|
|
|
let lines = [ |
|
"<|start_header_id|><|sujet|><|end_header_id|>", |
|
"", |
|
`Sujet : "${topic.title}"`, |
|
]; |
|
|
|
return lines.join("\n") + tokenizedPosts; |
|
} |
|
|
|
function tokenizePost(post: Post, poster: string): string { |
|
let lines = [ |
|
`<|eot_id|><|start_header_id|><|${post.user === poster ? "autheur" : "khey"}|>`, |
|
"<|end_header_id|>", |
|
"", |
|
`<|im_pseudo|>${post.user}<|end_pseudo|>`, |
|
`<|im_date|>Le ${iso8601ToFrench(post.date)}<|end_date|>`, |
|
"", |
|
`<|begin_of_post|>${post.content}<|end_of_post|>` |
|
]; |
|
|
|
return lines.join("\n"); |
|
} |