// web-llm-embed — src/pages/api/docHandle.ts
// "less langchain more chroma" (commit 8c64b1d, matt hoffner)
import type { NextApiRequest, NextApiResponse } from 'next';
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
import { TransformersEmbeddingFunction } from '../../embed/hf';
const {ChromaClient} = require('chromadb');
const client = new ChromaClient();
/**
 * Splits raw text into ~1000-character chunks and stores them in the
 * Chroma collection named "docs".
 *
 * @param text - Raw document text to index.
 * @returns The Chroma collection holding the chunked documents.
 */
async function handleDocs(text: string) {
  const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 1000 });
  const docs = await textSplitter.createDocuments([text]);
  // getOrCreateCollection: createCollection throws if "docs" already exists,
  // which it will on every request after the first to this route.
  // NOTE(review): the Chroma JS client expects embeddingFunction to be an
  // *instance* exposing generate() — confirm TransformersEmbeddingFunction
  // is exported as an instance rather than a class.
  const collection = await client.getOrCreateCollection({
    name: "docs",
    embeddingFunction: TransformersEmbeddingFunction,
  });
  await collection.add({
    // Chroma requires string ids; the chunk index is unique per add() call.
    // (map() already returns a new array — no spread needed.)
    ids: docs.map((_, i) => i.toString()),
    metadatas: docs.map((doc) => doc.metadata),
    documents: docs.map((doc) => doc.pageContent),
  });
  return collection;
}
/**
 * POST handler: expects a JSON body `{ text: string }`, chunks and indexes
 * the text into a Chroma collection, and responds with the collection.
 *
 * Responds 400 when the body is malformed or `text` is missing/empty.
 */
export default async function handler(
  req: NextApiRequest,
  res: NextApiResponse,
) {
  // With bodyParser enabled (see `config` below), Next.js already parses
  // application/json bodies — req.body is then an object, and calling
  // JSON.parse on it would throw ("[object Object]" is not valid JSON).
  // Handle both the pre-parsed and raw-string cases, and don't let a
  // malformed body escape as an unhandled 500.
  let text: string | undefined;
  try {
    const body =
      typeof req.body === 'string' ? JSON.parse(req.body) : req.body;
    text = body?.text;
  } catch {
    return res.status(400).json({ message: 'Invalid JSON in the request body' });
  }
  if (!text) {
    // The field is `text`; the old message incorrectly said "question".
    return res.status(400).json({ message: 'No text in the request' });
  }
  const vectorStore = await handleDocs(text);
  res.status(200).send({
    model: vectorStore,
  });
}
// Next.js API route config.
export const config = {
  api: {
    // Enable the built-in body parser so req.body is populated for the
    // handler above. (The previous comment claimed parsing was disabled,
    // contradicting the value set here.)
    bodyParser: true,
  },
};