Spaces:
Runtime error
Runtime error
matt HOFFNER
committed on
Commit
·
1300e36
1
Parent(s):
5d239ba
cleanup
Browse files
- package-lock.json +13 -2
- package.json +1 -0
- src/components/ChatWindow.jsx +22 -11
- src/components/FileLoader.jsx +11 -9
package-lock.json
CHANGED
@@ -16,11 +16,11 @@
|
|
16 |
"@types/react-dom": "18.2.4",
|
17 |
"@xenova/transformers": "^2.1.1",
|
18 |
"chromadb": "^1.5.2",
|
|
|
19 |
"dexie": "^3.2.4",
|
20 |
"eslint": "8.40.0",
|
21 |
"eslint-config-next": "13.4.2",
|
22 |
"fs-extra": "^11.1.1",
|
23 |
-
"hnswlib-node": "^1.4.2",
|
24 |
"langchain": "^0.0.90",
|
25 |
"next": "13.4.2",
|
26 |
"pdfjs-dist": "^3.7.107",
|
@@ -1968,6 +1968,8 @@
|
|
1968 |
"version": "1.5.0",
|
1969 |
"resolved": "https://registry.npmjs.org/bindings/-/bindings-1.5.0.tgz",
|
1970 |
"integrity": "sha512-p2q/t/mhvuOj/UeLlV6566GD/guowlr0hHxClI0W9m7MWYkL1F0hLo+0Aexs9HSPCtR1SXQ0TD3MMKrXZajbiQ==",
|
|
|
|
|
1971 |
"dependencies": {
|
1972 |
"file-uri-to-path": "1.0.0"
|
1973 |
}
|
@@ -2220,6 +2222,11 @@
|
|
2220 |
"resolved": "https://registry.npmjs.org/client-only/-/client-only-0.0.1.tgz",
|
2221 |
"integrity": "sha512-IV3Ou0jSMzZrd3pZ48nLkT9DA7Ag1pnPzaiQhpW7c3RbcqqzvzzVu+L8gfqMp/8IM2MQtSiqaCxrrcfu8I8rMA=="
|
2222 |
},
|
|
|
|
|
|
|
|
|
|
|
2223 |
"node_modules/color": {
|
2224 |
"version": "4.2.3",
|
2225 |
"resolved": "https://registry.npmjs.org/color/-/color-4.2.3.tgz",
|
@@ -3285,7 +3292,9 @@
|
|
3285 |
"node_modules/file-uri-to-path": {
|
3286 |
"version": "1.0.0",
|
3287 |
"resolved": "https://registry.npmjs.org/file-uri-to-path/-/file-uri-to-path-1.0.0.tgz",
|
3288 |
-
"integrity": "sha512-0Zt+s3L7Vf1biwWZ29aARiVYLx7iMGnEUl9x33fbB/j3jR81u/O2LbqK+Bm1CDSNDKVtJ/YjwY7TUd5SkeLQLw=="
|
|
|
|
|
3289 |
},
|
3290 |
"node_modules/filelist": {
|
3291 |
"version": "1.0.4",
|
@@ -3802,6 +3811,8 @@
|
|
3802 |
"resolved": "https://registry.npmjs.org/hnswlib-node/-/hnswlib-node-1.4.2.tgz",
|
3803 |
"integrity": "sha512-76PIzOaNcX8kOpKwlFPl07uelpctqDMzbiC+Qsk2JWNVkzeU/6iXRk4tfE9z3DoK1RCBrOaFXmQ6RFb1BVF9LA==",
|
3804 |
"hasInstallScript": true,
|
|
|
|
|
3805 |
"dependencies": {
|
3806 |
"bindings": "^1.5.0",
|
3807 |
"node-addon-api": "^6.0.0"
|
|
|
16 |
"@types/react-dom": "18.2.4",
|
17 |
"@xenova/transformers": "^2.1.1",
|
18 |
"chromadb": "^1.5.2",
|
19 |
+
"cohere-ai": "^5.1.0",
|
20 |
"dexie": "^3.2.4",
|
21 |
"eslint": "8.40.0",
|
22 |
"eslint-config-next": "13.4.2",
|
23 |
"fs-extra": "^11.1.1",
|
|
|
24 |
"langchain": "^0.0.90",
|
25 |
"next": "13.4.2",
|
26 |
"pdfjs-dist": "^3.7.107",
|
|
|
1968 |
"version": "1.5.0",
|
1969 |
"resolved": "https://registry.npmjs.org/bindings/-/bindings-1.5.0.tgz",
|
1970 |
"integrity": "sha512-p2q/t/mhvuOj/UeLlV6566GD/guowlr0hHxClI0W9m7MWYkL1F0hLo+0Aexs9HSPCtR1SXQ0TD3MMKrXZajbiQ==",
|
1971 |
+
"optional": true,
|
1972 |
+
"peer": true,
|
1973 |
"dependencies": {
|
1974 |
"file-uri-to-path": "1.0.0"
|
1975 |
}
|
|
|
2222 |
"resolved": "https://registry.npmjs.org/client-only/-/client-only-0.0.1.tgz",
|
2223 |
"integrity": "sha512-IV3Ou0jSMzZrd3pZ48nLkT9DA7Ag1pnPzaiQhpW7c3RbcqqzvzzVu+L8gfqMp/8IM2MQtSiqaCxrrcfu8I8rMA=="
|
2224 |
},
|
2225 |
+
"node_modules/cohere-ai": {
|
2226 |
+
"version": "5.1.0",
|
2227 |
+
"resolved": "https://registry.npmjs.org/cohere-ai/-/cohere-ai-5.1.0.tgz",
|
2228 |
+
"integrity": "sha512-7q3z3w6GSoPxQqRL9G6QTaQ0e513auVE1JlNDnqnoFEXGtDbkVfaTOliR5qrMoK//74Csb0NW669evqngwPx3g=="
|
2229 |
+
},
|
2230 |
"node_modules/color": {
|
2231 |
"version": "4.2.3",
|
2232 |
"resolved": "https://registry.npmjs.org/color/-/color-4.2.3.tgz",
|
|
|
3292 |
"node_modules/file-uri-to-path": {
|
3293 |
"version": "1.0.0",
|
3294 |
"resolved": "https://registry.npmjs.org/file-uri-to-path/-/file-uri-to-path-1.0.0.tgz",
|
3295 |
+
"integrity": "sha512-0Zt+s3L7Vf1biwWZ29aARiVYLx7iMGnEUl9x33fbB/j3jR81u/O2LbqK+Bm1CDSNDKVtJ/YjwY7TUd5SkeLQLw==",
|
3296 |
+
"optional": true,
|
3297 |
+
"peer": true
|
3298 |
},
|
3299 |
"node_modules/filelist": {
|
3300 |
"version": "1.0.4",
|
|
|
3811 |
"resolved": "https://registry.npmjs.org/hnswlib-node/-/hnswlib-node-1.4.2.tgz",
|
3812 |
"integrity": "sha512-76PIzOaNcX8kOpKwlFPl07uelpctqDMzbiC+Qsk2JWNVkzeU/6iXRk4tfE9z3DoK1RCBrOaFXmQ6RFb1BVF9LA==",
|
3813 |
"hasInstallScript": true,
|
3814 |
+
"optional": true,
|
3815 |
+
"peer": true,
|
3816 |
"dependencies": {
|
3817 |
"bindings": "^1.5.0",
|
3818 |
"node-addon-api": "^6.0.0"
|
package.json
CHANGED
@@ -16,6 +16,7 @@
|
|
16 |
"@types/react-dom": "18.2.4",
|
17 |
"@xenova/transformers": "^2.1.1",
|
18 |
"chromadb": "^1.5.2",
|
|
|
19 |
"dexie": "^3.2.4",
|
20 |
"eslint": "8.40.0",
|
21 |
"eslint-config-next": "13.4.2",
|
|
|
16 |
"@types/react-dom": "18.2.4",
|
17 |
"@xenova/transformers": "^2.1.1",
|
18 |
"chromadb": "^1.5.2",
|
19 |
+
"cohere-ai": "^5.1.0",
|
20 |
"dexie": "^3.2.4",
|
21 |
"eslint": "8.40.0",
|
22 |
"eslint-config-next": "13.4.2",
|
src/components/ChatWindow.jsx
CHANGED
@@ -5,9 +5,11 @@ import MessageList from './MessageList';
|
|
5 |
import {FileLoader} from './FileLoader';
|
6 |
import Loader from "./Loader";
|
7 |
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
|
|
|
8 |
import { ChromaClient } from "chromadb";
|
9 |
|
10 |
const client = new ChromaClient();
|
|
|
11 |
|
12 |
function ChatWindow({
|
13 |
stopStrings,
|
@@ -29,19 +31,28 @@ function ChatWindow({
|
|
29 |
}
|
30 |
|
31 |
if (fileText) {
|
|
|
32 |
const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 1000 });
|
33 |
-
const docs = await textSplitter.createDocuments([
|
34 |
-
|
35 |
-
await
|
|
|
|
|
|
|
|
|
36 |
ids: [...docs.map((v, k) => k)],
|
37 |
metadatas: [...docs.map(doc => doc.metadata)],
|
38 |
documents: [...docs.map(doc => doc.pageContent)],
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
|
|
|
|
|
|
|
|
45 |
|
46 |
const qaPrompt =
|
47 |
`You are an AI assistant providing helpful advice. You are given the following extracted parts of a long document and a question. Provide a conversational answer based on the context provided.
|
@@ -50,7 +61,7 @@ function ChatWindow({
|
|
50 |
If the question is not related to the context, politely respond that you are tuned to only answer questions that are related to the context.
|
51 |
Question: ${userInput}
|
52 |
=========
|
53 |
-
${
|
54 |
=========
|
55 |
Answer:
|
56 |
`
|
@@ -85,7 +96,7 @@ function ChatWindow({
|
|
85 |
}, [handleSubmit]);
|
86 |
|
87 |
const loadFile = async () => {
|
88 |
-
console.log('
|
89 |
}
|
90 |
|
91 |
useEffect(() => {
|
|
|
5 |
import {FileLoader} from './FileLoader';
|
6 |
import Loader from "./Loader";
|
7 |
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
|
8 |
+
import { TransformersEmbeddingFunction } from '../embed/hf';
|
9 |
import { ChromaClient } from "chromadb";
|
10 |
|
11 |
const client = new ChromaClient();
|
12 |
+
const embedder = new TransformersEmbeddingFunction({});
|
13 |
|
14 |
function ChatWindow({
|
15 |
stopStrings,
|
|
|
31 |
}
|
32 |
|
33 |
if (fileText) {
|
34 |
+
console.log('found file text splitting into chunks')
|
35 |
const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 1000 });
|
36 |
+
const docs = await textSplitter.createDocuments([fileText]);
|
37 |
+
console.log(`split docs: ${docs}`);
|
38 |
+
const collection = await client.createCollection({name: "docs", embeddingFunction: embedder })
|
39 |
+
console.log(`collection: ${collection}`);
|
40 |
+
let queryResult;
|
41 |
+
try {
|
42 |
+
await collection.add({
|
43 |
ids: [...docs.map((v, k) => k)],
|
44 |
metadatas: [...docs.map(doc => doc.metadata)],
|
45 |
documents: [...docs.map(doc => doc.pageContent)],
|
46 |
+
});
|
47 |
+
const queryResult = await collection.query({
|
48 |
+
nResults: 2,
|
49 |
+
queryTexts: [userPrompt]
|
50 |
+
});
|
51 |
+
console.log(queryResult);
|
52 |
+
} catch (err) {
|
53 |
+
console.log(err);
|
54 |
+
}
|
55 |
+
|
56 |
|
57 |
const qaPrompt =
|
58 |
`You are an AI assistant providing helpful advice. You are given the following extracted parts of a long document and a question. Provide a conversational answer based on the context provided.
|
|
|
61 |
If the question is not related to the context, politely respond that you are tuned to only answer questions that are related to the context.
|
62 |
Question: ${userInput}
|
63 |
=========
|
64 |
+
${queryResult}
|
65 |
=========
|
66 |
Answer:
|
67 |
`
|
|
|
96 |
}, [handleSubmit]);
|
97 |
|
98 |
const loadFile = async () => {
|
99 |
+
console.log('file loaded');
|
100 |
}
|
101 |
|
102 |
useEffect(() => {
|
src/components/FileLoader.jsx
CHANGED
@@ -5,6 +5,13 @@ import * as PDFJS from 'pdfjs-dist/build/pdf';
|
|
5 |
|
6 |
PDFJS.GlobalWorkerOptions.workerSrc = `//cdnjs.cloudflare.com/ajax/libs/pdf.js/${PDFJS.version}/pdf.worker.min.js`;
|
7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
export default class Pdf {
|
9 |
static async getPageText(pdf, pageNo) {
|
10 |
const page = await pdf.getPage(pageNo);
|
@@ -43,16 +50,11 @@ export const FileLoader = ({ setFileText }) => {
|
|
43 |
const blob = new Blob([file], { type: 'text/plain' });
|
44 |
if (file.type === "application/pdf") {
|
45 |
text = await Pdf.getPDFText(URL.createObjectURL(blob));
|
46 |
-
} else {
|
47 |
-
|
48 |
-
|
49 |
-
reader.addEventListener('load', function (e) {
|
50 |
-
text = e.target.result;
|
51 |
-
});
|
52 |
-
|
53 |
-
reader.readAsBinaryString(file);
|
54 |
}
|
55 |
-
|
|
|
56 |
setUploadStatus("Embed Complete");
|
57 |
}
|
58 |
}}
|
|
|
5 |
|
6 |
PDFJS.GlobalWorkerOptions.workerSrc = `//cdnjs.cloudflare.com/ajax/libs/pdf.js/${PDFJS.version}/pdf.worker.min.js`;
|
7 |
|
8 |
+
const readFile = (blob) => new Promise((resolve, reject) => {
|
9 |
+
const reader = new FileReader();
|
10 |
+
reader.onload = (event) => resolve(event.target.result);
|
11 |
+
reader.onerror = reject;
|
12 |
+
reader.readAsText(blob);
|
13 |
+
});
|
14 |
+
|
15 |
export default class Pdf {
|
16 |
static async getPageText(pdf, pageNo) {
|
17 |
const page = await pdf.getPage(pageNo);
|
|
|
50 |
const blob = new Blob([file], { type: 'text/plain' });
|
51 |
if (file.type === "application/pdf") {
|
52 |
text = await Pdf.getPDFText(URL.createObjectURL(blob));
|
53 |
+
} else {
|
54 |
+
text = await readFile(file)
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
}
|
56 |
+
console.log(`file text: ${text}`);
|
57 |
+
setFileText(text);
|
58 |
setUploadStatus("Embed Complete");
|
59 |
}
|
60 |
}}
|