matt HOFFNER committed on
Commit
1300e36
·
1 Parent(s): 5d239ba
package-lock.json CHANGED
@@ -16,11 +16,11 @@
16
  "@types/react-dom": "18.2.4",
17
  "@xenova/transformers": "^2.1.1",
18
  "chromadb": "^1.5.2",
 
19
  "dexie": "^3.2.4",
20
  "eslint": "8.40.0",
21
  "eslint-config-next": "13.4.2",
22
  "fs-extra": "^11.1.1",
23
- "hnswlib-node": "^1.4.2",
24
  "langchain": "^0.0.90",
25
  "next": "13.4.2",
26
  "pdfjs-dist": "^3.7.107",
@@ -1968,6 +1968,8 @@
1968
  "version": "1.5.0",
1969
  "resolved": "https://registry.npmjs.org/bindings/-/bindings-1.5.0.tgz",
1970
  "integrity": "sha512-p2q/t/mhvuOj/UeLlV6566GD/guowlr0hHxClI0W9m7MWYkL1F0hLo+0Aexs9HSPCtR1SXQ0TD3MMKrXZajbiQ==",
 
 
1971
  "dependencies": {
1972
  "file-uri-to-path": "1.0.0"
1973
  }
@@ -2220,6 +2222,11 @@
2220
  "resolved": "https://registry.npmjs.org/client-only/-/client-only-0.0.1.tgz",
2221
  "integrity": "sha512-IV3Ou0jSMzZrd3pZ48nLkT9DA7Ag1pnPzaiQhpW7c3RbcqqzvzzVu+L8gfqMp/8IM2MQtSiqaCxrrcfu8I8rMA=="
2222
  },
 
 
 
 
 
2223
  "node_modules/color": {
2224
  "version": "4.2.3",
2225
  "resolved": "https://registry.npmjs.org/color/-/color-4.2.3.tgz",
@@ -3285,7 +3292,9 @@
3285
  "node_modules/file-uri-to-path": {
3286
  "version": "1.0.0",
3287
  "resolved": "https://registry.npmjs.org/file-uri-to-path/-/file-uri-to-path-1.0.0.tgz",
3288
- "integrity": "sha512-0Zt+s3L7Vf1biwWZ29aARiVYLx7iMGnEUl9x33fbB/j3jR81u/O2LbqK+Bm1CDSNDKVtJ/YjwY7TUd5SkeLQLw=="
 
 
3289
  },
3290
  "node_modules/filelist": {
3291
  "version": "1.0.4",
@@ -3802,6 +3811,8 @@
3802
  "resolved": "https://registry.npmjs.org/hnswlib-node/-/hnswlib-node-1.4.2.tgz",
3803
  "integrity": "sha512-76PIzOaNcX8kOpKwlFPl07uelpctqDMzbiC+Qsk2JWNVkzeU/6iXRk4tfE9z3DoK1RCBrOaFXmQ6RFb1BVF9LA==",
3804
  "hasInstallScript": true,
 
 
3805
  "dependencies": {
3806
  "bindings": "^1.5.0",
3807
  "node-addon-api": "^6.0.0"
 
16
  "@types/react-dom": "18.2.4",
17
  "@xenova/transformers": "^2.1.1",
18
  "chromadb": "^1.5.2",
19
+ "cohere-ai": "^5.1.0",
20
  "dexie": "^3.2.4",
21
  "eslint": "8.40.0",
22
  "eslint-config-next": "13.4.2",
23
  "fs-extra": "^11.1.1",
 
24
  "langchain": "^0.0.90",
25
  "next": "13.4.2",
26
  "pdfjs-dist": "^3.7.107",
 
1968
  "version": "1.5.0",
1969
  "resolved": "https://registry.npmjs.org/bindings/-/bindings-1.5.0.tgz",
1970
  "integrity": "sha512-p2q/t/mhvuOj/UeLlV6566GD/guowlr0hHxClI0W9m7MWYkL1F0hLo+0Aexs9HSPCtR1SXQ0TD3MMKrXZajbiQ==",
1971
+ "optional": true,
1972
+ "peer": true,
1973
  "dependencies": {
1974
  "file-uri-to-path": "1.0.0"
1975
  }
 
2222
  "resolved": "https://registry.npmjs.org/client-only/-/client-only-0.0.1.tgz",
2223
  "integrity": "sha512-IV3Ou0jSMzZrd3pZ48nLkT9DA7Ag1pnPzaiQhpW7c3RbcqqzvzzVu+L8gfqMp/8IM2MQtSiqaCxrrcfu8I8rMA=="
2224
  },
2225
+ "node_modules/cohere-ai": {
2226
+ "version": "5.1.0",
2227
+ "resolved": "https://registry.npmjs.org/cohere-ai/-/cohere-ai-5.1.0.tgz",
2228
+ "integrity": "sha512-7q3z3w6GSoPxQqRL9G6QTaQ0e513auVE1JlNDnqnoFEXGtDbkVfaTOliR5qrMoK//74Csb0NW669evqngwPx3g=="
2229
+ },
2230
  "node_modules/color": {
2231
  "version": "4.2.3",
2232
  "resolved": "https://registry.npmjs.org/color/-/color-4.2.3.tgz",
 
3292
  "node_modules/file-uri-to-path": {
3293
  "version": "1.0.0",
3294
  "resolved": "https://registry.npmjs.org/file-uri-to-path/-/file-uri-to-path-1.0.0.tgz",
3295
+ "integrity": "sha512-0Zt+s3L7Vf1biwWZ29aARiVYLx7iMGnEUl9x33fbB/j3jR81u/O2LbqK+Bm1CDSNDKVtJ/YjwY7TUd5SkeLQLw==",
3296
+ "optional": true,
3297
+ "peer": true
3298
  },
3299
  "node_modules/filelist": {
3300
  "version": "1.0.4",
 
3811
  "resolved": "https://registry.npmjs.org/hnswlib-node/-/hnswlib-node-1.4.2.tgz",
3812
  "integrity": "sha512-76PIzOaNcX8kOpKwlFPl07uelpctqDMzbiC+Qsk2JWNVkzeU/6iXRk4tfE9z3DoK1RCBrOaFXmQ6RFb1BVF9LA==",
3813
  "hasInstallScript": true,
3814
+ "optional": true,
3815
+ "peer": true,
3816
  "dependencies": {
3817
  "bindings": "^1.5.0",
3818
  "node-addon-api": "^6.0.0"
package.json CHANGED
@@ -16,6 +16,7 @@
16
  "@types/react-dom": "18.2.4",
17
  "@xenova/transformers": "^2.1.1",
18
  "chromadb": "^1.5.2",
 
19
  "dexie": "^3.2.4",
20
  "eslint": "8.40.0",
21
  "eslint-config-next": "13.4.2",
 
16
  "@types/react-dom": "18.2.4",
17
  "@xenova/transformers": "^2.1.1",
18
  "chromadb": "^1.5.2",
19
+ "cohere-ai": "^5.1.0",
20
  "dexie": "^3.2.4",
21
  "eslint": "8.40.0",
22
  "eslint-config-next": "13.4.2",
src/components/ChatWindow.jsx CHANGED
@@ -5,9 +5,11 @@ import MessageList from './MessageList';
5
  import {FileLoader} from './FileLoader';
6
  import Loader from "./Loader";
7
  import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
 
8
  import { ChromaClient } from "chromadb";
9
 
10
  const client = new ChromaClient();
 
11
 
12
  function ChatWindow({
13
  stopStrings,
@@ -29,19 +31,28 @@ function ChatWindow({
29
  }
30
 
31
  if (fileText) {
 
32
  const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 1000 });
33
- const docs = await textSplitter.createDocuments([text]);
34
- const collection = await client.createCollection({name: "docs", embeddingFunction: TransformersEmbeddingFunction})
35
- await collection.add({
 
 
 
 
36
  ids: [...docs.map((v, k) => k)],
37
  metadatas: [...docs.map(doc => doc.metadata)],
38
  documents: [...docs.map(doc => doc.pageContent)],
39
- });
40
- const result = await collection.query({
41
- nResults: 2,
42
- queryTexts: [userPrompt]
43
- });
44
- console.log(result);
 
 
 
 
45
 
46
  const qaPrompt =
47
  `You are an AI assistant providing helpful advice. You are given the following extracted parts of a long document and a question. Provide a conversational answer based on the context provided.
@@ -50,7 +61,7 @@ function ChatWindow({
50
  If the question is not related to the context, politely respond that you are tuned to only answer questions that are related to the context.
51
  Question: ${userInput}
52
  =========
53
- ${result}
54
  =========
55
  Answer:
56
  `
@@ -85,7 +96,7 @@ function ChatWindow({
85
  }, [handleSubmit]);
86
 
87
  const loadFile = async () => {
88
- console.log('test');
89
  }
90
 
91
  useEffect(() => {
 
5
  import {FileLoader} from './FileLoader';
6
  import Loader from "./Loader";
7
  import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
8
+ import { TransformersEmbeddingFunction } from '../embed/hf';
9
  import { ChromaClient } from "chromadb";
10
 
11
  const client = new ChromaClient();
12
+ const embedder = new TransformersEmbeddingFunction({});
13
 
14
  function ChatWindow({
15
  stopStrings,
 
31
  }
32
 
33
  if (fileText) {
34
+ console.log('found file text splitting into chunks')
35
  const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 1000 });
36
+ const docs = await textSplitter.createDocuments([fileText]);
37
+ console.log(`split docs: ${docs}`);
38
+ const collection = await client.createCollection({name: "docs", embeddingFunction: embedder })
39
+ console.log(`collection: ${collection}`);
40
+ let queryResult;
41
+ try {
42
+ await collection.add({
43
  ids: [...docs.map((v, k) => k)],
44
  metadatas: [...docs.map(doc => doc.metadata)],
45
  documents: [...docs.map(doc => doc.pageContent)],
46
+ });
47
+ const queryResult = await collection.query({
48
+ nResults: 2,
49
+ queryTexts: [userPrompt]
50
+ });
51
+ console.log(queryResult);
52
+ } catch (err) {
53
+ console.log(err);
54
+ }
55
+
56
 
57
  const qaPrompt =
58
  `You are an AI assistant providing helpful advice. You are given the following extracted parts of a long document and a question. Provide a conversational answer based on the context provided.
 
61
  If the question is not related to the context, politely respond that you are tuned to only answer questions that are related to the context.
62
  Question: ${userInput}
63
  =========
64
+ ${queryResult}
65
  =========
66
  Answer:
67
  `
 
96
  }, [handleSubmit]);
97
 
98
  const loadFile = async () => {
99
+ console.log('file loaded');
100
  }
101
 
102
  useEffect(() => {
src/components/FileLoader.jsx CHANGED
@@ -5,6 +5,13 @@ import * as PDFJS from 'pdfjs-dist/build/pdf';
5
 
6
  PDFJS.GlobalWorkerOptions.workerSrc = `//cdnjs.cloudflare.com/ajax/libs/pdf.js/${PDFJS.version}/pdf.worker.min.js`;
7
 
 
 
 
 
 
 
 
8
  export default class Pdf {
9
  static async getPageText(pdf, pageNo) {
10
  const page = await pdf.getPage(pageNo);
@@ -43,16 +50,11 @@ export const FileLoader = ({ setFileText }) => {
43
  const blob = new Blob([file], { type: 'text/plain' });
44
  if (file.type === "application/pdf") {
45
  text = await Pdf.getPDFText(URL.createObjectURL(blob));
46
- } else {
47
- let reader = new FileReader();
48
-
49
- reader.addEventListener('load', function (e) {
50
- text = e.target.result;
51
- });
52
-
53
- reader.readAsBinaryString(file);
54
  }
55
- setFileText(text)
 
56
  setUploadStatus("Embed Complete");
57
  }
58
  }}
 
5
 
6
  PDFJS.GlobalWorkerOptions.workerSrc = `//cdnjs.cloudflare.com/ajax/libs/pdf.js/${PDFJS.version}/pdf.worker.min.js`;
7
 
8
+ const readFile = (blob) => new Promise((resolve, reject) => {
9
+ const reader = new FileReader();
10
+ reader.onload = (event) => resolve(event.target.result);
11
+ reader.onerror = reject;
12
+ reader.readAsText(blob);
13
+ });
14
+
15
  export default class Pdf {
16
  static async getPageText(pdf, pageNo) {
17
  const page = await pdf.getPage(pageNo);
 
50
  const blob = new Blob([file], { type: 'text/plain' });
51
  if (file.type === "application/pdf") {
52
  text = await Pdf.getPDFText(URL.createObjectURL(blob));
53
+ } else {
54
+ text = await readFile(file)
 
 
 
 
 
 
55
  }
56
+ console.log(`file text: ${text}`);
57
+ setFileText(text);
58
  setUploadStatus("Embed Complete");
59
  }
60
  }}