Update app.py
Browse files
app.py
CHANGED
@@ -6,14 +6,12 @@ print(dataset)
|
|
6 |
|
7 |
from langchain.docstore.document import Document as LangchainDocument
|
8 |
|
9 |
-
RAW_KNOWLEDGE_BASE = [
|
10 |
-
LangchainDocument(page_content=["dataset"])
|
11 |
-
]
|
12 |
|
13 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
14 |
splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=15,separators=["\n\n", "\n", " ", ""])
|
15 |
-
docs = splitter.split_documents(RAW_KNOWLEDGE_BASE)
|
16 |
-
|
17 |
|
18 |
|
19 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
@@ -25,7 +23,7 @@ from langchain_community.vectorstores import Chroma
|
|
25 |
persist_directory = 'docs/chroma/'
|
26 |
|
27 |
vectordb = Chroma.from_documents(
|
28 |
-
documents=
|
29 |
embedding=embedding_model,
|
30 |
persist_directory=persist_directory
|
31 |
)
|
|
|
6 |
|
7 |
from langchain.docstore.document import Document as LangchainDocument
|
8 |
|
9 |
+
#RAW_KNOWLEDGE_BASE = [LangchainDocument(page_content=str(dataset))]  # page_content must be a str, not a list
|
|
|
|
|
10 |
|
11 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
12 |
splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=15,separators=["\n\n", "\n", " ", ""])
|
13 |
+
#docs = splitter.split_documents(RAW_KNOWLEDGE_BASE)
|
14 |
+
docs = splitter.create_documents([str(dataset)])  # create_documents (plural) takes a list of strings
|
15 |
|
16 |
|
17 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
|
|
23 |
persist_directory = 'docs/chroma/'
|
24 |
|
25 |
vectordb = Chroma.from_documents(
|
26 |
+
documents=docs,
|
27 |
embedding=embedding_model,
|
28 |
persist_directory=persist_directory
|
29 |
)
|