Namitg02 committed on
Commit
30b8a93
·
verified ·
1 Parent(s): 229c387

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -2
app.py CHANGED
@@ -4,10 +4,16 @@ from datasets import load_dataset
4
  dataset = load_dataset("Namitg02/Test")
5
  print(dataset)
6
 
 
 
 
 
 
 
7
  from langchain.text_splitter import RecursiveCharacterTextSplitter
8
  splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=15,separators=["\n\n", "\n", " ", ""])
9
- #docs = splitter.split_documents(dataset)
10
- docs = splitter.split_text(str(dataset))
11
 
12
 
13
  from langchain_community.embeddings import HuggingFaceEmbeddings
@@ -17,6 +23,7 @@ embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
17
 
18
  from langchain_community.vectorstores import Chroma
19
  persist_directory = 'docs/chroma/'
 
20
  vectordb = Chroma.from_documents(
21
  documents=[docs],
22
  embedding=embedding_model,
 
4
  dataset = load_dataset("Namitg02/Test")
5
  print(dataset)
6
 
7
+ from langchain.docstore.document import Document as LangchainDocument
8
+
9
+ RAW_KNOWLEDGE_BASE = [
10
+ LangchainDocument(page_content=doc["dataset"], metadata={"one": doc["two"]})
11
+ ]
12
+
13
  from langchain.text_splitter import RecursiveCharacterTextSplitter
14
  splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=15,separators=["\n\n", "\n", " ", ""])
15
+ docs = splitter.split_documents(RAW_KNOWLEDGE_BASE)
16
+ #docs = splitter.split_text(str(dataset))
17
 
18
 
19
  from langchain_community.embeddings import HuggingFaceEmbeddings
 
23
 
24
  from langchain_community.vectorstores import Chroma
25
  persist_directory = 'docs/chroma/'
26
+
27
  vectordb = Chroma.from_documents(
28
  documents=[docs],
29
  embedding=embedding_model,