Namitg02 committed on
Commit
30ff03c
·
verified ·
1 Parent(s): 6458e52

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -5
app.py CHANGED
@@ -4,18 +4,20 @@ print(dataset)
4
 
5
  from langchain.docstore.document import Document as LangchainDocument
6
  from langchain.text_splitter import RecursiveCharacterTextSplitter
7
- #splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=15,separators=["\n\n", "\n", " ", ""])
8
- #docs = splitter.create_documents(str(dataset))
9
 
10
  from sentence_transformers import SentenceTransformer
11
- #from langchain_community.embeddings import HuggingFaceEmbeddings
12
  embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
13
- data = dataset["train"]
 
 
14
  data = data.add_faiss_index("embeddings") # column name that has the embeddings of the dataset
15
 
16
 
17
  from langchain_community.vectorstores import Chroma
18
- persist_directory = 'docs/chroma/'
19
 
20
  #vectordb = Chroma.from_documents(
21
  # documents=docs,
 
4
 
5
  from langchain.docstore.document import Document as LangchainDocument
6
  from langchain.text_splitter import RecursiveCharacterTextSplitter
7
+ splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=15,separators=["\n\n", "\n", " ", ""])
8
+ docs = splitter.create_documents(str(dataset))
9
 
10
  from sentence_transformers import SentenceTransformer
11
+ from langchain_community.embeddings import HuggingFaceEmbeddings
12
  embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
13
+ data = FAISS.from_documents(docs, embedding_model)
14
+
15
+ #data = dataset["train"]
16
  data = data.add_faiss_index("embeddings") # column name that has the embeddings of the dataset
17
 
18
 
19
  from langchain_community.vectorstores import Chroma
20
+ #persist_directory = 'docs/chroma/'
21
 
22
  #vectordb = Chroma.from_documents(
23
  # documents=docs,