Namitg02 committed on
Commit
dd6cf15
·
verified ·
1 Parent(s): 0851688

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -7
app.py CHANGED
@@ -20,22 +20,22 @@ dataset = load_dataset("Namitg02/Test", split='train', streaming=False)
20
  print(dataset)
21
  # Returns a list of dictionaries, each representing a row in the dataset.
22
  print(dataset[1])
23
- splitter = RecursiveCharacterTextSplitter(chunk_size=150, chunk_overlap=25,separators=["\n\n"]) # ["\n\n", "\n", " ", ""])
24
 
25
 
26
- docs = splitter.create_documents(str(dataset))
27
  # Returns a list of documents
28
- print(docs)
29
  embedding_model = HuggingFaceEmbeddings(model_name = "all-MiniLM-L6-v2")
30
- docs_text = [doc.text for doc in docs]
31
- embed = embedding_model.embed_documents(docs_text)
32
 
33
- data = FAISS.from_embeddings(embed, embedding_model)
34
  #data = FAISS.from_texts(docs, embedding_model)
35
 
36
  # Returns a FAISS wrapper vector store. Input is a list of strings. The from_documents method uses documents to return a VectorStore.
37
 
38
- #data = dataset["train"]
39
  data = data.add_faiss_index("embeddings")
40
  # adds a column that has an index of embeddings
41
 
 
20
  print(dataset)
21
  # Returns a list of dictionaries, each representing a row in the dataset.
22
  print(dataset[1])
23
+ #splitter = RecursiveCharacterTextSplitter(chunk_size=150, chunk_overlap=25,separators=["\n\n"]) # ["\n\n", "\n", " ", ""])
24
 
25
 
26
+ #docs = splitter.create_documents(str(dataset))
27
  # Returns a list of documents
28
+ #print(docs)
29
  embedding_model = HuggingFaceEmbeddings(model_name = "all-MiniLM-L6-v2")
30
+ #docs_text = [doc.text for doc in docs]
31
+ #embed = embedding_model.embed_documents(docs_text)
32
 
33
+ #data = FAISS.from_embeddings(embed, embedding_model)
34
  #data = FAISS.from_texts(docs, embedding_model)
35
 
36
  # Returns a FAISS wrapper vector store. Input is a list of strings. The from_documents method uses documents to return a VectorStore.
37
 
38
+ data = dataset["train"]
39
  data = data.add_faiss_index("embeddings")
40
  # adds a column that has an index of embeddings
41