Spaces:

Namitg02
/

Test

Runtime error

Namitg02 committed on May 21, 2024

Commit

d2de7c8

verified ·

1 Parent(s): 2709754

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -19,11 +19,12 @@ from threading import Thread
 #dataset = load_dataset("Namitg02/Test", split='train', streaming=False)
-dataset = load_dataset("not-lain/wikipedia",revision = "embedded")
-# Returns a list of dictionaries, each representing a row in the dataset.
 #print(dataset[1])
-#splitter = RecursiveCharacterTextSplitter(chunk_size=150, chunk_overlap=25,separators=["\n\n"]) # ["\n\n", "\n", " ", ""])
 #docs = splitter.create_documents(str(dataset))
@@ -40,16 +41,16 @@ print(embedding_dim)
 # Returns a FAISS wrapper vector store. Input is a list of strings. from_documents method used documents to Return VectorStore
-#data = dataset["text"]
-data = dataset["train"]
-print(data)
 d = 384  # vectors dimension
 m = 32  # hnsw parameter. Higher is more accurate but takes more time to index (default is 32, 128 should be ok)
 #index = faiss.IndexHNSWFlat(d, m)
 #index =  faiss.IndexFlatL2(embedding_dim)
 #data.add_faiss_index(embeddings.shape[1], custom_index=index)
-data.add_faiss_index("embeddings")
 # adds an index column that for the embeddings
 print("check1")

 #dataset = load_dataset("Namitg02/Test", split='train', streaming=False)
+#dataset = load_dataset("not-lain/wikipedia",revision = "embedded")
+dataset = load_dataset("epfl-llm/guidelines"",)
+#Returns a list of dictionaries, each representing a row in the dataset.
 #print(dataset[1])
+# splitter = RecursiveCharacterTextSplitter(chunk_size=150, chunk_overlap=25,separators=["\n"]) # ["\n\n", "\n", " ", ""])
 #docs = splitter.create_documents(str(dataset))
 # Returns a FAISS wrapper vector store. Input is a list of strings. from_documents method used documents to Return VectorStore
+data = dataset["clean_text"]
+#data = dataset["train"]
+#print(data)
 d = 384  # vectors dimension
 m = 32  # hnsw parameter. Higher is more accurate but takes more time to index (default is 32, 128 should be ok)
 #index = faiss.IndexHNSWFlat(d, m)
 #index =  faiss.IndexFlatL2(embedding_dim)
 #data.add_faiss_index(embeddings.shape[1], custom_index=index)
+data.add_faiss_index("embeddings")
 # adds an index column that for the embeddings
 print("check1")