Spaces:

Namitg02
/

Test

Runtime error

Namitg02 committed on May 21, 2024

Commit

2709754

verified ·

1 Parent(s): c494c5e

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -29,7 +29,7 @@ dataset = load_dataset("not-lain/wikipedia",revision = "embedded")
 #docs = splitter.create_documents(str(dataset))
 # Returns a list of documents
 #print(docs)
-embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
 #all-MiniLM-L6-v2, BAAI/bge-base-en-v1.5,infgrad/stella-base-en-v2, BAAI/bge-large-en-v1.5 working with default dimensions
 #docs_text = [doc.text for doc in docs]
 #embed = embedding_model.embed_documents(docs_text)
@@ -47,8 +47,9 @@ print(data)
 d = 384  # vectors dimension
 m = 32  # hnsw parameter. Higher is more accurate but takes more time to index (default is 32, 128 should be ok)
 #index = faiss.IndexHNSWFlat(d, m)
-index =  faiss.IndexFlatL2(embedding_dim)
-data.add_faiss_index(embeddings.shape[1], custom_index=index)
 # adds an index column that for the embeddings
 print("check1")

 #docs = splitter.create_documents(str(dataset))
 # Returns a list of documents
 #print(docs)
+embedding_model = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1")
 #all-MiniLM-L6-v2, BAAI/bge-base-en-v1.5,infgrad/stella-base-en-v2, BAAI/bge-large-en-v1.5 working with default dimensions
 #docs_text = [doc.text for doc in docs]
 #embed = embedding_model.embed_documents(docs_text)
 d = 384  # vectors dimension
 m = 32  # hnsw parameter. Higher is more accurate but takes more time to index (default is 32, 128 should be ok)
 #index = faiss.IndexHNSWFlat(d, m)
+#index =  faiss.IndexFlatL2(embedding_dim)
+#data.add_faiss_index(embeddings.shape[1], custom_index=index)
+data.add_faiss_index("embeddings")
 # adds an index column that for the embeddings
 print("check1")