Namitg02 committed on
Commit
2709754
·
verified ·
1 Parent(s): c494c5e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -3
app.py CHANGED
@@ -29,7 +29,7 @@ dataset = load_dataset("not-lain/wikipedia",revision = "embedded")
29
  #docs = splitter.create_documents(str(dataset))
30
  # Returns a list of documents
31
  #print(docs)
32
- embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
33
  #all-MiniLM-L6-v2, BAAI/bge-base-en-v1.5,infgrad/stella-base-en-v2, BAAI/bge-large-en-v1.5 working with default dimensions
34
  #docs_text = [doc.text for doc in docs]
35
  #embed = embedding_model.embed_documents(docs_text)
@@ -47,8 +47,9 @@ print(data)
47
  d = 384 # vectors dimension
48
  m = 32 # hnsw parameter. Higher is more accurate but takes more time to index (default is 32, 128 should be ok)
49
  #index = faiss.IndexHNSWFlat(d, m)
50
- index = faiss.IndexFlatL2(embedding_dim)
51
- data.add_faiss_index(embeddings.shape[1], custom_index=index)
 
52
  # adds an index column for the embeddings
53
 
54
  print("check1")
 
29
  #docs = splitter.create_documents(str(dataset))
30
  # Returns a list of documents
31
  #print(docs)
32
+ embedding_model = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1")
33
  #all-MiniLM-L6-v2, BAAI/bge-base-en-v1.5,infgrad/stella-base-en-v2, BAAI/bge-large-en-v1.5 working with default dimensions
34
  #docs_text = [doc.text for doc in docs]
35
  #embed = embedding_model.embed_documents(docs_text)
 
47
  d = 384 # vectors dimension
48
  m = 32 # hnsw parameter. Higher is more accurate but takes more time to index (default is 32, 128 should be ok)
49
  #index = faiss.IndexHNSWFlat(d, m)
50
+ #index = faiss.IndexFlatL2(embedding_dim)
51
+ #data.add_faiss_index(embeddings.shape[1], custom_index=index)
52
+ data.add_faiss_index("embeddings")
53
  # adds an index column for the embeddings
54
 
55
  print("check1")