Namitg02 committed on
Commit
1cc545e
·
verified ·
1 Parent(s): b82ee92

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -2
app.py CHANGED
@@ -14,6 +14,8 @@ from transformers import AutoTokenizer
14
  from transformers import AutoModelForCausalLM
15
  from transformers import TextIteratorStreamer
16
  from threading import Thread
 
 
17
 
18
 
19
  #dataset = load_dataset("Namitg02/Test", split='train', streaming=False)
@@ -27,8 +29,8 @@ dataset = load_dataset("not-lain/wikipedia",revision = "embedded")
27
  #docs = splitter.create_documents(str(dataset))
28
  # Returns a list of documents
29
  #print(docs)
30
- embedding_model = SentenceTransformer("BAAI/bge-large-en-v1.5")
31
- #all-MiniLM-L6-v2, BAAI/bge-base-en-v1.5,infgrad/stella-base-en-v2
32
  #docs_text = [doc.text for doc in docs]
33
  #embed = embedding_model.embed_documents(docs_text)
34
 
@@ -41,6 +43,8 @@ embedding_model = SentenceTransformer("BAAI/bge-large-en-v1.5")
41
  data = dataset["train"]
42
 
43
  print(data)
 
 
44
  data = data.add_faiss_index("embeddings")
45
  # adds an index column that for the embeddings
46
 
 
14
  from transformers import AutoModelForCausalLM
15
  from transformers import TextIteratorStreamer
16
  from threading import Thread
17
+ from langchain import Dimension
18
+
19
 
20
 
21
  #dataset = load_dataset("Namitg02/Test", split='train', streaming=False)
 
29
  #docs = splitter.create_documents(str(dataset))
30
  # Returns a list of documents
31
  #print(docs)
32
+ embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
33
+ #all-MiniLM-L6-v2, BAAI/bge-base-en-v1.5,infgrad/stella-base-en-v2, BAAI/bge-large-en-v1.5 working with default dimensions
34
  #docs_text = [doc.text for doc in docs]
35
  #embed = embedding_model.embed_documents(docs_text)
36
 
 
43
  data = dataset["train"]
44
 
45
  print(data)
46
+ d = 384
47
+ faiss = faiss.IndexFlatL2(d)
48
  data = data.add_faiss_index("embeddings")
49
  # adds an index column that for the embeddings
50