Update app.py
Browse files
app.py
CHANGED
@@ -14,7 +14,6 @@ from transformers import AutoTokenizer
|
|
14 |
from transformers import AutoModelForCausalLM
|
15 |
from transformers import TextIteratorStreamer
|
16 |
from threading import Thread
|
17 |
-
from langchain import Dimension
|
18 |
|
19 |
|
20 |
|
@@ -43,9 +42,10 @@ embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
|
|
43 |
data = dataset["train"]
|
44 |
|
45 |
print(data)
|
46 |
-
d = 384
|
47 |
-
|
48 |
-
|
|
|
49 |
# adds an index column for the embeddings
|
50 |
|
51 |
|
|
|
14 |
from transformers import AutoModelForCausalLM
|
15 |
from transformers import TextIteratorStreamer
|
16 |
from threading import Thread
|
|
|
17 |
|
18 |
|
19 |
|
|
|
42 |
data = dataset["train"]
|
43 |
|
44 |
print(data)
|
45 |
+
d = 384 # vectors dimension
|
46 |
+
m = 32 # hnsw parameter. Higher is more accurate but takes more time to index (default is 32, 128 should be ok)
|
47 |
+
index = faiss.IndexHNSWFlat(d, m, faiss.METRIC_INNER_PRODUCT)
|
48 |
+
data = data.add_faiss_index("embeddings", custom_index=index)
|
49 |
# adds an index column for the embeddings
|
50 |
|
51 |
|