michaelfeil
committed on
Commit
•
c686254
1
Parent(s):
0255774
update demo to use actual index
Browse files
search.py
CHANGED
@@ -25,7 +25,7 @@ index = None
|
|
25 |
docs_index = None
|
26 |
|
27 |
|
28 |
-
def build_index(demo_mode=True):
|
29 |
global index, docs_index
|
30 |
index = Index(
|
31 |
ndim=embed_texts_sync(["Hi"]).shape[
|
@@ -48,11 +48,14 @@ def build_index(demo_mode=True):
|
|
48 |
index.add(np.arange(len(docs_index)), embeddings)
|
49 |
return
|
50 |
else:
|
|
|
51 |
ds = datasets.load_dataset("michaelfeil/mined_docstrings_pypi_embedded")
|
52 |
ds = ds["train"]
|
53 |
docs_index = ds["code"]
|
54 |
-
embeddings = ds["embed_func_code"]
|
|
|
55 |
index.add(np.arange(len(docs_index)), embeddings)
|
|
|
56 |
|
57 |
if index is None:
|
58 |
build_index()
|
|
|
25 |
docs_index = None
|
26 |
|
27 |
|
28 |
+
def build_index(demo_mode=False):
|
29 |
global index, docs_index
|
30 |
index = Index(
|
31 |
ndim=embed_texts_sync(["Hi"]).shape[
|
|
|
48 |
index.add(np.arange(len(docs_index)), embeddings)
|
49 |
return
|
50 |
else:
|
51 |
+
print("loading 280k dataset")
|
52 |
ds = datasets.load_dataset("michaelfeil/mined_docstrings_pypi_embedded")
|
53 |
ds = ds["train"]
|
54 |
docs_index = ds["code"]
|
55 |
+
embeddings = np.array(ds["embed_func_code"])
|
56 |
+
print("indexing the 280k vectors")
|
57 |
index.add(np.arange(len(docs_index)), embeddings)
|
58 |
+
print("usearch index done.")
|
59 |
|
60 |
if index is None:
|
61 |
build_index()
|