Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -11,6 +11,8 @@ tqdm.pandas()
|
|
11 |
|
12 |
#######################################################################################
|
13 |
|
|
|
|
|
14 |
# Setup transaction details
|
15 |
repo_id = "bluuebunny/arxiv_abstract_embedding_mxbai_large_v1_milvus"
|
16 |
repo_type = "dataset"
|
@@ -24,7 +26,7 @@ snapshot_download(repo_id=repo_id, repo_type=repo_type, local_dir=local_dir)
|
|
24 |
|
25 |
# Function to convert dense vector to binary vector
|
26 |
def dense_to_binary(dense_vector):
|
27 |
-
|
28 |
|
29 |
|
30 |
# Gather fp32 files
|
@@ -80,7 +82,7 @@ print("Upload complete")
|
|
80 |
|
81 |
# Function to truncate a packed binary vector to its first `size` bits
|
82 |
def binary_to_mrl(binary_vector, size=512):
|
83 |
-
|
84 |
|
85 |
# Gather binary embedding files
|
86 |
binaries = glob('binary_embeddings/*.parquet')
|
|
|
11 |
|
12 |
#######################################################################################
|
13 |
|
14 |
+
print("Downloading repo")
|
15 |
+
|
16 |
# Setup transaction details
|
17 |
repo_id = "bluuebunny/arxiv_abstract_embedding_mxbai_large_v1_milvus"
|
18 |
repo_type = "dataset"
|
|
|
26 |
|
27 |
# Function to convert dense vector to binary vector
|
28 |
def dense_to_binary(dense_vector):
    """Convert a dense vector to a bit-packed binary vector.

    Each component becomes one bit: 1 when the component is >= 0, else 0.
    The bits are then packed eight per byte (most significant bit first,
    NumPy's default), with the last byte zero-padded when the number of
    components is not a multiple of eight.

    Args:
        dense_vector: Array-like of numbers (NumPy array, list, tuple, ...).

    Returns:
        A 1-D ``numpy.ndarray`` of dtype ``uint8`` holding the packed bits.
    """
    # np.asarray lets plain Python sequences through; comparing a list with
    # `>= 0` directly would raise TypeError. packbits accepts booleans, so
    # no explicit 1/0 mapping is needed.
    return np.packbits(np.asarray(dense_vector) >= 0)
|
30 |
|
31 |
|
32 |
# Gather fp32 files
|
|
|
82 |
|
83 |
# Function to truncate a packed binary vector to its first `size` bits
|
84 |
def binary_to_mrl(binary_vector, size=512):
    """Truncate a bit-packed binary vector to its first ``size`` bits.

    The packed bytes are expanded to individual bits, the leading ``size``
    bits are kept, and the result is packed back into bytes. If the input
    holds fewer than ``size`` bits, all of them are kept.

    Args:
        binary_vector: Array-like of byte values (expected range 0..255),
            e.g. the output of ``np.packbits``.
        size: Number of leading bits to keep. Defaults to 512.

    Returns:
        A 1-D ``numpy.ndarray`` of dtype ``uint8`` with the packed,
        truncated bits.
    """
    # np.unpackbits only accepts uint8 arrays; coerce so lists or other
    # integer arrays holding byte values do not raise TypeError.
    packed = np.asarray(binary_vector, dtype=np.uint8)
    bits = np.unpackbits(packed)
    return np.packbits(bits[:size])
|
86 |
|
87 |
# Gather binary embedding files
|
88 |
binaries = glob('binary_embeddings/*.parquet')
|