bluuebunny committed on
Commit
2ff895a
·
verified ·
1 Parent(s): a6bb8b8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -17
app.py CHANGED
@@ -26,8 +26,7 @@ snapshot_download(repo_id=repo_id, repo_type=repo_type, local_dir=local_dir)
26
 
27
  # Function to convert dense vector to binary vector
28
  def dense_to_binary(dense_vector):
29
- return np.packbits(np.where(dense_vector >= 0, 1, 0))
30
-
31
 
32
  # Gather fp32 files
33
  floats = glob('data/*.parquet')
@@ -47,11 +46,6 @@ for file in floats:
47
 
48
  df.to_parquet(f'binary_embeddings/{os.path.basename(file)}')
49
 
50
- # Print the size of the original and binary files
51
- original_size = os.path.getsize(file)
52
- binary_size = os.path.getsize(f'binary_embeddings/{os.path.basename(file)}')
53
- print(f'Original size: {original_size} bytes, Binary size: {binary_size} bytes')
54
-
55
  #######################################################################################
56
 
57
 
@@ -81,32 +75,27 @@ print("Upload complete")
81
  #######################################################################################
82
 
83
  # Function to convert dense vector to binary vector
84
- def binary_to_mrl(binary_vector, size=512):
85
- return np.packbits(np.unpackbits(binary_vector)[:size])
86
 
87
  # Gather fp32 files
88
- binaries = glob('binary_embeddings/*.parquet')
89
 
90
  # Create a folder to store binary embeddings
91
  os.makedirs('bmrl_embeddings', exist_ok=True)
92
 
93
  # Convert and save each file
94
- for file in binaries:
95
 
96
 
97
  print(f"Processing file: {file}")
98
 
99
  df = pd.read_parquet(file)
100
 
101
- df['vector'] = df['vector'].progress_apply(binary_to_mrl)
102
 
103
  df.to_parquet(f'bmrl_embeddings/{os.path.basename(file)}')
104
 
105
- # Print the size of the original and binary files
106
- original_size = os.path.getsize(file)
107
- binary_size = os.path.getsize(f'bmrl_embeddings/{os.path.basename(file)}')
108
- print(f'Original size: {original_size} bytes, Binary size: {binary_size} bytes')
109
-
110
  #######################################################################################
111
 
112
 
 
26
 
27
  # Function to convert dense vector to binary vector
28
def dense_to_binary(dense_vector):
    """Convert a dense vector into a bit-packed binary vector.

    Each element maps to one bit: 1 when the element is >= 0, otherwise 0.
    The bits are packed eight per byte by ``np.packbits`` (which pads the
    last byte with zero bits when the length is not a multiple of 8) and
    returned as a ``bytes`` object.
    """
    # Sign-threshold every component to a 0/1 integer.
    sign_bits = np.where(dense_vector >= 0, 1, 0)
    # Pack the bits into uint8 values, then serialise to raw bytes.
    packed = np.packbits(sign_bits)
    return packed.tobytes()
 
30
 
31
  # Gather fp32 files
32
  floats = glob('data/*.parquet')
 
46
 
47
  df.to_parquet(f'binary_embeddings/{os.path.basename(file)}')
48
 
 
 
 
 
 
49
  #######################################################################################
50
 
51
 
 
75
  #######################################################################################
76
 
77
  # Function to convert dense vector to a binary vector truncated to the first `size` dimensions
78
def dense_to_bmrl(binary_vector, size=512):
    """Convert a dense vector into a truncated, bit-packed binary vector.

    Each element maps to one bit: 1 when the element is >= 0, otherwise 0.
    Only the first ``size`` bits are kept; they are packed eight per byte by
    ``np.packbits`` (which pads the last byte with zero bits when the kept
    length is not a multiple of 8) and returned as ``bytes``.

    Args:
        binary_vector: The dense input vector. NOTE: despite its name, this
            is the un-binarised vector; the name is kept so the signature
            stays unchanged for existing callers.
        size: Number of leading dimensions to keep before packing.

    Returns:
        The packed sign bits of the first ``size`` elements as ``bytes``.
    """
    # The original body referenced an undefined name `dense_vector`, which
    # raised NameError on every call; use the actual parameter instead.
    sign_bits = np.where(binary_vector >= 0, 1, 0)[:size]
    return np.packbits(sign_bits).tobytes()
80
 
81
  # Gather fp32 files
82
+ floats = glob('data/*.parquet')
83
 
84
  # Create a folder to store truncated binary (bmrl) embeddings
85
  os.makedirs('bmrl_embeddings', exist_ok=True)
86
 
87
  # Convert and save each file
88
+ for file in floats:
89
 
90
 
91
  print(f"Processing file: {file}")
92
 
93
  df = pd.read_parquet(file)
94
 
95
+ df['vector'] = df['vector'].progress_apply(dense_to_bmrl)
96
 
97
  df.to_parquet(f'bmrl_embeddings/{os.path.basename(file)}')
98
 
 
 
 
 
 
99
  #######################################################################################
100
 
101