Yoxas committed on
Commit
6ed1673
·
verified ·
1 Parent(s): 19d04b0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -5
app.py CHANGED
@@ -13,10 +13,10 @@ data = pd.read_csv('RBDx10kstats.csv')
13
  # Function to safely convert JSON strings to numpy arrays
14
  def safe_json_loads(x):
15
  try:
16
- return np.array(json.loads(x))
17
  except json.JSONDecodeError as e:
18
  print(f"Error decoding JSON: {e}")
19
- return np.array([]) # Return an empty array or handle it as appropriate
20
 
21
  # Apply the safe_json_loads function to the embedding column
22
  data['embedding'] = data['embedding'].apply(safe_json_loads)
@@ -25,7 +25,7 @@ data['embedding'] = data['embedding'].apply(safe_json_loads)
25
  data = data[data['embedding'].apply(lambda x: x.size > 0)]
26
 
27
  # Initialize FAISS index
28
- dimension = len(data['embedding'][0])
29
  res = faiss.StandardGpuResources() # use a single GPU
30
 
31
  # Create FAISS index
@@ -35,7 +35,9 @@ if faiss.get_num_gpus() > 0:
35
  else:
36
  gpu_index = faiss.IndexFlatL2(dimension) # fall back to CPU
37
 
38
- gpu_index.add(np.stack(data['embedding'].values))
 
 
39
 
40
  # Check if GPU is available
41
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
@@ -52,7 +54,7 @@ def embed_question(question, model, tokenizer):
52
  inputs = tokenizer(question, return_tensors='pt').to(device)
53
  with torch.no_grad():
54
  outputs = model(**inputs)
55
- return outputs.last_hidden_state.mean(dim=1).cpu().numpy()
56
 
57
  # Function to retrieve the relevant document and generate a response
58
  @spaces.GPU(duration=120)
 
13
  # Function to safely convert JSON strings to numpy arrays
14
  def safe_json_loads(x):
15
  try:
16
+ return np.array(json.loads(x), dtype=np.float32) # Ensure the array is of type float32
17
  except json.JSONDecodeError as e:
18
  print(f"Error decoding JSON: {e}")
19
+ return np.array([], dtype=np.float32) # Return an empty array or handle it as appropriate
20
 
21
  # Apply the safe_json_loads function to the embedding column
22
  data['embedding'] = data['embedding'].apply(safe_json_loads)
 
25
  data = data[data['embedding'].apply(lambda x: x.size > 0)]
26
 
27
  # Initialize FAISS index
28
+ dimension = len(data['embedding'].iloc[0])
29
  res = faiss.StandardGpuResources() # use a single GPU
30
 
31
  # Create FAISS index
 
35
  else:
36
  gpu_index = faiss.IndexFlatL2(dimension) # fall back to CPU
37
 
38
+ # Ensure embeddings are stacked as float32
39
+ embeddings = np.vstack(data['embedding'].values).astype(np.float32)
40
+ gpu_index.add(embeddings)
41
 
42
  # Check if GPU is available
43
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
54
  inputs = tokenizer(question, return_tensors='pt').to(device)
55
  with torch.no_grad():
56
  outputs = model(**inputs)
57
+ return outputs.last_hidden_state.mean(dim=1).cpu().numpy().astype(np.float32)
58
 
59
  # Function to retrieve the relevant document and generate a response
60
  @spaces.GPU(duration=120)