pvanand committed on
Commit
2b67f80
·
1 Parent(s): 69d0aca

Update actions/search_content.py

Browse files
Files changed (1) hide show
  1. actions/search_content.py +7 -1
actions/search_content.py CHANGED
@@ -7,7 +7,7 @@ from sentence_transformers import SentenceTransformer
7
  # Define paths for model, Faiss index, and data file
8
  MODEL_SAVE_PATH = "all-distilroberta-v1-model.pkl"
9
  FAISS_INDEX_FILE_PATH = "index.faiss"
10
- DATA_FILE_PATH = "/content/omdena_faq_training_data.csv"
11
 
12
  def load_transformer_model(model_file):
13
  """Load a sentence transformer model from a file."""
@@ -21,6 +21,7 @@ def load_data(file_path):
21
  """Load data from a CSV file and preprocess it."""
22
  data_frame = pd.read_csv(file_path)
23
  data_frame["id"] = data_frame.index
 
24
  # Create a 'QNA' column that combines 'Questions' and 'Answers'
25
  data_frame['QNA'] = data_frame.apply(lambda row: f"Question: {row['Questions']}, Answer: {row['Answers']}", axis=1)
26
  return data_frame.set_index(["id"], drop=False)
@@ -29,15 +30,20 @@ def search_content(query, data_frame_indexed, transformer_model, faiss_index, k=
29
  """Search the content using a query and return the top k results."""
30
  # Encode the query using the model
31
  query_vector = transformer_model.encode([query])
 
32
  # Normalize the query vector
33
  faiss.normalize_L2(query_vector)
 
34
  # Search the Faiss index using the query vector
35
  top_k = faiss_index.search(query_vector, k)
 
36
  # Extract the IDs and similarities of the top k results
37
  ids = top_k[1][0].tolist()
38
  similarities = top_k[0][0].tolist()
 
39
  # Get the corresponding results from the data frame
40
  results = data_frame_indexed.loc[ids]
 
41
  # Add a column for the similarities
42
  results["similarities"] = similarities
43
  return results
 
7
  # Define paths for model, Faiss index, and data file
8
  MODEL_SAVE_PATH = "all-distilroberta-v1-model.pkl"
9
  FAISS_INDEX_FILE_PATH = "index.faiss"
10
+ DATA_FILE_PATH = "omdena_qna_dataset/omdena_faq_training_data.csv"
11
 
12
  def load_transformer_model(model_file):
13
  """Load a sentence transformer model from a file."""
 
21
  """Load data from a CSV file and preprocess it."""
22
  data_frame = pd.read_csv(file_path)
23
  data_frame["id"] = data_frame.index
24
+
25
  # Create a 'QNA' column that combines 'Questions' and 'Answers'
26
  data_frame['QNA'] = data_frame.apply(lambda row: f"Question: {row['Questions']}, Answer: {row['Answers']}", axis=1)
27
  return data_frame.set_index(["id"], drop=False)
 
30
  """Search the content using a query and return the top k results."""
31
  # Encode the query using the model
32
  query_vector = transformer_model.encode([query])
33
+
34
  # Normalize the query vector
35
  faiss.normalize_L2(query_vector)
36
+
37
  # Search the Faiss index using the query vector
38
  top_k = faiss_index.search(query_vector, k)
39
+
40
  # Extract the IDs and similarities of the top k results
41
  ids = top_k[1][0].tolist()
42
  similarities = top_k[0][0].tolist()
43
+
44
  # Get the corresponding results from the data frame
45
  results = data_frame_indexed.loc[ids]
46
+
47
  # Add a column for the similarities
48
  results["similarities"] = similarities
49
  return results