omdi-action-server

Sleeping

App Files Community

pvanand committed on Aug 24, 2023

Commit

2b67f80

1 Parent(s): 69d0aca

Update actions/search_content.py

Browse files

Files changed (1) hide show

actions/search_content.py +7 -1

actions/search_content.py CHANGED Viewed

@@ -7,7 +7,7 @@ from sentence_transformers import SentenceTransformer
 # Define paths for model, Faiss index, and data file
 MODEL_SAVE_PATH = "all-distilroberta-v1-model.pkl"
 FAISS_INDEX_FILE_PATH = "index.faiss"
-DATA_FILE_PATH = "/content/omdena_faq_training_data.csv"
 def load_transformer_model(model_file):
     """Load a sentence transformer model from a file."""
@@ -21,6 +21,7 @@ def load_data(file_path):
     """Load data from a CSV file and preprocess it."""
     data_frame = pd.read_csv(file_path)
     data_frame["id"] = data_frame.index
     # Create a 'QNA' column that combines 'Questions' and 'Answers'
     data_frame['QNA'] = data_frame.apply(lambda row: f"Question: {row['Questions']}, Answer: {row['Answers']}", axis=1)
     return data_frame.set_index(["id"], drop=False)
@@ -29,15 +30,20 @@ def search_content(query, data_frame_indexed, transformer_model, faiss_index, k=
     """Search the content using a query and return the top k results."""
     # Encode the query using the model
     query_vector = transformer_model.encode([query])
     # Normalize the query vector
     faiss.normalize_L2(query_vector)
     # Search the Faiss index using the query vector
     top_k = faiss_index.search(query_vector, k)
     # Extract the IDs and similarities of the top k results
     ids = top_k[1][0].tolist()
     similarities = top_k[0][0].tolist()
     # Get the corresponding results from the data frame
     results = data_frame_indexed.loc[ids]
     # Add a column for the similarities
     results["similarities"] = similarities
     return results

 # Define paths for model, Faiss index, and data file
 MODEL_SAVE_PATH = "all-distilroberta-v1-model.pkl"
 FAISS_INDEX_FILE_PATH = "index.faiss"
+DATA_FILE_PATH = "omdena_qna_dataset/omdena_faq_training_data.csv"
 def load_transformer_model(model_file):
     """Load a sentence transformer model from a file."""
     """Load data from a CSV file and preprocess it."""
     data_frame = pd.read_csv(file_path)
     data_frame["id"] = data_frame.index
     # Create a 'QNA' column that combines 'Questions' and 'Answers'
     data_frame['QNA'] = data_frame.apply(lambda row: f"Question: {row['Questions']}, Answer: {row['Answers']}", axis=1)
     return data_frame.set_index(["id"], drop=False)
     """Search the content using a query and return the top k results."""
     # Encode the query using the model
     query_vector = transformer_model.encode([query])
     # Normalize the query vector
     faiss.normalize_L2(query_vector)
     # Search the Faiss index using the query vector
     top_k = faiss_index.search(query_vector, k)
     # Extract the IDs and similarities of the top k results
     ids = top_k[1][0].tolist()
     similarities = top_k[0][0].tolist()
     # Get the corresponding results from the data frame
     results = data_frame_indexed.loc[ids]
     # Add a column for the similarities
     results["similarities"] = similarities
     return results