codingprepdemo

Sleeping

rishabhpr committed on Nov 21, 2024

Commit

33552fd

verified ·

1 Parent(s): a2c958a

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -39,21 +39,26 @@ for message in st.session_state.messages[1:]:  # Skip the system message
     with st.chat_message(message["role"]):
         st.markdown(message["content"])
 # Function to find the top 1 most similar question based on user input
 def find_top_question(query):
     # Generate embedding for the query
     query_embedding = model.encode(query, convert_to_tensor=True, device=device).cpu().numpy()
-    # Compute cosine similarity between query embedding and dataset embeddings using scikit-learn's pairwise_distances_reduction
-    similarities = cosine_similarity(query_embedding, embeddings).flatten()
     # Get the index of the most similar result (top 1)
-    top_index = similarities.indices[0]  # Index of highest similarity
     # Retrieve metadata for the top result
     top_result = metadata.iloc[top_index].copy()
-    top_result['similarity_score'] = similarities.distances[0]
     return top_result
 # Function to generate response using OpenAI API with debugging logs

     with st.chat_message(message["role"]):
         st.markdown(message["content"])
+from sklearn.metrics.pairwise import cosine_similarity
 # Function to find the top 1 most similar question based on user input
 def find_top_question(query):
     # Generate embedding for the query
     query_embedding = model.encode(query, convert_to_tensor=True, device=device).cpu().numpy()
+    # Reshape query_embedding to ensure it is a 2D array
+    query_embedding = query_embedding.reshape(1, -1)  # Reshape to (1, n_features)
+    # Compute cosine similarity between query embedding and dataset embeddings
+    similarities = cosine_similarity(query_embedding, embeddings).flatten()  # Flatten to get a 1D array of similarities
     # Get the index of the most similar result (top 1)
+    top_index = similarities.argsort()[-1]  # Index of highest similarity
     # Retrieve metadata for the top result
     top_result = metadata.iloc[top_index].copy()
+    top_result['similarity_score'] = similarities[top_index]
     return top_result
 # Function to generate response using OpenAI API with debugging logs