rishabhpr committed on
Commit
33552fd
·
verified ·
1 Parent(s): a2c958a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -7
app.py CHANGED
@@ -39,21 +39,26 @@ for message in st.session_state.messages[1:]: # Skip the system message
39
  with st.chat_message(message["role"]):
40
  st.markdown(message["content"])
41
 
 
 
42
  # Function to find the top 1 most similar question based on user input
43
  def find_top_question(query):
44
  # Generate embedding for the query
45
  query_embedding = model.encode(query, convert_to_tensor=True, device=device).cpu().numpy()
46
 
47
- # Compute cosine similarity between query embedding and dataset embeddings using scikit-learn's pairwise_distances_reduction
48
- similarities = cosine_similarity(query_embedding, embeddings).flatten()
49
-
 
 
 
50
  # Get the index of the most similar result (top 1)
51
- top_index = similarities.indices[0] # Index of highest similarity
52
-
53
  # Retrieve metadata for the top result
54
  top_result = metadata.iloc[top_index].copy()
55
- top_result['similarity_score'] = similarities.distances[0]
56
-
57
  return top_result
58
 
59
  # Function to generate response using OpenAI API with debugging logs
 
39
  with st.chat_message(message["role"]):
40
  st.markdown(message["content"])
41
 
42
+ from sklearn.metrics.pairwise import cosine_similarity
43
+
44
  # Function to find the top 1 most similar question based on user input
45
  def find_top_question(query):
46
  # Generate embedding for the query
47
  query_embedding = model.encode(query, convert_to_tensor=True, device=device).cpu().numpy()
48
 
49
+ # Reshape query_embedding to ensure it is a 2D array
50
+ query_embedding = query_embedding.reshape(1, -1) # Reshape to (1, n_features)
51
+
52
+ # Compute cosine similarity between query embedding and dataset embeddings
53
+ similarities = cosine_similarity(query_embedding, embeddings).flatten() # Flatten to get a 1D array of similarities
54
+
55
  # Get the index of the most similar result (top 1)
56
+ top_index = similarities.argsort()[-1] # Index of highest similarity
57
+
58
  # Retrieve metadata for the top result
59
  top_result = metadata.iloc[top_index].copy()
60
+ top_result['similarity_score'] = similarities[top_index]
61
+
62
  return top_result
63
 
64
  # Function to generate response using OpenAI API with debugging logs