Cachoups committed on
Commit
acecfc3
·
verified ·
1 Parent(s): 5ab1c16

Update lib/comparison.py

Browse files
Files changed (1) hide show
  1. lib/comparison.py +14 -17
lib/comparison.py CHANGED
@@ -27,22 +27,19 @@ def compute_similarity(embeddings1, embeddings2):
27
  """Compute pairwise cosine similarity between two sets of embeddings."""
28
  return cosine_similarity(embeddings1, embeddings2)
29
 
30
- # Compare a paragraph with a list of other paragraphs
31
- def compare_summaries(paragraph, paragraphs):
32
- """
33
- Compare a single paragraph with a list of summaries,
34
- and return the most similar summary along with the similarity score.
35
- """
36
- # Get embeddings for the paragraph and the list of summaries
37
- paragraph_embedding = get_bert_embeddings([paragraph])[0] # Single paragraph embedding
38
- summaries_embeddings = get_bert_embeddings(paragraphs) # Embeddings for list of paragraphs
39
 
40
- # Compute similarity between the paragraph and each summary
41
- similarities = compute_similarity([paragraph_embedding], summaries_embeddings)[0]
 
 
 
 
 
 
42
 
43
- # Find the most similar summary
44
- most_similar_index = np.argmax(similarities) # Get index of most similar summary
45
- most_similar_summary = paragraphs[most_similar_index] # Corresponding summary
46
- similarity_score = similarities[most_similar_index] # Similarity score
47
-
48
- return most_similar_summary
 
27
  """Compute pairwise cosine similarity between two sets of embeddings."""
28
  return cosine_similarity(embeddings1, embeddings2)
29
 
30
+ def compare_selected_paragraph(paragraph, stored_paragraphs):
31
+ """Compare the selected paragraph with stored paragraphs."""
32
+ # Here, 'stored_paragraphs' would be available inside the function
33
+ # Perform the comparison
34
+ embeddings1 = get_bert_embeddings([paragraph]) # Get embedding for the selected paragraph
35
+ embeddings2 = get_bert_embeddings(stored_paragraphs) # Get embeddings for stored paragraphs
 
 
 
36
 
37
+ similarity_matrix = compute_similarity(embeddings1, embeddings2)
38
+
39
+ # Find the most similar paragraph
40
+ most_similar_index = np.argmax(similarity_matrix[0])
41
+ most_similar_paragraph = stored_paragraphs[most_similar_index]
42
+ similarity_score = similarity_matrix[0][most_similar_index]
43
+
44
+ return f"Most similar paragraph {most_similar_index+1}: {most_similar_paragraph}\nSimilarity score: {similarity_score:.2f}"
45