Spaces:

Cachoups
/

FinanceReport

Sleeping

Cachoups committed on Sep 23, 2024

Commit

acecfc3

verified ·

1 Parent(s): 5ab1c16

Update lib/comparison.py

Files changed (1) hide show

lib/comparison.py CHANGED Viewed

@@ -27,22 +27,19 @@ def compute_similarity(embeddings1, embeddings2):
     """Compute pairwise cosine similarity between two sets of embeddings."""
     return cosine_similarity(embeddings1, embeddings2)
-# Compare a paragraph with a list of other paragraphs
-def compare_summaries(paragraph, paragraphs):
-    """
-    Compare a single paragraph with a list of summaries,
-    and return the most similar summary (the similarity score is computed but not returned).
-    """
-    # Embed the input paragraph and every candidate summary
-    paragraph_embedding = get_bert_embeddings([paragraph])[0]  # Embedding of the single input paragraph
-    summaries_embeddings = get_bert_embeddings(paragraphs)      # Embeddings for the candidate list
-    # Score the paragraph against each candidate; [0] takes the first (only) row of the result
-    similarities = compute_similarity([paragraph_embedding], summaries_embeddings)[0]
-    # Pick the candidate with the highest similarity
-    most_similar_index = np.argmax(similarities)               # Index of the highest score
-    most_similar_summary = paragraphs[most_similar_index]       # Candidate at that index
-    similarity_score = similarities[most_similar_index]        # NOTE: assigned but never used or returned
-    return most_similar_summary

     """Compute pairwise cosine similarity between two sets of embeddings."""
     return cosine_similarity(embeddings1, embeddings2)
+def compare_selected_paragraph(paragraph, stored_paragraphs):
+    """Compare the selected paragraph with stored paragraphs and return a formatted description of the best match."""
+    # 'stored_paragraphs' is received as a parameter rather than read from outside the function
+    # Embed both sides, then score the selected paragraph against every stored one
+    embeddings1 = get_bert_embeddings([paragraph])  # Embeddings for a one-element list holding the selected paragraph
+    embeddings2 = get_bert_embeddings(stored_paragraphs)  # Embeddings for the stored paragraphs
+    similarity_matrix = compute_similarity(embeddings1, embeddings2)
+    # Row 0 is the first (only) row, for the selected paragraph; the reported position below is 1-based
+    most_similar_index = np.argmax(similarity_matrix[0])
+    most_similar_paragraph = stored_paragraphs[most_similar_index]
+    similarity_score = similarity_matrix[0][most_similar_index]
+    return f"Most similar paragraph {most_similar_index+1}: {most_similar_paragraph}\nSimilarity score: {similarity_score:.2f}"