Update app.py
app.py CHANGED
@@ -17,48 +17,10 @@ nlp.add_pipe('sentencizer')
 #model = BertModel.from_pretrained(bert_model_name)
 #model.eval()
 
-import torch
-import numpy as np
-from sklearn.metrics.pairwise import cosine_similarity
-
-def get_bert_embeddings(texts):
-    """Obtain BERT embeddings for a list of texts."""
-    embeddings = []
-    with torch.no_grad():
-        for text in texts:
-            inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True)
-            outputs = model(**inputs)
-            # Take the mean of token embeddings as the sentence embedding
-            embedding = outputs.last_hidden_state.mean(dim=1).squeeze().numpy()
-            embeddings.append(embedding)
-    return np.array(embeddings)
-
-def compute_similarity(embedding1, embeddings2):
-    """Compute cosine similarity between a single embedding and a set of embeddings."""
-    return cosine_similarity([embedding1], embeddings2)[0]
-
-def compare_paragraph_to_list(paragraph, paragraph_list, top_n=3):
-    """Compare a single paragraph to a list of paragraphs and return the top N most similar ones."""
-    # Get embedding for the target paragraph
-    target_embedding = get_bert_embeddings([paragraph])[0]  # Only one paragraph
-
-    # Get embeddings for the list of paragraphs
-    list_embeddings = get_bert_embeddings(paragraph_list)
-
-    # Compute similarity between the target and each paragraph in the list
-    similarity_scores = compute_similarity(target_embedding, list_embeddings)
-
-    # Combine paragraphs with their similarity scores
-    results = [
-        {'compared_paragraph': paragraph_list[i], 'similarity_score': similarity_scores[i]}
-        for i in range(len(paragraph_list))
-    ]
-
-    # Sort the results by similarity score in descending order and take the top N
-    sorted_results = sorted(results, key=lambda x: x['similarity_score'], reverse=True)[:top_n]
-
-    # Return only the top N most similar paragraphs
-    return sorted_results
+#import torch
+#import numpy as np
+#from sklearn.metrics.pairwise import cosine_similarity
+
 
 
 
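For context, this hunk deletes the BERT-based paragraph-similarity helpers (get_bert_embeddings, compute_similarity, compare_paragraph_to_list) and leaves only their imports, now commented out. The following is a minimal sketch of how the removed compare_paragraph_to_list helper could have been called before this change. It assumes the old app.py loaded a BERT tokenizer and model into the module-level names tokenizer and model (the commented-out BertModel.from_pretrained(bert_model_name) line in the unchanged context suggests this); bert_model_name, the example texts, and the BertTokenizer choice are assumptions for illustration.

# Hypothetical usage of the removed helpers; assumes the removed functions
# above are still defined, and that `tokenizer` and `model` were created
# roughly like this elsewhere in the old app.py.
from transformers import BertTokenizer, BertModel

bert_model_name = 'bert-base-uncased'  # assumption: any BERT checkpoint name
tokenizer = BertTokenizer.from_pretrained(bert_model_name)
model = BertModel.from_pretrained(bert_model_name)
model.eval()

target = "Transformers encode sentences into dense vectors."
candidates = [
    "BERT produces contextual embeddings for text.",
    "The weather was pleasant all weekend.",
    "Cosine similarity compares two embedding vectors.",
]

# Returned the top-N candidates as dicts with 'compared_paragraph'
# and 'similarity_score', sorted by descending similarity.
top_matches = compare_paragraph_to_list(target, candidates, top_n=2)
for match in top_matches:
    print(f"{match['similarity_score']:.3f}  {match['compared_paragraph']}")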