Shreyas094
committed on
Commit
•
d221460
1
Parent(s):
7ccb084
Update app.py
Browse files
app.py
CHANGED
@@ -39,8 +39,6 @@ from typing import List, Dict, Tuple
|
|
39 |
import datetime
|
40 |
from abc import ABC, abstractmethod
|
41 |
from typing import List, Dict, Any
|
42 |
-
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
|
43 |
-
|
44 |
|
45 |
# Automatically get the current year
|
46 |
CURRENT_YEAR = datetime.datetime.now().year
|
@@ -107,9 +105,8 @@ groq_client = Groq(api_key=GROQ_API_KEY)
|
|
107 |
MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY")
|
108 |
mistral_client = Mistral(api_key=MISTRAL_API_KEY)
|
109 |
|
110 |
-
|
111 |
-
|
112 |
-
)
|
113 |
|
114 |
# Step 1: Create a base class for AI models
|
115 |
class AIModel(ABC):
|
@@ -648,9 +645,9 @@ def rerank_documents(query: str, documents: List[Dict],
|
|
648 |
bm25_scores = bm25.get_scores(query)
|
649 |
|
650 |
# Step 4: Get semantic similarity scores
|
651 |
-
query_embedding = similarity_model.
|
652 |
doc_summaries = [doc['summary'] for doc in valid_docs]
|
653 |
-
doc_embeddings = similarity_model.
|
654 |
semantic_scores = util.cos_sim(query_embedding, doc_embeddings)[0]
|
655 |
|
656 |
# Step 5: Combine scores (normalize first)
|
@@ -682,11 +679,11 @@ def rerank_documents(query: str, documents: List[Dict],
|
|
682 |
continue
|
683 |
|
684 |
# Check similarity with already selected documents
|
685 |
-
doc_embedding = similarity_model.
|
686 |
is_similar = False
|
687 |
|
688 |
for content in added_contents:
|
689 |
-
content_embedding = similarity_model.
|
690 |
similarity = util.pytorch_cos_sim(doc_embedding, content_embedding)
|
691 |
if similarity > similarity_threshold:
|
692 |
is_similar = True
|
@@ -708,8 +705,8 @@ def rerank_documents(query: str, documents: List[Dict],
|
|
708 |
|
709 |
def compute_similarity(text1, text2):
|
710 |
# Encode the texts
|
711 |
-
embedding1 = similarity_model.
|
712 |
-
embedding2 = similarity_model.
|
713 |
|
714 |
# Compute cosine similarity
|
715 |
cosine_similarity = util.pytorch_cos_sim(embedding1, embedding2)
|
|
|
39 |
import datetime
|
40 |
from abc import ABC, abstractmethod
|
41 |
from typing import List, Dict, Any
|
|
|
|
|
42 |
|
43 |
# Automatically get the current year
|
44 |
CURRENT_YEAR = datetime.datetime.now().year
|
|
|
105 |
MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY")
|
106 |
mistral_client = Mistral(api_key=MISTRAL_API_KEY)
|
107 |
|
108 |
+
# Initialize the similarity model
|
109 |
+
similarity_model = SentenceTransformer('BAAI/bge-small-en-v1.5')
|
|
|
110 |
|
111 |
# Step 1: Create a base class for AI models
|
112 |
class AIModel(ABC):
|
|
|
645 |
bm25_scores = bm25.get_scores(query)
|
646 |
|
647 |
# Step 4: Get semantic similarity scores
|
648 |
+
query_embedding = similarity_model.encode(query)
|
649 |
doc_summaries = [doc['summary'] for doc in valid_docs]
|
650 |
+
doc_embeddings = similarity_model.encode(doc_summaries)
|
651 |
semantic_scores = util.cos_sim(query_embedding, doc_embeddings)[0]
|
652 |
|
653 |
# Step 5: Combine scores (normalize first)
|
|
|
679 |
continue
|
680 |
|
681 |
# Check similarity with already selected documents
|
682 |
+
doc_embedding = similarity_model.encode(doc['summary'])
|
683 |
is_similar = False
|
684 |
|
685 |
for content in added_contents:
|
686 |
+
content_embedding = similarity_model.encode(content)
|
687 |
similarity = util.pytorch_cos_sim(doc_embedding, content_embedding)
|
688 |
if similarity > similarity_threshold:
|
689 |
is_similar = True
|
|
|
705 |
|
706 |
def compute_similarity(text1, text2):
|
707 |
# Encode the texts
|
708 |
+
embedding1 = similarity_model.encode(text1)
|
709 |
+
embedding2 = similarity_model.encode(text2)
|
710 |
|
711 |
# Compute cosine similarity
|
712 |
cosine_similarity = util.pytorch_cos_sim(embedding1, embedding2)
|