Shreyas094
committed on
Commit
•
bc6c726
1
Parent(s):
b578a48
Update app.py
Browse files
app.py
CHANGED
@@ -39,6 +39,8 @@ from typing import List, Dict, Tuple
|
|
39 |
import datetime
|
40 |
from abc import ABC, abstractmethod
|
41 |
from typing import List, Dict, Any
|
|
|
|
|
42 |
|
43 |
# Automatically get the current year
|
44 |
CURRENT_YEAR = datetime.datetime.now().year
|
@@ -105,8 +107,9 @@ groq_client = Groq(api_key=GROQ_API_KEY)
|
|
105 |
MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY")
|
106 |
mistral_client = Mistral(api_key=MISTRAL_API_KEY)
|
107 |
|
108 |
-
|
109 |
-
|
|
|
110 |
|
111 |
# Step 1: Create a base class for AI models
|
112 |
class AIModel(ABC):
|
@@ -645,9 +648,9 @@ def rerank_documents(query: str, documents: List[Dict],
|
|
645 |
bm25_scores = bm25.get_scores(query)
|
646 |
|
647 |
# Step 4: Get semantic similarity scores
|
648 |
-
query_embedding = similarity_model.
|
649 |
doc_summaries = [doc['summary'] for doc in valid_docs]
|
650 |
-
doc_embeddings = similarity_model.
|
651 |
semantic_scores = util.cos_sim(query_embedding, doc_embeddings)[0]
|
652 |
|
653 |
# Step 5: Combine scores (normalize first)
|
@@ -679,11 +682,11 @@ def rerank_documents(query: str, documents: List[Dict],
|
|
679 |
continue
|
680 |
|
681 |
# Check similarity with already selected documents
|
682 |
-
doc_embedding = similarity_model.
|
683 |
is_similar = False
|
684 |
|
685 |
for content in added_contents:
|
686 |
-
content_embedding = similarity_model.
|
687 |
similarity = util.pytorch_cos_sim(doc_embedding, content_embedding)
|
688 |
if similarity > similarity_threshold:
|
689 |
is_similar = True
|
@@ -705,8 +708,8 @@ def rerank_documents(query: str, documents: List[Dict],
|
|
705 |
|
706 |
def compute_similarity(text1, text2):
|
707 |
# Encode the texts
|
708 |
-
embedding1 = similarity_model.
|
709 |
-
embedding2 = similarity_model.
|
710 |
|
711 |
# Compute cosine similarity
|
712 |
cosine_similarity = util.pytorch_cos_sim(embedding1, embedding2)
|
|
|
39 |
import datetime
|
40 |
from abc import ABC, abstractmethod
|
41 |
from typing import List, Dict, Any
|
42 |
+
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
|
43 |
+
|
44 |
|
45 |
# Automatically get the current year
|
46 |
CURRENT_YEAR = datetime.datetime.now().year
|
|
|
107 |
MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY")
|
108 |
mistral_client = Mistral(api_key=MISTRAL_API_KEY)
|
109 |
|
110 |
+
similarity_model = HuggingFaceInferenceAPIEmbeddings(
|
111 |
+
api_key=HF_TOKEN, model_name="sentence-transformers/all-MiniLM-l6-v2"
|
112 |
+
)
|
113 |
|
114 |
# Step 1: Create a base class for AI models
|
115 |
class AIModel(ABC):
|
|
|
648 |
bm25_scores = bm25.get_scores(query)
|
649 |
|
650 |
# Step 4: Get semantic similarity scores
|
651 |
+
query_embedding = similarity_model.embed_query(query)
|
652 |
doc_summaries = [doc['summary'] for doc in valid_docs]
|
653 |
+
doc_embeddings = similarity_model.embed_query(doc_summaries)
|
654 |
semantic_scores = util.cos_sim(query_embedding, doc_embeddings)[0]
|
655 |
|
656 |
# Step 5: Combine scores (normalize first)
|
|
|
682 |
continue
|
683 |
|
684 |
# Check similarity with already selected documents
|
685 |
+
doc_embedding = similarity_model.embed_query(doc['summary'])
|
686 |
is_similar = False
|
687 |
|
688 |
for content in added_contents:
|
689 |
+
content_embedding = similarity_model.embed_query(content)
|
690 |
similarity = util.pytorch_cos_sim(doc_embedding, content_embedding)
|
691 |
if similarity > similarity_threshold:
|
692 |
is_similar = True
|
|
|
708 |
|
709 |
def compute_similarity(text1, text2):
|
710 |
# Encode the texts
|
711 |
+
embedding1 = similarity_model.embed_query(text1)
|
712 |
+
embedding2 = similarity_model.embed_query(text2)
|
713 |
|
714 |
# Compute cosine similarity
|
715 |
cosine_similarity = util.pytorch_cos_sim(embedding1, embedding2)
|