Shreyas094 committed on
Commit
bc6c726
1 Parent(s): b578a48

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -8
app.py CHANGED
@@ -39,6 +39,8 @@ from typing import List, Dict, Tuple
39
  import datetime
40
  from abc import ABC, abstractmethod
41
  from typing import List, Dict, Any
 
 
42
 
43
  # Automatically get the current year
44
  CURRENT_YEAR = datetime.datetime.now().year
@@ -105,8 +107,9 @@ groq_client = Groq(api_key=GROQ_API_KEY)
105
  MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY")
106
  mistral_client = Mistral(api_key=MISTRAL_API_KEY)
107
 
108
- # Initialize the similarity model
109
- similarity_model = SentenceTransformer('all-MiniLM-L6-v2')
 
110
 
111
  # Step 1: Create a base class for AI models
112
  class AIModel(ABC):
@@ -645,9 +648,9 @@ def rerank_documents(query: str, documents: List[Dict],
645
  bm25_scores = bm25.get_scores(query)
646
 
647
  # Step 4: Get semantic similarity scores
648
- query_embedding = similarity_model.encode(query, convert_to_tensor=True)
649
  doc_summaries = [doc['summary'] for doc in valid_docs]
650
- doc_embeddings = similarity_model.encode(doc_summaries, convert_to_tensor=True)
651
  semantic_scores = util.cos_sim(query_embedding, doc_embeddings)[0]
652
 
653
  # Step 5: Combine scores (normalize first)
@@ -679,11 +682,11 @@ def rerank_documents(query: str, documents: List[Dict],
679
  continue
680
 
681
  # Check similarity with already selected documents
682
- doc_embedding = similarity_model.encode(doc['summary'], convert_to_tensor=True)
683
  is_similar = False
684
 
685
  for content in added_contents:
686
- content_embedding = similarity_model.encode(content, convert_to_tensor=True)
687
  similarity = util.pytorch_cos_sim(doc_embedding, content_embedding)
688
  if similarity > similarity_threshold:
689
  is_similar = True
@@ -705,8 +708,8 @@ def rerank_documents(query: str, documents: List[Dict],
705
 
706
  def compute_similarity(text1, text2):
707
  # Encode the texts
708
- embedding1 = similarity_model.encode(text1, convert_to_tensor=True)
709
- embedding2 = similarity_model.encode(text2, convert_to_tensor=True)
710
 
711
  # Compute cosine similarity
712
  cosine_similarity = util.pytorch_cos_sim(embedding1, embedding2)
 
39
  import datetime
40
  from abc import ABC, abstractmethod
41
  from typing import List, Dict, Any
42
+ from langchain_huggingface.embeddings import HuggingFaceEmbeddings
43
+
44
 
45
  # Automatically get the current year
46
  CURRENT_YEAR = datetime.datetime.now().year
 
107
  MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY")
108
  mistral_client = Mistral(api_key=MISTRAL_API_KEY)
109
 
110
+ similarity_model = HuggingFaceInferenceAPIEmbeddings(
111
+ api_key=HF_TOKEN, model_name="sentence-transformers/all-MiniLM-l6-v2"
112
+ )
113
 
114
  # Step 1: Create a base class for AI models
115
  class AIModel(ABC):
 
648
  bm25_scores = bm25.get_scores(query)
649
 
650
  # Step 4: Get semantic similarity scores
651
+ query_embedding = similarity_model.embed_query(query)
652
  doc_summaries = [doc['summary'] for doc in valid_docs]
653
+ doc_embeddings = similarity_model.embed_query(doc_summaries)
654
  semantic_scores = util.cos_sim(query_embedding, doc_embeddings)[0]
655
 
656
  # Step 5: Combine scores (normalize first)
 
682
  continue
683
 
684
  # Check similarity with already selected documents
685
+ doc_embedding = similarity_model.embed_query(doc['summary'])
686
  is_similar = False
687
 
688
  for content in added_contents:
689
+ content_embedding = similarity_model.embed_query(content)
690
  similarity = util.pytorch_cos_sim(doc_embedding, content_embedding)
691
  if similarity > similarity_threshold:
692
  is_similar = True
 
708
 
709
  def compute_similarity(text1, text2):
710
  # Encode the texts
711
+ embedding1 = similarity_model.embed_query(text1)
712
+ embedding2 = similarity_model.embed_query(text2)
713
 
714
  # Compute cosine similarity
715
  cosine_similarity = util.pytorch_cos_sim(embedding1, embedding2)