alfa95 committed on
Commit
b907e11
·
1 Parent(s): 66e0254
Files changed (1) hide show
  1. app.py +29 -7
app.py CHANGED
@@ -94,12 +94,17 @@ def store_in_faiss(chunks):
94
  return faiss_index
95
 
96
 
97
- # 🔹 5. Retrieve Chunks using BM25
98
  def retrieve_bm25(query, top_k=2):
99
  tokenized_query = query.split()
100
  scores = bm25.get_scores(tokenized_query)
101
- top_indices = np.argsort(scores)[-top_k:][::-1]
102
- retrieved_chunks = [chunk_texts[i] for i in top_indices]
 
 
 
 
 
103
  return retrieved_chunks
104
 
105
 
@@ -135,17 +140,34 @@ def refine_with_gemini(query, retrieved_text):
135
  return "⚠️ Gemini API Exception: Unable to fetch response."
136
 
137
 
138
- # 🔹 7. Final Retrieval Function
139
  def retrieve_and_generate_secure(query):
140
  print("πŸ” Query Received:", query)
141
  if bm25 is None or not chunk_texts:
142
  return "❌ No PDF data loaded. Please upload a PDF first."
143
-
144
  bm25_results = retrieve_bm25(query)
145
  if not bm25_results:
146
  return "❌ No relevant financial data found for your query."
147
-
148
- return refine_with_gemini(query, "\n".join(bm25_results))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
 
150
 
151
  # 🔹 8. Load PDF and Process Data
 
94
  return faiss_index
95
 
96
 
97
# 🔹 5. Retrieve Chunks using BM25 with Scores
def retrieve_bm25(query, top_k=2):
    """Return the best-matching chunks for *query* with a 0..1 confidence each.

    The query is split on whitespace and scored against every chunk by the
    module-level ``bm25`` object; chunk text is looked up in the module-level
    ``chunk_texts`` list by the same index.

    Args:
        query: Free-text query string.
        top_k: Maximum number of chunks to return. Values <= 0 yield ``[]``.

    Returns:
        A list of ``(chunk_text, confidence)`` tuples ordered from highest to
        lowest BM25 score. ``confidence`` is a plain ``float`` obtained by
        min-max scaling the chunk's score over the scores of *all* chunks.
    """
    scores = np.asarray(bm25.get_scores(query.split()), dtype=float)

    # ``scores[-0:]`` would select everything, and min/max of an empty array
    # raises, so both degenerate cases are answered up front.
    if top_k <= 0 or scores.size == 0:
        return []

    top_indices = np.argsort(scores)[-top_k:][::-1]

    # Min-max scaling over the whole corpus: whenever scores differ, the best
    # chunk gets 1.0 and the worst chunk in the corpus gets 0.0.
    min_score = scores.min()
    score_range = scores.max() - min_score
    if score_range > 0:
        confidences = (scores[top_indices] - min_score) / score_range
    else:
        # All chunks scored the same. Treat that as full confidence only if
        # the shared score is positive; an all-zero result means nothing in
        # the query matched and must not be reported as 100% confident.
        confidences = np.full(len(top_indices), 1.0 if min_score > 0 else 0.0)

    return [
        (chunk_texts[index], float(confidence))
        for index, confidence in zip(top_indices, confidences)
    ]
109
 
110
 
 
140
  return "⚠️ Gemini API Exception: Unable to fetch response."
141
 
142
 
143
# 🔹 7. Final Retrieval Function with Confidence Score
_BM25_WEIGHT = 0.6   # share of the final confidence taken from BM25
_FAISS_WEIGHT = 0.4  # share of the final confidence taken from FAISS


def _faiss_confidence(query):
    """Return a 0..1 similarity for the nearest FAISS neighbour, or None.

    ``None`` means no usable FAISS signal: the module-level ``faiss_index`` is
    not set, or the search returned no neighbour.
    """
    if faiss_index is None:
        return None

    # FAISS searches float32 matrices; cast in case the encoder returns
    # another dtype.
    query_embedding = np.asarray(embed_model.encode([query]), dtype="float32")
    distances, indices = faiss_index.search(query_embedding, 1)

    # FAISS reports a missing neighbour with label -1.
    if indices[0][0] < 0:
        return None

    # NOTE(review): assumes the index returns a distance (smaller = closer),
    # as the original conversion did - confirm against store_in_faiss.
    # Clamping at 0 keeps the similarity within (0, 1].
    distance = max(float(distances[0][0]), 0.0)
    return 1.0 / (1.0 + distance)


def retrieve_and_generate_secure(query):
    """Answer *query* from the loaded PDF chunks and append a confidence score.

    Steps: retrieve chunks with ``retrieve_bm25``, average their BM25
    confidences, blend that with the FAISS nearest-neighbour similarity
    (60% BM25 / 40% FAISS), and pass the retrieved text to
    ``refine_with_gemini`` for the final answer. If FAISS provides no signal,
    the BM25 average is used on its own.

    Args:
        query: The user's question.

    Returns:
        A user-facing string: either an error message (no PDF loaded, or
        nothing retrieved) or the generated answer followed by the confidence
        as a percentage rounded to two decimals.
    """
    print("🔍 Query Received:", query)
    if bm25 is None or not chunk_texts:
        return "❌ No PDF data loaded. Please upload a PDF first."

    bm25_results = retrieve_bm25(query)
    if not bm25_results:
        return "❌ No relevant financial data found for your query."

    # Split the (text, confidence) pairs into two parallel tuples.
    retrieved_texts, bm25_confidences = zip(*bm25_results)
    avg_bm25_confidence = sum(bm25_confidences) / len(bm25_confidences)

    faiss_confidence = _faiss_confidence(query)
    if faiss_confidence is None:
        final_confidence = avg_bm25_confidence
    else:
        final_confidence = (
            _BM25_WEIGHT * avg_bm25_confidence + _FAISS_WEIGHT * faiss_confidence
        )
    # Keep the reported percentage within 0-100 whatever the inputs were.
    final_confidence = min(max(float(final_confidence), 0.0), 1.0)

    final_answer = refine_with_gemini(query, "\n".join(retrieved_texts))

    return f"💬 Answer: {final_answer}\n\n🔹 Confidence Score: {round(final_confidence * 100, 2)}%"
171
 
172
 
173
  # 🔹 8. Load PDF and Process Data