Spaces:
Sleeping
Sleeping
app.py
Browse files
app.py
CHANGED
@@ -94,12 +94,17 @@ def store_in_faiss(chunks):
|
|
94 |
return faiss_index
|
95 |
|
96 |
|
97 |
-
# 🔹 5. Retrieve Chunks using BM25
|
98 |
def retrieve_bm25(query, top_k=2):
|
99 |
tokenized_query = query.split()
|
100 |
scores = bm25.get_scores(tokenized_query)
|
101 |
-
top_indices = np.argsort(scores)[-top_k:][::-1]
|
102 |
-
|
|
|
|
|
|
|
|
|
|
|
103 |
return retrieved_chunks
|
104 |
|
105 |
|
@@ -135,17 +140,34 @@ def refine_with_gemini(query, retrieved_text):
|
|
135 |
return "β οΈ Gemini API Exception: Unable to fetch response."
|
136 |
|
137 |
|
138 |
-
# 🔹 7. Final Retrieval Function
|
139 |
def retrieve_and_generate_secure(query):
|
140 |
print("π Query Received:", query)
|
141 |
if bm25 is None or not chunk_texts:
|
142 |
return "β No PDF data loaded. Please upload a PDF first."
|
143 |
-
|
144 |
bm25_results = retrieve_bm25(query)
|
145 |
if not bm25_results:
|
146 |
return "β No relevant financial data found for your query."
|
147 |
-
|
148 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
149 |
|
150 |
|
151 |
# 🔹 8. Load PDF and Process Data
|
|
|
94 |
return faiss_index
|
95 |
|
96 |
|
97 |
+
# 🔹 5. Retrieve Chunks using BM25 with Scores
|
98 |
def retrieve_bm25(query, top_k=2):
    """Return the best BM25 matches for ``query`` with a confidence each.

    The query is split on whitespace and scored against every chunk by the
    module-level ``bm25`` index. Scores are min-max normalized over all
    chunks, so each confidence lies in ``[0, 1]``.

    Args:
        query: Free-text query string.
        top_k: Maximum number of chunks to return. Values <= 0 yield ``[]``.

    Returns:
        A list of ``(chunk_text, confidence)`` tuples, best match first.
        ``chunk_text`` comes from the module-level ``chunk_texts`` and
        ``confidence`` is a plain Python ``float``.
    """
    if top_k <= 0:
        # ``scores[-0:]`` would select every chunk rather than none.
        return []

    scores = np.asarray(bm25.get_scores(query.split()), dtype=float)
    if scores.size == 0:
        # Nothing indexed: np.min / np.max would raise on an empty array.
        return []

    top_indices = np.argsort(scores)[-top_k:][::-1]  # highest score first
    min_score = float(scores.min())
    max_score = float(scores.max())
    span = max_score - min_score

    if span > 0:
        confidences = [
            (float(scores[i]) - min_score) / span for i in top_indices
        ]
    else:
        # All chunks scored identically, so min-max scaling is undefined.
        # A shared positive score (e.g. a single-chunk corpus that matches)
        # counts as full confidence; a shared non-positive score means no
        # query term matched and must not be reported as 100%.
        flat_confidence = 1.0 if max_score > 0 else 0.0
        confidences = [flat_confidence] * len(top_indices)

    return [
        (chunk_texts[i], confidence)
        for i, confidence in zip(top_indices, confidences)
    ]
|
109 |
|
110 |
|
|
|
140 |
return "β οΈ Gemini API Exception: Unable to fetch response."
|
141 |
|
142 |
|
143 |
+
# 🔹 7. Final Retrieval Function with Confidence Score
|
144 |
def retrieve_and_generate_secure(query):
    """Answer ``query`` from the loaded PDF and attach a confidence score.

    Steps, in order:
      1. Bail out with a message if no BM25 index / chunks are loaded.
      2. Retrieve the top chunks with ``retrieve_bm25`` and average their
         normalized confidences.
      3. Embed the query with ``embed_model``, fetch its nearest neighbour
         from ``faiss_index`` and convert that distance ``d`` to a
         similarity ``1 / (1 + d)``.
      4. Blend both signals (60% BM25, 40% FAISS) into one confidence.
      5. Ask ``refine_with_gemini`` for the final answer over the retrieved
         text.

    Args:
        query: The user's question.

    Returns:
        A user-facing string: either an error message, or the generated
        answer followed by the confidence as a percentage.
    """
    # NOTE(review): the leading characters of the user-facing strings below
    # look like mis-encoded emoji; they are reproduced unchanged here --
    # confirm against the original file's encoding.
    print("π Query Received:", query)
    if bm25 is None or not chunk_texts:
        return "β No PDF data loaded. Please upload a PDF first."

    bm25_results = retrieve_bm25(query)
    if not bm25_results:
        return "β No relevant financial data found for your query."

    # Split the (text, confidence) pairs into two parallel tuples.
    retrieved_texts, bm25_confidences = zip(*bm25_results)
    avg_bm25_confidence = sum(bm25_confidences) / len(bm25_confidences)

    # Top-1 FAISS retrieval; only the distance is needed, not the index.
    query_embedding = embed_model.encode([query])
    distances, _ = faiss_index.search(query_embedding, 1)

    # Clamp at zero: a negative distance (floating-point error on a
    # near-exact match) would push the similarity above 1, and -1 would
    # divide by zero. A distance of exactly 0 naturally maps to 1.
    nearest_distance = max(float(distances[0][0]), 0.0)
    faiss_confidence = 1.0 / (1.0 + nearest_distance)

    # Weighted average of the two signals, kept inside [0, 1].
    final_confidence = (0.6 * avg_bm25_confidence) + (0.4 * faiss_confidence)
    final_confidence = min(max(float(final_confidence), 0.0), 1.0)

    final_answer = refine_with_gemini(query, "\n".join(retrieved_texts))

    return f"π¬ Answer: {final_answer}\n\nπΉ Confidence Score: {round(final_confidence * 100, 2)}%"
|
171 |
|
172 |
|
173 |
# 🔹 8. Load PDF and Process Data
|