Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
kiyer
committed on
Commit
•
7d0b0c1
1
Parent(s):
793347c
try fix for index issue
Browse files
app.py
CHANGED
@@ -243,28 +243,28 @@ class RetrievalSystem():
|
|
243 |
query_embedding,
|
244 |
rerank_top_k,
|
245 |
return_scores = False)
|
246 |
-
try:
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
|
266 |
-
except:
|
267 |
-
print('heavy load, please wait 10s and try again.')
|
268 |
else:
|
269 |
top_results, small_df = self.rank_and_filter(query,
|
270 |
query_embedding,
|
@@ -278,6 +278,8 @@ class RetrievalSystem():
|
|
278 |
df = pd.DataFrame(small_df)
|
279 |
df = df.drop(columns=['umap_x','umap_y','cite_bibcodes','ref_bibcodes'])
|
280 |
links = ['https://ui.adsabs.harvard.edu/abs/'+i+'/abstract' for i in small_df['bibcode']]
|
|
|
|
|
281 |
scores = [top_results[i] for i in top_results]
|
282 |
indices = [i for i in top_results]
|
283 |
df.insert(1,'ADS Link',links,True)
|
@@ -477,7 +479,7 @@ def run_agent_qa(query):
|
|
477 |
|
478 |
def run_rag_qa(query, papers_df):
|
479 |
|
480 |
-
try:
|
481 |
loaders = []
|
482 |
|
483 |
documents = []
|
@@ -497,6 +499,8 @@ def run_rag_qa(query, papers_df):
|
|
497 |
# retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6, "fetch_k": len(splits)})
|
498 |
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})
|
499 |
|
|
|
|
|
500 |
if st.session_state.question_type == 'Bibliometric':
|
501 |
template = bibliometric_prompt
|
502 |
elif st.session_state.question_type == 'Single-paper':
|
@@ -523,10 +527,10 @@ def run_rag_qa(query, papers_df):
|
|
523 |
rag_answer = rag_chain_with_source.invoke(query, )
|
524 |
vectorstore.delete_collection()
|
525 |
|
526 |
-
except:
|
527 |
-
|
528 |
|
529 |
-
|
530 |
|
531 |
def guess_question_type(query: str):
|
532 |
|
|
|
243 |
query_embedding,
|
244 |
rerank_top_k,
|
245 |
return_scores = False)
|
246 |
+
# try:
|
247 |
+
docs_for_rerank = [small_df['abstract'][i] for i in range(rerank_top_k)]
|
248 |
+
if len(docs_for_rerank) == 0:
|
249 |
+
return []
|
250 |
+
reranked_results = self.cohere_client.rerank(
|
251 |
+
query=query,
|
252 |
+
documents=docs_for_rerank,
|
253 |
+
model='rerank-english-v3.0',
|
254 |
+
top_n=top_k
|
255 |
+
)
|
256 |
+
final_results = []
|
257 |
+
for result in reranked_results.results:
|
258 |
+
doc_id = top_results[result.index]
|
259 |
+
doc_text = docs_for_rerank[result.index]
|
260 |
+
score = float(result.relevance_score)
|
261 |
+
final_results.append([doc_id, "", score])
|
262 |
+
final_indices = [doc[0] for doc in final_results]
|
263 |
+
if return_scores:
|
264 |
+
return {result[0]: result[2] for result in final_results}, self.dataset[final_indices]
|
265 |
+
return [doc[0] for doc in final_results], self.dataset[final_indices]
|
266 |
+
# except:
|
267 |
+
# print('heavy load, please wait 10s and try again.')
|
268 |
else:
|
269 |
top_results, small_df = self.rank_and_filter(query,
|
270 |
query_embedding,
|
|
|
278 |
df = pd.DataFrame(small_df)
|
279 |
df = df.drop(columns=['umap_x','umap_y','cite_bibcodes','ref_bibcodes'])
|
280 |
links = ['https://ui.adsabs.harvard.edu/abs/'+i+'/abstract' for i in small_df['bibcode']]
|
281 |
+
|
282 |
+
# st.write(top_results[0:10])
|
283 |
scores = [top_results[i] for i in top_results]
|
284 |
indices = [i for i in top_results]
|
285 |
df.insert(1,'ADS Link',links,True)
|
|
|
479 |
|
480 |
def run_rag_qa(query, papers_df):
|
481 |
|
482 |
+
# try:
|
483 |
loaders = []
|
484 |
|
485 |
documents = []
|
|
|
499 |
# retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6, "fetch_k": len(splits)})
|
500 |
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})
|
501 |
|
502 |
+
|
503 |
+
|
504 |
if st.session_state.question_type == 'Bibliometric':
|
505 |
template = bibliometric_prompt
|
506 |
elif st.session_state.question_type == 'Single-paper':
|
|
|
527 |
rag_answer = rag_chain_with_source.invoke(query, )
|
528 |
vectorstore.delete_collection()
|
529 |
|
530 |
+
# except:
|
531 |
+
# st.subheader('heavy load! please wait 10 seconds and try again.')
|
532 |
|
533 |
+
return rag_answer
|
534 |
|
535 |
def guess_question_type(query: str):
|
536 |
|