Spaces:
Runtime error
Runtime error
Upload 2 files
Browse files
- app.py +18 -11
- kw_tags.npz +3 -0
app.py
CHANGED
@@ -43,7 +43,7 @@ from openai import OpenAI
|
|
43 |
# import anthropic
|
44 |
import cohere
|
45 |
import faiss
|
46 |
-
|
47 |
import spacy
|
48 |
from string import punctuation
|
49 |
import pytextrank
|
@@ -282,8 +282,8 @@ class RetrievalSystem():
|
|
282 |
indices = [i for i in top_results]
|
283 |
df.insert(1,'ADS Link',links,True)
|
284 |
df.insert(2,'Relevance',scores,True)
|
285 |
-
df.insert(3,'
|
286 |
-
df = df[['ADS Link','Relevance','date','cites','title','authors','abstract','keywords','ads_id','
|
287 |
df.index += 1
|
288 |
return df
|
289 |
|
@@ -391,7 +391,7 @@ def Library(query):
|
|
391 |
papers_df = run_query_ret(st.session_state.query)
|
392 |
op_docs = ''
|
393 |
for i in range(len(papers_df)):
|
394 |
-
op_docs = op_docs + 'Paper %.0f:' %(i+1) + papers_df['title'][i] + '\n' + papers_df['abstract'][i] + '\n\n'
|
395 |
|
396 |
return op_docs
|
397 |
|
@@ -451,7 +451,7 @@ def run_rag_qa(query, papers_df):
|
|
451 |
metadata = {"source": row['ads_id']}
|
452 |
doc = Document(page_content=content, metadata=metadata)
|
453 |
documents.append(doc)
|
454 |
-
my_bar.progress((i
|
455 |
|
456 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=150, chunk_overlap=50, add_start_index=True)
|
457 |
|
@@ -562,7 +562,7 @@ def calc_outlier_flag(papers_df, top_k, cutoff_adjust = 0.1):
|
|
562 |
|
563 |
def make_embedding_plot(papers_df, consensus_answer):
|
564 |
|
565 |
-
plt_indices = np.array(papers_df['
|
566 |
|
567 |
if 'arxiv_corpus' not in st.session_state:
|
568 |
st.session_state.arxiv_corpus = load_arxiv_corpus()
|
@@ -574,10 +574,17 @@ def make_embedding_plot(papers_df, consensus_answer):
|
|
574 |
alphas = np.ones((len(plt_indices),)) * 0.9
|
575 |
alphas[outlier_flag] = 0.5
|
576 |
|
577 |
-
fig = plt.figure(figsize=(9
|
578 |
plt.scatter(xax,yax, s=1, alpha=0.01, c='k')
|
579 |
-
|
580 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
581 |
# plt.scatter(xax[plt_indices][outlier_flag], yax[plt_indices][outlier_flag], s=100, alpha=1., c='firebrick')
|
582 |
plt.axis([0,20,-4.2,18])
|
583 |
plt.axis('off')
|
@@ -589,7 +596,7 @@ def make_embedding_plot(papers_df, consensus_answer):
|
|
589 |
|
590 |
if st.session_state.get('runpfdr'):
|
591 |
with st.spinner(search_text_list[np.random.choice(len(search_text_list))]):
|
592 |
-
st.write('Settings: [Kw:',toggle_a, 'Time:',toggle_b, 'Cite:',toggle_c, '] top_k:',top_k, 'retrieval:',method)
|
593 |
|
594 |
papers_df = run_query_ret(st.session_state.query)
|
595 |
st.header(st.session_state.query)
|
@@ -636,7 +643,7 @@ if st.session_state.get('runpfdr'):
|
|
636 |
|
637 |
with st.spinner("Evaluating abstract consensus"):
|
638 |
with st.expander("Abstract consensus", expanded=True):
|
639 |
-
consensus_answer = evaluate_overall_consensus(query, [papers_df['abstract'][i] for i in range(len(papers_df))])
|
640 |
st.subheader("Consensus: "+consensus_answer.consensus)
|
641 |
st.markdown(consensus_answer.explanation)
|
642 |
st.markdown('Relevance of retrieved papers to answer: %.1f' %consensus_answer.relevance_score)
|
|
|
43 |
# import anthropic
|
44 |
import cohere
|
45 |
import faiss
|
46 |
+
import matplotlib.pyplot as plt
|
47 |
import spacy
|
48 |
from string import punctuation
|
49 |
import pytextrank
|
|
|
282 |
indices = [i for i in top_results]
|
283 |
df.insert(1,'ADS Link',links,True)
|
284 |
df.insert(2,'Relevance',scores,True)
|
285 |
+
df.insert(3,'indices',indices,True)
|
286 |
+
df = df[['ADS Link','Relevance','date','cites','title','authors','abstract','keywords','ads_id','indices','embed']]
|
287 |
df.index += 1
|
288 |
return df
|
289 |
|
|
|
391 |
papers_df = run_query_ret(st.session_state.query)
|
392 |
op_docs = ''
|
393 |
for i in range(len(papers_df)):
|
394 |
+
op_docs = op_docs + 'Paper %.0f:' %(i+1) + papers_df['title'][i+1] + '\n' + papers_df['abstract'][i+1] + '\n\n'
|
395 |
|
396 |
return op_docs
|
397 |
|
|
|
451 |
metadata = {"source": row['ads_id']}
|
452 |
doc = Document(page_content=content, metadata=metadata)
|
453 |
documents.append(doc)
|
454 |
+
my_bar.progress((i)/len(papers_df), text='adding documents to LLM context')
|
455 |
|
456 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=150, chunk_overlap=50, add_start_index=True)
|
457 |
|
|
|
562 |
|
563 |
def make_embedding_plot(papers_df, consensus_answer):
|
564 |
|
565 |
+
plt_indices = np.array(papers_df['indices'].tolist())
|
566 |
|
567 |
if 'arxiv_corpus' not in st.session_state:
|
568 |
st.session_state.arxiv_corpus = load_arxiv_corpus()
|
|
|
574 |
alphas = np.ones((len(plt_indices),)) * 0.9
|
575 |
alphas[outlier_flag] = 0.5
|
576 |
|
577 |
+
fig = plt.figure(figsize=(9*2.,12*2.))
|
578 |
plt.scatter(xax,yax, s=1, alpha=0.01, c='k')
|
579 |
+
|
580 |
+
clkws = np.load('kw_tags.npz')
|
581 |
+
all_x, all_y, all_topics, repeat_flag = clkws['all_x'], clkws['all_y'], clkws['all_topics'], clkws['repeat_flag']
|
582 |
+
for i in range(len(all_topics)):
|
583 |
+
if repeat_flag[i] == False:
|
584 |
+
plt.text(all_x[i], all_y[i], all_topics[i],fontsize=9,ha="center", va="center",
|
585 |
+
bbox=dict(facecolor='white', edgecolor='black', boxstyle='round,pad=0.3',alpha=0.81))
|
586 |
+
plt.scatter(xax[plt_indices], yax[plt_indices], s=300*alphas**2, alpha=alphas, c='w',zorder=1000)
|
587 |
+
plt.scatter(xax[plt_indices], yax[plt_indices], s=100*alphas**2, alpha=alphas, c='dodgerblue',zorder=1001)
|
588 |
# plt.scatter(xax[plt_indices][outlier_flag], yax[plt_indices][outlier_flag], s=100, alpha=1., c='firebrick')
|
589 |
plt.axis([0,20,-4.2,18])
|
590 |
plt.axis('off')
|
|
|
596 |
|
597 |
if st.session_state.get('runpfdr'):
|
598 |
with st.spinner(search_text_list[np.random.choice(len(search_text_list))]):
|
599 |
+
st.write('Settings: [Kw:',toggle_a, 'Time:',toggle_b, 'Cite:',toggle_c, '] top_k:',top_k, 'retrieval: `',method+'`')
|
600 |
|
601 |
papers_df = run_query_ret(st.session_state.query)
|
602 |
st.header(st.session_state.query)
|
|
|
643 |
|
644 |
with st.spinner("Evaluating abstract consensus"):
|
645 |
with st.expander("Abstract consensus", expanded=True):
|
646 |
+
consensus_answer = evaluate_overall_consensus(query, [papers_df['abstract'][i+1] for i in range(len(papers_df))])
|
647 |
st.subheader("Consensus: "+consensus_answer.consensus)
|
648 |
st.markdown(consensus_answer.explanation)
|
649 |
st.markdown('Relevance of retrieved papers to answer: %.1f' %consensus_answer.relevance_score)
|
kw_tags.npz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2d7068524d3d3029b8d36f4dd2fdf20d5c8a12fc69d8d1a404921aa54a6b40a8
|
3 |
+
size 17849
|