Spaces:
Sleeping
Sleeping
trying to make things a bit faster
Browse files
app.py
CHANGED
@@ -131,6 +131,10 @@ if 'ids' not in st.session_state:
|
|
131 |
st.session_state.cites = arxiv_corpus['cites']
|
132 |
st.session_state.years = arxiv_corpus['date']
|
133 |
st.session_state.kws = arxiv_corpus['keywords']
|
|
|
|
|
|
|
|
|
134 |
st.toast('done caching. time taken: %.2f sec' %(time.time()-ts))
|
135 |
|
136 |
|
@@ -194,7 +198,7 @@ class EmbeddingRetrievalSystem():
|
|
194 |
self.abstract = st.session_state.abstracts
|
195 |
self.client = OpenAI(api_key = openai_key)
|
196 |
self.embed_model = "text-embedding-3-small"
|
197 |
-
self.dataset = arxiv_corpus
|
198 |
self.kws = st.session_state.kws
|
199 |
self.cites = st.session_state.cites
|
200 |
|
@@ -495,7 +499,7 @@ def Library(query, top_k = 7):
|
|
495 |
rs = get_topk(query, top_k = top_k)
|
496 |
op_docs = ''
|
497 |
for paperno, i in enumerate(rs):
|
498 |
-
op_docs = op_docs + 'Paper %.0f:' %(paperno+1) +' (published in '+st.session_state.
|
499 |
|
500 |
return op_docs
|
501 |
|
@@ -504,7 +508,7 @@ def Library2(query, top_k = 7):
|
|
504 |
absts, fnames = [], []
|
505 |
for paperno, i in enumerate(rs):
|
506 |
absts.append(st.session_state.abstracts[i])
|
507 |
-
fnames.append(st.session_state.
|
508 |
return absts, fnames, rs
|
509 |
|
510 |
def get_paper_df(ids):
|
@@ -513,10 +517,10 @@ def get_paper_df(ids):
|
|
513 |
for i in ids:
|
514 |
papers.append(st.session_state.titles[i])
|
515 |
scores.append(ids[i])
|
516 |
-
links.append('https://ui.adsabs.harvard.edu/abs/'+st.session_state.
|
517 |
-
yrs.append(st.session_state.
|
518 |
-
cites.append(st.session_state.
|
519 |
-
kws.append(st.session_state.
|
520 |
|
521 |
return pd.DataFrame({
|
522 |
'Title': papers,
|
@@ -587,10 +591,10 @@ def create_embedding_plot(rs):
|
|
587 |
"""
|
588 |
|
589 |
pltsource = ColumnDataSource(data=dict(
|
590 |
-
x=st.session_state.
|
591 |
-
y=st.session_state.
|
592 |
title=st.session_state.titles,
|
593 |
-
link=st.session_state.
|
594 |
))
|
595 |
|
596 |
rsflag = np.zeros((len(st.session_state.ids),))
|
@@ -995,8 +999,8 @@ def main():
|
|
995 |
column_config = {'ADS Link':st.column_config.LinkColumn(display_text= 'https://ui.adsabs.harvard.edu/abs/(.*?)/abstract')}
|
996 |
)
|
997 |
|
998 |
-
with st.expander("Embedding map", expanded=False):
|
999 |
-
|
1000 |
|
1001 |
col1, col2 = st.columns(2)
|
1002 |
|
|
|
131 |
st.session_state.cites = arxiv_corpus['cites']
|
132 |
st.session_state.years = arxiv_corpus['date']
|
133 |
st.session_state.kws = arxiv_corpus['keywords']
|
134 |
+
st.session_state.ads_kws = arxiv_corpus['ads_keywords']
|
135 |
+
st.session_state.bibcode = arxiv_corpus['bibcode']
|
136 |
+
st.session_state.umap_x = arxiv_corpus['umap_x']
|
137 |
+
st.session_state.umap_y = arxiv_corpus['umap_y']
|
138 |
st.toast('done caching. time taken: %.2f sec' %(time.time()-ts))
|
139 |
|
140 |
|
|
|
198 |
self.abstract = st.session_state.abstracts
|
199 |
self.client = OpenAI(api_key = openai_key)
|
200 |
self.embed_model = "text-embedding-3-small"
|
201 |
+
self.dataset = st.session_state.arxiv_corpus
|
202 |
self.kws = st.session_state.kws
|
203 |
self.cites = st.session_state.cites
|
204 |
|
|
|
499 |
rs = get_topk(query, top_k = top_k)
|
500 |
op_docs = ''
|
501 |
for paperno, i in enumerate(rs):
|
502 |
+
op_docs = op_docs + 'Paper %.0f:' %(paperno+1) +' (published in '+st.session_state.bibcode[i][0:4] + ') ' + st.session_state.titles[i] + '\n' + st.session_state.abstracts[i] + '\n\n'
|
503 |
|
504 |
return op_docs
|
505 |
|
|
|
508 |
absts, fnames = [], []
|
509 |
for paperno, i in enumerate(rs):
|
510 |
absts.append(st.session_state.abstracts[i])
|
511 |
+
fnames.append(st.session_state.bibcode[i])
|
512 |
return absts, fnames, rs
|
513 |
|
514 |
def get_paper_df(ids):
|
|
|
517 |
for i in ids:
|
518 |
papers.append(st.session_state.titles[i])
|
519 |
scores.append(ids[i])
|
520 |
+
links.append('https://ui.adsabs.harvard.edu/abs/'+st.session_state.bibcode[i]+'/abstract')
|
521 |
+
yrs.append(st.session_state.bibcode[i][0:4])
|
522 |
+
cites.append(st.session_state.cites[i])
|
523 |
+
kws.append(st.session_state.ads_kws[i])
|
524 |
|
525 |
return pd.DataFrame({
|
526 |
'Title': papers,
|
|
|
591 |
"""
|
592 |
|
593 |
pltsource = ColumnDataSource(data=dict(
|
594 |
+
x=st.session_state.umap_x,
|
595 |
+
y=st.session_state.umap_y,
|
596 |
title=st.session_state.titles,
|
597 |
+
link=st.session_state.bibcode,
|
598 |
))
|
599 |
|
600 |
rsflag = np.zeros((len(st.session_state.ids),))
|
|
|
999 |
column_config = {'ADS Link':st.column_config.LinkColumn(display_text= 'https://ui.adsabs.harvard.edu/abs/(.*?)/abstract')}
|
1000 |
)
|
1001 |
|
1002 |
+
# with st.expander("Embedding map", expanded=False):
|
1003 |
+
st.bokeh_chart(embedding_plot)
|
1004 |
|
1005 |
col1, col2 = st.columns(2)
|
1006 |
|