kiyer committed on
Commit
e2d52cc
·
verified ·
1 Parent(s): 257c01b

trying to make things a bit faster

Browse files
Files changed (1) hide show
  1. app.py +16 -12
app.py CHANGED
@@ -131,6 +131,10 @@ if 'ids' not in st.session_state:
131
  st.session_state.cites = arxiv_corpus['cites']
132
  st.session_state.years = arxiv_corpus['date']
133
  st.session_state.kws = arxiv_corpus['keywords']
 
 
 
 
134
  st.toast('done caching. time taken: %.2f sec' %(time.time()-ts))
135
 
136
 
@@ -194,7 +198,7 @@ class EmbeddingRetrievalSystem():
194
  self.abstract = st.session_state.abstracts
195
  self.client = OpenAI(api_key = openai_key)
196
  self.embed_model = "text-embedding-3-small"
197
- self.dataset = arxiv_corpus
198
  self.kws = st.session_state.kws
199
  self.cites = st.session_state.cites
200
 
@@ -495,7 +499,7 @@ def Library(query, top_k = 7):
495
  rs = get_topk(query, top_k = top_k)
496
  op_docs = ''
497
  for paperno, i in enumerate(rs):
498
- op_docs = op_docs + 'Paper %.0f:' %(paperno+1) +' (published in '+st.session_state.arxiv_corpus['bibcode'][i][0:4] + ') ' + st.session_state.titles[i] + '\n' + st.session_state.abstracts[i] + '\n\n'
499
 
500
  return op_docs
501
 
@@ -504,7 +508,7 @@ def Library2(query, top_k = 7):
504
  absts, fnames = [], []
505
  for paperno, i in enumerate(rs):
506
  absts.append(st.session_state.abstracts[i])
507
- fnames.append(st.session_state.arxiv_corpus['bibcode'][i])
508
  return absts, fnames, rs
509
 
510
  def get_paper_df(ids):
@@ -513,10 +517,10 @@ def get_paper_df(ids):
513
  for i in ids:
514
  papers.append(st.session_state.titles[i])
515
  scores.append(ids[i])
516
- links.append('https://ui.adsabs.harvard.edu/abs/'+st.session_state.arxiv_corpus['bibcode'][i]+'/abstract')
517
- yrs.append(st.session_state.arxiv_corpus['bibcode'][i][0:4])
518
- cites.append(st.session_state.arxiv_corpus['cites'][i])
519
- kws.append(st.session_state.arxiv_corpus['ads_keywords'][i])
520
 
521
  return pd.DataFrame({
522
  'Title': papers,
@@ -587,10 +591,10 @@ def create_embedding_plot(rs):
587
  """
588
 
589
  pltsource = ColumnDataSource(data=dict(
590
- x=st.session_state.arxiv_corpus['umap_x'],
591
- y=st.session_state.arxiv_corpus['umap_y'],
592
  title=st.session_state.titles,
593
- link=st.session_state.arxiv_corpus['bibcode'],
594
  ))
595
 
596
  rsflag = np.zeros((len(st.session_state.ids),))
@@ -995,8 +999,8 @@ def main():
995
  column_config = {'ADS Link':st.column_config.LinkColumn(display_text= 'https://ui.adsabs.harvard.edu/abs/(.*?)/abstract')}
996
  )
997
 
998
- with st.expander("Embedding map", expanded=False):
999
- st.bokeh_chart(embedding_plot)
1000
 
1001
  col1, col2 = st.columns(2)
1002
 
 
131
  st.session_state.cites = arxiv_corpus['cites']
132
  st.session_state.years = arxiv_corpus['date']
133
  st.session_state.kws = arxiv_corpus['keywords']
134
+ st.session_state.ads_kws = arxiv_corpus['ads_keywords']
135
+ st.session_state.bibcode = arxiv_corpus['bibcode']
136
+ st.session_state.umap_x = arxiv_corpus['umap_x']
137
+ st.session_state.umap_y = arxiv_corpus['umap_y']
138
  st.toast('done caching. time taken: %.2f sec' %(time.time()-ts))
139
 
140
 
 
198
  self.abstract = st.session_state.abstracts
199
  self.client = OpenAI(api_key = openai_key)
200
  self.embed_model = "text-embedding-3-small"
201
+ self.dataset = st.session_state.arxiv_corpus
202
  self.kws = st.session_state.kws
203
  self.cites = st.session_state.cites
204
 
 
499
  rs = get_topk(query, top_k = top_k)
500
  op_docs = ''
501
  for paperno, i in enumerate(rs):
502
+ op_docs = op_docs + 'Paper %.0f:' %(paperno+1) +' (published in '+st.session_state.bibcode[i][0:4] + ') ' + st.session_state.titles[i] + '\n' + st.session_state.abstracts[i] + '\n\n'
503
 
504
  return op_docs
505
 
 
508
  absts, fnames = [], []
509
  for paperno, i in enumerate(rs):
510
  absts.append(st.session_state.abstracts[i])
511
+ fnames.append(st.session_state.bibcode[i])
512
  return absts, fnames, rs
513
 
514
  def get_paper_df(ids):
 
517
  for i in ids:
518
  papers.append(st.session_state.titles[i])
519
  scores.append(ids[i])
520
+ links.append('https://ui.adsabs.harvard.edu/abs/'+st.session_state.bibcode[i]+'/abstract')
521
+ yrs.append(st.session_state.bibcode[i][0:4])
522
+ cites.append(st.session_state.cites[i])
523
+ kws.append(st.session_state.ads_kws[i])
524
 
525
  return pd.DataFrame({
526
  'Title': papers,
 
591
  """
592
 
593
  pltsource = ColumnDataSource(data=dict(
594
+ x=st.session_state.umap_x,
595
+ y=st.session_state.umap_y,
596
  title=st.session_state.titles,
597
+ link=st.session_state.bibcode,
598
  ))
599
 
600
  rsflag = np.zeros((len(st.session_state.ids),))
 
999
  column_config = {'ADS Link':st.column_config.LinkColumn(display_text= 'https://ui.adsabs.harvard.edu/abs/(.*?)/abstract')}
1000
  )
1001
 
1002
+ # with st.expander("Embedding map", expanded=False):
1003
+ st.bokeh_chart(embedding_plot)
1004
 
1005
  col1, col2 = st.columns(2)
1006