Spaces:

kiyer
/

pathfinder

Running on CPU Upgrade

App Files Community

kiyer committed on Jul 29, 2024

Commit

1fa5fdb

1 Parent(s): a1e4f2c

bugfix for semantic_search

Browse files

Files changed (1) hide show

app.py +17 -15

app.py CHANGED Viewed

@@ -140,20 +140,20 @@ if 'arxiv_corpus' not in st.session_state:
         st.session_state.arxiv_corpus = arxiv_corpus
         st.toast('loaded arxiv corpus')
-if 'ids' not in st.session_state:
-    with st.spinner('making the LLM talk to the astro papers...'):
-        st.session_state.ids = st.session_state.arxiv_corpus['ads_id']
-        st.session_state.titles = st.session_state.arxiv_corpus['title']
-        st.session_state.abstracts = st.session_state.arxiv_corpus['abstract']
-        st.session_state.authors = st.session_state.arxiv_corpus['authors']
-        st.session_state.cites = st.session_state.arxiv_corpus['cites']
-        st.session_state.years = st.session_state.arxiv_corpus['date']
-        st.session_state.kws = st.session_state.arxiv_corpus['keywords']
-        st.session_state.ads_kws = st.session_state.arxiv_corpus['ads_keywords']
-        st.session_state.bibcode = st.session_state.arxiv_corpus['bibcode']
-        st.session_state.umap_x = st.session_state.arxiv_corpus['umap_x']
-        st.session_state.umap_y = st.session_state.arxiv_corpus['umap_y']
-        st.toast('done caching. time taken: %.2f sec' %(time.time()-ts))
 def get_keywords(text):
     result = []
@@ -192,6 +192,8 @@ class EmbeddingRetrievalSystem():
         self.weight_date = weight_date
         self.weight_keywords = weight_keywords
         self.id_to_index = {self.ids[i]: i for i in range(len(self.ids))}
         # self.citation_filter = CitationFilter(self.dataset)
         # self.date_filter = DateFilter(self.dataset['date'])
@@ -339,7 +341,7 @@ class HydeRetrievalSystem(EmbeddingRetrievalSystem):
         # self.anthropic_key = anthropic_key
         # self.generation_client = anthropic.Anthropic(api_key = self.anthropic_key)
-        self.generation_client = openai_llm(temperature=0,model_name='gpt-4o-mini', openai_api_key = openai_key)
     def retrieve(self, query: str, top_k: int = 10, return_scores = False, time_result = None) -> List[Tuple[str, str, float]]:
         if time_result is None:

         st.session_state.arxiv_corpus = arxiv_corpus
         st.toast('loaded arxiv corpus')
+        if 'ids' not in st.session_state:
+            with st.spinner('making the LLM talk to the astro papers...'):
+                st.session_state.ids = st.session_state.arxiv_corpus['ads_id']
+                st.session_state.titles = st.session_state.arxiv_corpus['title']
+                st.session_state.abstracts = st.session_state.arxiv_corpus['abstract']
+                st.session_state.authors = st.session_state.arxiv_corpus['authors']
+                st.session_state.cites = st.session_state.arxiv_corpus['cites']
+                st.session_state.years = st.session_state.arxiv_corpus['date']
+                st.session_state.kws = st.session_state.arxiv_corpus['keywords']
+                st.session_state.ads_kws = st.session_state.arxiv_corpus['ads_keywords']
+                st.session_state.bibcode = st.session_state.arxiv_corpus['bibcode']
+                st.session_state.umap_x = st.session_state.arxiv_corpus['umap_x']
+                st.session_state.umap_y = st.session_state.arxiv_corpus['umap_y']
+                st.toast('done caching. time taken: %.2f sec' %(time.time()-ts))
 def get_keywords(text):
     result = []
         self.weight_date = weight_date
         self.weight_keywords = weight_keywords
         self.id_to_index = {self.ids[i]: i for i in range(len(self.ids))}
+        self.generation_client = openai_llm(temperature=0,model_name='gpt-4o-mini', openai_api_key = openai_key)
         # self.citation_filter = CitationFilter(self.dataset)
         # self.date_filter = DateFilter(self.dataset['date'])
         # self.anthropic_key = anthropic_key
         # self.generation_client = anthropic.Anthropic(api_key = self.anthropic_key)
     def retrieve(self, query: str, top_k: int = 10, return_scores = False, time_result = None) -> List[Tuple[str, str, float]]:
         if time_result is None: