Spaces:
Sleeping
Sleeping
kiyer
committed on
Commit
·
60c8258
1
Parent(s):
ac72d36
5pm update
Browse files- app.py +144 -20
- requirements.txt +2 -1
app.py
CHANGED
@@ -13,6 +13,16 @@ from collections import Counter
|
|
13 |
|
14 |
import yaml, json, requests, sys, os, time
|
15 |
import concurrent.futures
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
ts = time.time()
|
17 |
|
18 |
|
@@ -105,6 +115,39 @@ if 'ids' not in st.session_state:
|
|
105 |
st.session_state.kws = arxiv_corpus['keywords']
|
106 |
st.toast('done caching. time taken: %.2f sec' %(time.time()-ts))
|
107 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
108 |
|
109 |
#----------------------------------------------------------------
|
110 |
|
@@ -527,10 +570,10 @@ else:
|
|
527 |
|
528 |
|
529 |
# Function to simulate question answering (replace with actual implementation)
|
530 |
-
def answer_question(question, keywords, toggles, method, question_type):
|
531 |
# Simulated answer (replace with actual logic)
|
532 |
# return f"Answer to '{question}' using method {method} for {question_type} question."
|
533 |
-
return run_ret(question,
|
534 |
|
535 |
|
536 |
def get_papers(ids):
|
@@ -577,19 +620,84 @@ def run_ret(query, top_k):
|
|
577 |
output_str = ''
|
578 |
for i in rs:
|
579 |
if rs[i] > 0.5:
|
580 |
-
output_str = output_str + '---> ' + st.session_state.
|
581 |
else:
|
582 |
-
output_str = output_str +
|
583 |
return output_str, rs
|
584 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
585 |
# Streamlit app
|
586 |
def main():
|
587 |
|
588 |
# st.title("Question Answering App")
|
589 |
-
|
590 |
|
591 |
# Sidebar (Inputs)
|
592 |
-
st.sidebar.header("
|
|
|
593 |
extra_keywords = st.sidebar.text_input("Enter extra keywords (comma-separated):")
|
594 |
|
595 |
st.sidebar.subheader("Toggles")
|
@@ -597,8 +705,8 @@ def main():
|
|
597 |
toggle_b = st.sidebar.checkbox("Toggle B")
|
598 |
toggle_c = st.sidebar.checkbox("Toggle C")
|
599 |
|
600 |
-
method = st.sidebar.radio("Choose a method:", ["
|
601 |
-
question_type = st.sidebar.selectbox("Select question type:", ["
|
602 |
# store_output = st.sidebar.checkbox("Store the output")
|
603 |
|
604 |
|
@@ -606,7 +714,7 @@ def main():
|
|
606 |
|
607 |
# Main page (Outputs)
|
608 |
|
609 |
-
|
610 |
submit_button = st.button("Submit")
|
611 |
|
612 |
if submit_button:
|
@@ -615,36 +723,52 @@ def main():
|
|
615 |
toggles = {'A': toggle_a, 'B': toggle_b, 'C': toggle_c}
|
616 |
|
617 |
# Generate outputs
|
618 |
-
answer, rs = answer_question(
|
619 |
papers_df = get_papers(rs)
|
620 |
embedding_plot = create_embedding_plot()
|
621 |
-
triggered_keywords = extract_keywords(
|
622 |
consensus = estimate_consensus()
|
623 |
|
624 |
-
# Display outputs
|
625 |
-
|
626 |
-
st.
|
627 |
st.write(answer)
|
|
|
|
|
|
|
|
|
|
|
|
|
628 |
|
629 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
630 |
st.dataframe(papers_df)
|
631 |
|
|
|
|
|
632 |
|
633 |
col1, col2 = st.columns(2)
|
634 |
|
635 |
with col1:
|
636 |
|
637 |
-
st.subheader("
|
638 |
-
st.
|
639 |
|
640 |
st.subheader("Triggered Keywords")
|
641 |
st.write(", ".join(triggered_keywords))
|
642 |
|
643 |
with col2:
|
644 |
|
645 |
-
st.subheader("Question Type")
|
646 |
-
st.write(question_type)
|
647 |
-
|
648 |
st.subheader("Consensus Estimate")
|
649 |
st.write(f"{consensus:.2%}")
|
650 |
|
|
|
13 |
|
14 |
import yaml, json, requests, sys, os, time
|
15 |
import concurrent.futures
|
16 |
+
|
17 |
+
from langchain_community.chat_models import ChatOpenAI as openai_llm
|
18 |
+
from langchain_core.runnables import RunnableConfig
|
19 |
+
from langchain_community.callbacks import StreamlitCallbackHandler
|
20 |
+
|
21 |
+
from langchain.agents import ZeroShotAgent, Tool, AgentExecutor
|
22 |
+
from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
|
23 |
+
from langchain.chains import LLMChain
|
24 |
+
|
25 |
+
|
26 |
ts = time.time()
|
27 |
|
28 |
|
|
|
115 |
st.session_state.kws = arxiv_corpus['keywords']
|
116 |
st.toast('done caching. time taken: %.2f sec' %(time.time()-ts))
|
117 |
|
118 |
+
|
119 |
+
#---------------------------------------------------------------
|
120 |
+
|
121 |
+
|
122 |
+
class DirtyState:
    """Tri-state flag used to "clear" a previous run's result on resubmit.

    Works around the Streamlit behavior where the previous run's text is
    shown grayed-out but still visible while a rerun is in progress.
    """
    NOT_DIRTY = "NOT_DIRTY"
    DIRTY = "DIRTY"
    UNHANDLED_SUBMIT = "UNHANDLED_SUBMIT"
|
128 |
+
|
129 |
+
|
130 |
+
def get_dirty_state() -> str:
    """Return the current dirty-state flag, defaulting to ``NOT_DIRTY``."""
    return st.session_state.get("dirty_state", DirtyState.NOT_DIRTY)
|
132 |
+
|
133 |
+
|
134 |
+
def set_dirty_state(state: str) -> None:
    """Record *state* (one of the ``DirtyState`` values) in session state."""
    st.session_state["dirty_state"] = state
|
136 |
+
|
137 |
+
|
138 |
+
def with_clear_container(submit_clicked: bool) -> bool:
    """Return True when the submitted prompt should actually be processed.

    Implements a forced-rerun dance so the previous answer is cleared
    instead of lingering grayed-out while the new submission runs.
    """
    if get_dirty_state() == DirtyState.DIRTY:
        if submit_clicked:
            # Remember the click across the rerun we are about to trigger.
            set_dirty_state(DirtyState.UNHANDLED_SUBMIT)
            # NOTE(review): st.experimental_rerun is deprecated in newer
            # Streamlit releases in favor of st.rerun — confirm the pinned version.
            st.experimental_rerun()
        else:
            set_dirty_state(DirtyState.NOT_DIRTY)

    # Either a fresh click, or the click we stashed before the rerun above.
    if submit_clicked or get_dirty_state() == DirtyState.UNHANDLED_SUBMIT:
        set_dirty_state(DirtyState.DIRTY)
        return True

    return False
|
151 |
|
152 |
#----------------------------------------------------------------
|
153 |
|
|
|
570 |
|
571 |
|
572 |
def answer_question(question, top_k, keywords, toggles, method, question_type):
    """Answer *question* by retrieving the ``top_k`` most relevant abstracts.

    Delegates entirely to ``run_ret``; only ``question`` and ``top_k`` are
    used today. ``keywords``, ``toggles``, ``method`` and ``question_type``
    are accepted for interface stability and currently ignored.
    """
    return run_ret(question, top_k)
|
577 |
|
578 |
|
579 |
def get_papers(ids):
|
|
|
620 |
output_str = ''
|
621 |
for i in rs:
|
622 |
if rs[i] > 0.5:
|
623 |
+
output_str = output_str + '---> ' + st.session_state.abstracts[i] + '(score: %.2f) \n' %rs[i]
|
624 |
else:
|
625 |
+
output_str = output_str + st.session_state.abstracts[i] + '(score: %.2f) \n' %rs[i]
|
626 |
return output_str, rs
|
627 |
|
628 |
+
def Library(query, top_k=3):
    """Retrieve the ``top_k`` abstracts most relevant to *query*.

    Exposed as a LangChain agent tool. Returns the retrieved abstracts
    concatenated into one string, each followed by a blank line.

    NOTE(review): assumes ``ec.retrieve`` yields indices into the cached
    ``st.session_state.abstracts`` corpus — confirm against the retriever.
    """
    rs = ec.retrieve(query, top_k, return_scores=True)
    # str.join builds the document in one pass instead of repeated
    # string concatenation; debug print statements removed.
    return ''.join(st.session_state.abstracts[i] + '\n\n' for i in rs)
|
638 |
+
|
639 |
+
# ---------------------------------------------------------------
# LangChain agent wiring. Runs at module level, i.e. on every Streamlit
# rerun; session_state checks below keep the agent objects from being
# rebuilt more than once per session.
search = DuckDuckGoSearchAPIWrapper()
# Two tools: the local retrieval corpus ("Library") and a live web search.
tools = [
    Tool(
        name="Library",
        func=Library,
        description="A source of information pertinent to your question. Do not answer a question without consulting this!"
    ),
    Tool(
        name="Search",
        func=search.run,
        description="useful for when you need to look up knowledge about common topics or current events",
    )
]

# Cache the tool list so the agent built below sees a stable object.
if 'tools' not in st.session_state:
    st.session_state.tools = tools

# for another question type:
# First, find the quotes from the document that are most relevant to answering the question, and then print them in numbered order.
# Quotes should be relatively short. If there are no relevant quotes, write “No relevant quotes” instead.

# NOTE(review): `openai_key` is not defined in this chunk — presumably set
# earlier in the file; confirm it is assigned before this line executes.
gen_llm = openai_llm(temperature=0,model_name='gpt-4o-mini', openai_api_key = openai_key)

# Prompt pieces for the ZeroShotAgent (ReAct-style prefix/suffix).
prefix = """You are an expert astronomer and cosmologist.
Answer the following question as best you can using information from the library, but speaking in a concise and factual manner.
If you can not come up with an answer, say you do not know.
Try to break the question down into smaller steps and solve it in a logical manner.

You have access to the following tools:"""
suffix = """Begin! Remember to speak in a pedagogical and factual manner."

Question: {input}
{agent_scratchpad}"""

prompt = ZeroShotAgent.create_prompt(
    st.session_state.tools, prefix=prefix, suffix=suffix, input_variables=["input", "agent_scratchpad"]
)

llm_chain = LLMChain(llm=gen_llm, prompt=prompt)

tool_names = [tool.name for tool in st.session_state.tools]
# Build the agent once per session and reuse it across reruns.
if 'agent' not in st.session_state:
    agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names)
    st.session_state.agent = agent

# handle_parsing_errors=True: malformed LLM output is retried instead of raising.
if 'agent_executor' not in st.session_state:
    agent_executor = AgentExecutor.from_agent_and_tools(
        agent=st.session_state.agent, tools=st.session_state.tools, verbose=True, handle_parsing_errors=True
    )
    st.session_state.agent_executor = agent_executor
689 |
+
|
690 |
+
|
691 |
+
|
692 |
# Streamlit app
|
693 |
def main():
|
694 |
|
695 |
# st.title("Question Answering App")
|
696 |
+
|
697 |
|
698 |
# Sidebar (Inputs)
|
699 |
+
st.sidebar.header("Fine-tune the search")
|
700 |
+
top_k = st.sidebar.slider("Number of papers to retrieve:", 3, 100, 10)
|
701 |
extra_keywords = st.sidebar.text_input("Enter extra keywords (comma-separated):")
|
702 |
|
703 |
st.sidebar.subheader("Toggles")
|
|
|
705 |
toggle_b = st.sidebar.checkbox("Toggle B")
|
706 |
toggle_c = st.sidebar.checkbox("Toggle C")
|
707 |
|
708 |
+
method = st.sidebar.radio("Choose a method:", ["Semantic search", "Semantic search + HyDE", "Semantic search + HyDE + CoHERE"])
|
709 |
+
question_type = st.sidebar.selectbox("Select question type:", ["Single paper", "Multi-paper", "Summary"])
|
710 |
# store_output = st.sidebar.checkbox("Store the output")
|
711 |
|
712 |
|
|
|
714 |
|
715 |
# Main page (Outputs)
|
716 |
|
717 |
+
query = st.text_input("Ask me anything:")
|
718 |
submit_button = st.button("Submit")
|
719 |
|
720 |
if submit_button:
|
|
|
723 |
toggles = {'A': toggle_a, 'B': toggle_b, 'C': toggle_c}
|
724 |
|
725 |
# Generate outputs
|
726 |
+
answer, rs = answer_question(query, top_k, keywords, toggles, method, question_type)
|
727 |
papers_df = get_papers(rs)
|
728 |
embedding_plot = create_embedding_plot()
|
729 |
+
triggered_keywords = extract_keywords(query)
|
730 |
consensus = estimate_consensus()
|
731 |
|
732 |
+
# Display outputs
|
733 |
+
answer = st.session_state.agent_executor.run(input=query)
|
734 |
+
# st.write(answer["output"])
|
735 |
st.write(answer)
|
736 |
+
|
737 |
+
# st.subheader("Answer")
|
738 |
+
# output_container = st.empty()
|
739 |
+
# if with_clear_container(submit_button):
|
740 |
+
# output_container = output_container.container()
|
741 |
+
# output_container.chat_message("user").write(query)
|
742 |
|
743 |
+
# answer_container = output_container.chat_message("pfdr", avatar="🦜")
|
744 |
+
# st_callback = StreamlitCallbackHandler(answer_container)
|
745 |
+
# # cfg = RunnableConfig()
|
746 |
+
# # cfg["callbacks"] = [st_callback]
|
747 |
+
# answer = st.session_state.agent_executor.run(input=query, callbacks=[st_callback])
|
748 |
+
# try:
|
749 |
+
# answer_container.write(answer["output"])
|
750 |
+
# except:
|
751 |
+
# answer_container.write('No final answer')
|
752 |
+
# st.write(answer)
|
753 |
+
|
754 |
+
with st.expander("Relevant papers", expanded=True):
|
755 |
st.dataframe(papers_df)
|
756 |
|
757 |
+
with st.expander("Embedding map", expanded=True):
|
758 |
+
st.bokeh_chart(embedding_plot)
|
759 |
|
760 |
col1, col2 = st.columns(2)
|
761 |
|
762 |
with col1:
|
763 |
|
764 |
+
st.subheader("Question Type")
|
765 |
+
st.write(question_type)
|
766 |
|
767 |
st.subheader("Triggered Keywords")
|
768 |
st.write(", ".join(triggered_keywords))
|
769 |
|
770 |
with col2:
|
771 |
|
|
|
|
|
|
|
772 |
st.subheader("Consensus Estimate")
|
773 |
st.write(f"{consensus:.2%}")
|
774 |
|
requirements.txt
CHANGED
@@ -15,4 +15,5 @@ tiktoken
|
|
15 |
chromadb
|
16 |
streamlit-extras
|
17 |
nltk
|
18 |
-
|
|
|
|
15 |
chromadb
|
16 |
streamlit-extras
|
17 |
nltk
|
18 |
+
cohere
|
19 |
+
duckduckgo-search
|