cboettig committed
Commit 73afc0c · Parent(s): e16b5ef
Files changed (3):
  1. chat.py +13 -4
  2. rag.py +32 -8
  3. sql.py +19 -7
chat.py CHANGED
@@ -1,12 +1,12 @@
-from openai import OpenAI
 import streamlit as st
+from openai import OpenAI

 st.title("Chat Demo")

-client = OpenAI(api_key=st.secrets["LITELLM_KEY"], base_url = "https://llm.nrp-nautilus.io")
-
-if "model" not in st.session_state:
-    st.session_state["model"] = "llama3" #"groq-tools"
+with st.sidebar:
+    model = st.radio("Select an LLM:", ['llava', 'gemma2', 'phi3', 'llama3', 'embed-mistral', 'mixtral', 'gorilla', 'groq-tools'])
+    st.session_state["model"] = model
+


 if "messages" not in st.session_state:
@@ -16,6 +16,15 @@ for message in st.session_state.messages:
     with st.chat_message(message["role"]):
         st.markdown(message["content"])

+client = OpenAI(
+    api_key = st.secrets["LITELLM_KEY"],
+    base_url = "https://llm.nrp-nautilus.io"
+)
+
+# Button to clear session state
+if st.button('Clear History'):
+    st.session_state.clear()
+
 if prompt := st.chat_input("What is up?"):
     st.session_state.messages.append({"role": "user", "content": prompt})
     with st.chat_message("user"):
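The second hunk's context cuts off just inside the chat loop, so the diff never shows how the sidebar selection and the `client` get used. A minimal sketch of the usual continuation in this Streamlit pattern (the streaming details below are an assumption, not part of this commit):

```python
# Sketch only: assumes the standard Streamlit chat pattern; not from this commit.
if prompt := st.chat_input("What is up?"):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)
    with st.chat_message("assistant"):
        # Use whichever model the sidebar radio button stored in session state
        stream = client.chat.completions.create(
            model=st.session_state["model"],
            messages=st.session_state.messages,
            stream=True,
        )
        response = st.write_stream(stream)  # render tokens as they arrive
    st.session_state.messages.append({"role": "assistant", "content": response})
```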
rag.py CHANGED
@@ -1,10 +1,24 @@
-# +
 import streamlit as st
-
-# Set up the document loader
 from langchain_community.document_loaders import PyPDFLoader
-loader = PyPDFLoader("ca30x30-2024.pdf")
-docs = loader.load()
+
+st.title("RAG Demo")
+
+
+'''
+Provide a URL to a PDF document you want to ask questions about.
+Once the document has been uploaded and parsed, ask your questions in the chat dialog that will appear below.
+'''
+
+# Create a file uploader?
+# st.sidebar.file_uploader("Choose a PDF file", type=["pdf"])
+url = st.text_input("PDF URL", "https://www.resources.ca.gov/-/media/CNRA-Website/Files/2024_30x30_Pathways_Progress_Report.pdf")
+
+@st.cache_data
+def doc_loader(url):
+    loader = PyPDFLoader(url)
+    return loader.load()
+
+docs = doc_loader(url)

 # Set up the language model
 from langchain_openai import ChatOpenAI
@@ -47,13 +61,21 @@ prompt = ChatPromptTemplate.from_messages(
 question_answer_chain = create_stuff_documents_chain(llm, prompt)
 rag_chain = create_retrieval_chain(retriever, question_answer_chain)

+# +
 # agent is ready to test:
+
 #results = rag_chain.invoke({"input": "What is the goal of CA 30x30?"})
 #results['answer']
+#results['context'][0].page_content
+#results['context'][0].metadata
+
+# -
+
+#results['context'][0].page_content
+#results['context'][0].metadata


 # Place agent inside a streamlit application:
-st.title("RAG Demo")

 if prompt := st.chat_input("What is the goal of CA 30x30?"):
     with st.chat_message("user"):
@@ -63,8 +85,10 @@ if prompt := st.chat_input("What is the goal of CA 30x30?"):
     results = rag_chain.invoke({"input": prompt})
     st.write(results['answer'])

-    st.write('**Context metadata:**\n')
-    st.write(results['context'][0]['metadata'])
+    with st.expander("See context matched"):
+        st.write(results['context'][0].page_content)
+        st.write(results['context'][0].metadata)
+

 # adapt for memory / multi-question interaction with:
 # https://python.langchain.com/docs/tutorials/qa_chat_history/
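The hunks skip over the retriever construction (old lines 11–46 / new lines 25–60), so the `retriever` passed to `create_retrieval_chain` never appears in the diff. A plausible sketch of that middle section, assuming an embedding model served by the same LiteLLM endpoint (the model name and chunking parameters are guesses, not taken from the commit):

```python
# Assumed retriever setup; the commit's actual lines 25-60 are not shown in the diff.
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_core.vectorstores import InMemoryVectorStore

# Split the loaded PDF pages into overlapping chunks for retrieval
splits = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200).split_documents(docs)

vectorstore = InMemoryVectorStore.from_documents(
    documents=splits,
    embedding=OpenAIEmbeddings(
        model="embed-mistral",  # assumption: the embedding model listed in chat.py's sidebar
        api_key=st.secrets["LITELLM_KEY"],
        base_url="https://llm.nrp-nautilus.io",
    ),
)
retriever = vectorstore.as_retriever()
```

Note that because `doc_loader` is wrapped in `@st.cache_data`, the PDF is fetched and parsed once per distinct URL rather than on every Streamlit rerun.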
sql.py CHANGED
@@ -6,11 +6,17 @@ from langchain.chains import create_sql_query_chain

 # +
 # Set up Langchain SQL access
-parquet = "s3://us-west-2.opendata.source.coop/cboettig/gbif/2024-10-01/**"
+parquet = "https://espm-157-f24.github.io/spatial-carl-amanda-tyler/new_haven_stats.parquet"

 db = SQLDatabase.from_uri("duckdb:///tmp.db", view_support=True)
 db.run(f"create or replace view mydata as select * from read_parquet('{parquet}');")
-llm = ChatOpenAI(model="llama3", temperature=0, api_key=st.secrets["LITELLM_KEY"], base_url = "https://llm.nrp-nautilus.io")
+
+llm = ChatOpenAI(model="llama3",
+                 temperature=0,
+                 api_key=st.secrets["LITELLM_KEY"],
+                 base_url = "https://llm.nrp-nautilus.io")
+
+db = SQLDatabase.from_uri("duckdb:///tmp.db", view_support=True)

 # -

@@ -18,8 +24,7 @@ llm = ChatOpenAI(model="llama3", temperature=0, api_key=st.secrets["LITELLM_KEY"
 from langchain_core.prompts import PromptTemplate
 template = '''
 You are a {dialect} expert. Given an input question, first create a syntactically correct {dialect} query to run, then look at the results of the query and return the answer to the input question.
-Never query for all columns from a table.
-You must query only the columns that are needed to answer the question.
+Always return all columns from a query (select *) unless otherwise instructed.
 Wrap each column name in double quotes (") to denote them as delimited identifiers.
 Pay attention to use only the column names you can see in the tables below.
 Be careful to not query for columns that do not exist.
@@ -43,9 +48,16 @@ chain = create_sql_query_chain(llm, db, prompt)
 #chain.get_prompts()[0].pretty_print()
 # -

-response = chain.invoke({"question": "Count the number of mammal occurrences in each h0 grouping"})
+response = chain.invoke({"question": "what is the mean ndvi by grade?"})
 response

-# %%time
-x = db.run(response)
+# +
+# use the response in a query
+
+import ibis
+from ibis import _
+con = ibis.duckdb.connect()
+tbl = con.read_parquet(parquet, "mydata")
+tbl.sql(response).execute()
+
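The closing ibis snippet registers the same parquet file as a table named `mydata` and hands the generated SQL string to ibis's DuckDB backend. An equivalent sanity check without ibis, assuming `response` comes back as bare SQL (LLMs sometimes wrap queries in markdown fences, which would need stripping first):

```python
# Sketch of running the generated query directly in DuckDB; not part of this commit.
import duckdb

con = duckdb.connect()
# Recreate the same view the LangChain SQL chain was pointed at
con.execute(f"CREATE OR REPLACE VIEW mydata AS SELECT * FROM read_parquet('{parquet}')")
print(con.execute(response).df())  # assumes `response` is plain SQL with no fences
```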