cboettig committed
Commit 73afc0c · Parent(s): e16b5ef
Files changed (3):
  1. chat.py +13 -4
  2. rag.py +32 -8
  3. sql.py +19 -7
chat.py CHANGED
@@ -1,12 +1,12 @@
-from openai import OpenAI
 import streamlit as st
+from openai import OpenAI

 st.title("Chat Demo")

-client = OpenAI(api_key=st.secrets["LITELLM_KEY"], base_url = "https://llm.nrp-nautilus.io")
-
-if "model" not in st.session_state:
-    st.session_state["model"] = "llama3" #"groq-tools"
+with st.sidebar:
+    model = st.radio("Select an LLM:", ['llava', 'gemma2', 'phi3', 'llama3', 'embed-mistral', 'mixtral', 'gorilla', 'groq-tools'])
+    st.session_state["model"] = model
+


 if "messages" not in st.session_state:
@@ -16,6 +16,15 @@ for message in st.session_state.messages:
     with st.chat_message(message["role"]):
         st.markdown(message["content"])

+client = OpenAI(
+    api_key = st.secrets["LITELLM_KEY"],
+    base_url = "https://llm.nrp-nautilus.io"
+)
+
+# Button to clear session state
+if st.button('Clear History'):
+    st.session_state.clear()
+
 if prompt := st.chat_input("What is up?"):
     st.session_state.messages.append({"role": "user", "content": prompt})
     with st.chat_message("user"):
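The second hunk's context cuts off just inside the chat loop, so the diff never shows how the sidebar selection and the `client` get used. A minimal sketch of the usual continuation in this Streamlit pattern (the streaming details below are an assumption, not part of this commit):

```python
# Sketch only: assumes the standard Streamlit chat pattern; not from this commit.
if prompt := st.chat_input("What is up?"):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)
    with st.chat_message("assistant"):
        # Use whichever model the sidebar radio button stored in session state
        stream = client.chat.completions.create(
            model=st.session_state["model"],
            messages=st.session_state.messages,
            stream=True,
        )
        response = st.write_stream(stream)  # render tokens as they arrive
    st.session_state.messages.append({"role": "assistant", "content": response})
```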
rag.py CHANGED
@@ -1,10 +1,24 @@
-# +
 import streamlit as st
-
-# Set up the document loader
 from langchain_community.document_loaders import PyPDFLoader
-loader = PyPDFLoader("ca30x30-2024.pdf")
-docs = loader.load()
+
+st.title("RAG Demo")
+
+
+'''
+Provide a URL to a PDF document you want to ask questions about.
+Once the document has been uploaded and parsed, ask your questions in the chat dialog that will appear below.
+'''
+
+# Create a file uploader?
+# st.sidebar.file_uploader("Choose a PDF file", type=["pdf"])
+url = st.text_input("PDF URL", "https://www.resources.ca.gov/-/media/CNRA-Website/Files/2024_30x30_Pathways_Progress_Report.pdf")
+
+@st.cache_data
+def doc_loader(url):
+    loader = PyPDFLoader(url)
+    return loader.load()
+
+docs = doc_loader(url)

 # Set up the language model
 from langchain_openai import ChatOpenAI
@@ -47,13 +61,21 @@ prompt = ChatPromptTemplate.from_messages(
 question_answer_chain = create_stuff_documents_chain(llm, prompt)
 rag_chain = create_retrieval_chain(retriever, question_answer_chain)

+# +
 # agent is ready to test:
+
 #results = rag_chain.invoke({"input": "What is the goal of CA 30x30?"})
 #results['answer']
+#results['context'][0].page_content
+#results['context'][0].metadata
+
+# -
+
+#results['context'][0].page_content
+#results['context'][0].metadata


 # Place agent inside a streamlit application:
-st.title("RAG Demo")

 if prompt := st.chat_input("What is the goal of CA 30x30?"):
     with st.chat_message("user"):
@@ -63,8 +85,10 @@ if prompt := st.chat_input("What is the goal of CA 30x30?"):
     results = rag_chain.invoke({"input": prompt})
     st.write(results['answer'])

-    st.write('**Context metadata:**\n')
-    st.write(results['context'][0]['metadata'])
+    with st.expander("See context matched"):
+        st.write(results['context'][0].page_content)
+        st.write(results['context'][0].metadata)
+

 # adapt for memory / multi-question interaction with:
 # https://python.langchain.com/docs/tutorials/qa_chat_history/
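The hunks skip over the retriever construction (old lines 11–46 / new lines 25–60), so the `retriever` passed to `create_retrieval_chain` never appears in the diff. A plausible sketch of that middle section, assuming an embedding model served by the same LiteLLM endpoint (the model name and chunking parameters are guesses, not taken from the commit):

```python
# Assumed retriever setup; the commit's actual lines 25-60 are not shown in the diff.
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_core.vectorstores import InMemoryVectorStore

# Split the loaded PDF pages into overlapping chunks for retrieval
splits = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200).split_documents(docs)

vectorstore = InMemoryVectorStore.from_documents(
    documents=splits,
    embedding=OpenAIEmbeddings(
        model="embed-mistral",  # assumption: the embedding model listed in chat.py's sidebar
        api_key=st.secrets["LITELLM_KEY"],
        base_url="https://llm.nrp-nautilus.io",
    ),
)
retriever = vectorstore.as_retriever()
```

Note that because `doc_loader` is wrapped in `@st.cache_data`, the PDF is fetched and parsed once per distinct URL rather than on every Streamlit rerun.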
sql.py CHANGED
@@ -6,11 +6,17 @@ from langchain.chains import create_sql_query_chain

 # +
 # Set up Langchain SQL access
-parquet = "s3://us-west-2.opendata.source.coop/cboettig/gbif/2024-10-01/**"
+parquet = "https://espm-157-f24.github.io/spatial-carl-amanda-tyler/new_haven_stats.parquet"

 db = SQLDatabase.from_uri("duckdb:///tmp.db", view_support=True)
 db.run(f"create or replace view mydata as select * from read_parquet('{parquet}');")
-llm = ChatOpenAI(model="llama3", temperature=0, api_key=st.secrets["LITELLM_KEY"], base_url = "https://llm.nrp-nautilus.io")
+
+llm = ChatOpenAI(model="llama3",
+                 temperature=0,
+                 api_key=st.secrets["LITELLM_KEY"],
+                 base_url = "https://llm.nrp-nautilus.io")
+
+db = SQLDatabase.from_uri("duckdb:///tmp.db", view_support=True)

 # -

@@ -18,8 +24,7 @@ llm = ChatOpenAI(model="llama3", temperature=0, api_key=st.secrets["LITELLM_KEY"
 from langchain_core.prompts import PromptTemplate
 template = '''
 You are a {dialect} expert. Given an input question, first create a syntactically correct {dialect} query to run, then look at the results of the query and return the answer to the input question.
-Never query for all columns from a table.
-You must query only the columns that are needed to answer the question.
+Always return all columns from a query (select *) unless otherwise instructed.
 Wrap each column name in double quotes (") to denote them as delimited identifiers.
 Pay attention to use only the column names you can see in the tables below.
 Be careful to not query for columns that do not exist.
@@ -43,9 +48,16 @@ chain = create_sql_query_chain(llm, db, prompt)
 #chain.get_prompts()[0].pretty_print()
 # -

-response = chain.invoke({"question": "Count the number of mammal occurrences in each h0 grouping"})
+response = chain.invoke({"question": "what is the mean ndvi by grade?"})
 response

-# %%time
-x = db.run(response)
+# +
+# use the response in a query
+
+import ibis
+from ibis import _
+con = ibis.duckdb.connect()
+tbl = con.read_parquet(parquet, "mydata")
+tbl.sql(response).execute()
+
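The closing ibis snippet registers the same parquet file as a table named `mydata` and hands the generated SQL string to ibis's DuckDB backend. An equivalent sanity check without ibis, assuming `response` comes back as bare SQL (LLMs sometimes wrap queries in markdown fences, which would need stripping first):

```python
# Sketch of running the generated query directly in DuckDB; not part of this commit.
import duckdb

con = duckdb.connect()
# Recreate the same view the LangChain SQL chain was pointed at
con.execute(f"CREATE OR REPLACE VIEW mydata AS SELECT * FROM read_parquet('{parquet}')")
print(con.execute(response).df())  # assumes `response` is plain SQL with no fences
```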