cboettig committed on
Commit
03da1c6
·
1 Parent(s): 760e177
Files changed (2) hide show
  1. rag.py +4 -0
  2. sql.py +41 -3
rag.py CHANGED
@@ -66,3 +66,7 @@ if prompt := st.chat_input("What is the goal of CA 30x30?"):
66
  st.write('**Context metadata:**\n')
67
  st.write(results['context'][0]['metadata'])
68
 
 
 
 
 
 
66
  st.write('**Context metadata:**\n')
67
  st.write(results['context'][0]['metadata'])
68
 
69
+ # adapt for memory / multi-question interaction with:
70
+ # https://python.langchain.com/docs/tutorials/qa_chat_history/
71
+
72
+ # Also see structured outputs.
sql.py CHANGED
@@ -4,10 +4,48 @@ from langchain_community.utilities import SQLDatabase
4
  from langchain_openai import ChatOpenAI
5
  from langchain.chains import create_sql_query_chain
6
 
 
7
  # Set up Langchain SQL access
8
- db = SQLDatabase.from_uri("duckdb:///tmp.db", view_support=True)
9
  parquet = "s3://us-west-2.opendata.source.coop/cboettig/gbif/2024-10-01/**"
10
- db.run(f"create or replace view gbif_h3 as select * from read_parquet('{parquet}');")
 
 
11
  llm = ChatOpenAI(model="llama3", temperature=0, api_key=st.secrets["LITELLM_KEY"], base_url = "https://llm.nrp-nautilus.io")
12
- chain = create_sql_query_chain(llm, db)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
 
4
  from langchain_openai import ChatOpenAI
5
  from langchain.chains import create_sql_query_chain
6
 
7
+ # +
8
  # Set up Langchain SQL access
 
9
  parquet = "s3://us-west-2.opendata.source.coop/cboettig/gbif/2024-10-01/**"
10
+
11
+ db = SQLDatabase.from_uri("duckdb:///tmp.db", view_support=True)
12
+ db.run(f"create or replace view mydata as select * from read_parquet('{parquet}');")
13
  llm = ChatOpenAI(model="llama3", temperature=0, api_key=st.secrets["LITELLM_KEY"], base_url = "https://llm.nrp-nautilus.io")
14
+
15
+ # -
16
+
17
+
18
+ from langchain_core.prompts import PromptTemplate
19
+ template = '''
20
+ You are a {dialect} expert. Given an input question, first create a syntactically correct {dialect} query to run, then look at the results of the query and return the answer to the input question.
21
+ Never query for all columns from a table.
22
+ You must query only the columns that are needed to answer the question.
23
+ Wrap each column name in double quotes (") to denote them as delimited identifiers.
24
+ Pay attention to use only the column names you can see in the tables below.
25
+ Be careful to not query for columns that do not exist.
26
+ Also, pay attention to which column is in which table.
27
+ Pay attention to use today() function to get the current date, if the question involves "today".
28
+
29
+ Respond with only the SQL query to run. Do not repeat the question or explanation. Just the raw SQL query.
30
+
31
+ Only use the following tables:
32
+ {table_info}
33
+
34
+ Question: {input}
35
+
36
+ '''
37
+ prompt = PromptTemplate.from_template(template, partial_variables = {"dialect": "duckdb", "top_k": 10})
38
+ chain = create_sql_query_chain(llm, db, prompt)
39
+
40
+ # +
41
+ #print(db.dialect)
42
+ #print(db.get_usable_table_names())
43
+ #chain.get_prompts()[0].pretty_print()
44
+ # -
45
+
46
+ response = chain.invoke({"question": "Count the number of mammal occurrences in each h0 grouping"})
47
+ response
48
+
49
+ # %%time
50
+ x = db.run(response)
51