Spaces:
Running
Running
examples
Browse files
rag.py
CHANGED
@@ -66,3 +66,7 @@ if prompt := st.chat_input("What is the goal of CA 30x30?"):
|
|
66 |
st.write('**Context metadata:**\n')
|
67 |
st.write(results['context'][0]['metadata'])
|
68 |
|
|
|
|
|
|
|
|
|
|
66 |
st.write('**Context metadata:**\n')
|
67 |
st.write(results['context'][0]['metadata'])
|
68 |
|
69 |
+
# To adapt this for memory / multi-question interaction, see:
|
70 |
+
# https://python.langchain.com/docs/tutorials/qa_chat_history/
|
71 |
+
|
72 |
+
# Also see structured outputs.
|
sql.py
CHANGED
@@ -4,10 +4,48 @@ from langchain_community.utilities import SQLDatabase
|
|
4 |
from langchain_openai import ChatOpenAI
|
5 |
from langchain.chains import create_sql_query_chain
|
6 |
|
|
|
7 |
# Set up Langchain SQL access
|
8 |
-
db = SQLDatabase.from_uri("duckdb:///tmp.db", view_support=True)
|
9 |
parquet = "s3://us-west-2.opendata.source.coop/cboettig/gbif/2024-10-01/**"
|
10 |
-
|
|
|
|
|
11 |
llm = ChatOpenAI(model="llama3", temperature=0, api_key=st.secrets["LITELLM_KEY"], base_url = "https://llm.nrp-nautilus.io")
|
12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
|
|
|
4 |
from langchain_openai import ChatOpenAI
|
5 |
from langchain.chains import create_sql_query_chain
|
6 |
|
7 |
+
# +
|
8 |
# Set up Langchain SQL access
|
|
|
9 |
parquet = "s3://us-west-2.opendata.source.coop/cboettig/gbif/2024-10-01/**"
|
10 |
+
|
11 |
+
# Open (or create) the local DuckDB file tmp.db; view_support=True makes views,
# not just tables, visible to LangChain (the data is exposed as a view).
db = SQLDatabase.from_uri("duckdb:///tmp.db", view_support=True)
|
12 |
+
# Expose the remote parquet files matched by `parquet` as a view named `mydata`,
# so generated queries can reference it like an ordinary table.
db.run(f"create or replace view mydata as select * from read_parquet('{parquet}');")
|
13 |
# OpenAI-compatible client pointed at a custom endpoint (base_url) rather than
# OpenAI itself; temperature=0 is used for the SQL generation.
# The API key comes from st.secrets — `st` is not imported in the visible part
# of this file; presumably `import streamlit as st` sits above — confirm.
llm = ChatOpenAI(model="llama3", temperature=0, api_key=st.secrets["LITELLM_KEY"], base_url = "https://llm.nrp-nautilus.io")
|
14 |
+
|
15 |
+
# -
|
16 |
+
|
17 |
+
|
18 |
+
from langchain_core.prompts import PromptTemplate
|
19 |
+
# Prompt used for SQL generation. Placeholders: {dialect}, {table_info}, {input}.
# NOTE(review): the text first asks the model to "look at the results of the
# query and return the answer", then to "respond with only the SQL query";
# only the latter matches how the output is used (it is passed to db.run).
template = '''
|
20 |
+
You are a {dialect} expert. Given an input question, first create a syntactically correct {dialect} query to run, then look at the results of the query and return the answer to the input question.
|
21 |
+
Never query for all columns from a table.
|
22 |
+
You must query only the columns that are needed to answer the question.
|
23 |
+
Wrap each column name in double quotes (") to denote them as delimited identifiers.
|
24 |
+
Pay attention to use only the column names you can see in the tables below.
|
25 |
+
Be careful to not query for columns that do not exist.
|
26 |
+
Also, pay attention to which column is in which table.
|
27 |
+
Pay attention to use today() function to get the current date, if the question involves "today".
|
28 |
+
|
29 |
+
Respond with only the SQL query to run. Do not repeat the question or explanation. Just the raw SQL query.
|
30 |
+
|
31 |
+
Only use the following tables:
|
32 |
+
{table_info}
|
33 |
+
|
34 |
+
Question: {input}
|
35 |
+
|
36 |
+
'''
|
37 |
+
# `dialect` is pre-filled with "duckdb" for the {dialect} placeholders.
# NOTE(review): `top_k` is not referenced anywhere in the template; it is
# presumably declared only because create_sql_query_chain expects the prompt
# to carry a `top_k` variable — confirm against the LangChain documentation.
prompt = PromptTemplate.from_template(template, partial_variables = {"dialect": "duckdb", "top_k": 10})
|
38 |
+
# Build the question -> SQL chain over `db`, using the custom prompt above
# in place of the library's default prompt.
chain = create_sql_query_chain(llm, db, prompt)
|
39 |
+
|
40 |
+
# +
|
41 |
+
#print(db.dialect)
|
42 |
+
#print(db.get_usable_table_names())
|
43 |
+
#chain.get_prompts()[0].pretty_print()
|
44 |
+
# -
|
45 |
+
|
46 |
+
# The chain is invoked with a "question" key although the template uses
# {input}; presumably create_sql_query_chain maps one onto the other — confirm.
response = chain.invoke({"question": "Count the number of mammal occurrences in each h0 grouping"})
|
47 |
+
response
|
48 |
+
|
49 |
+
# %%time
|
50 |
+
# NOTE(review): `response` is the raw LLM output and is executed as SQL as-is,
# with no validation or read-only guard; a non-SQL or destructive reply would
# be sent straight to the database.
x = db.run(response)
|
51 |
|