Spaces:

cboettig
/

streamlit-demo

Running

App Files Community

cboettig committed on Nov 17, 2024

Commit

03da1c6

1 Parent(s): 760e177

examples

Browse files

Files changed (2) hide show

rag.py +4 -0
sql.py +41 -3

rag.py CHANGED Viewed

@@ -66,3 +66,7 @@ if prompt := st.chat_input("What is the goal of CA 30x30?"):
         st.write('**Context metadata:**\n')
         st.write(results['context'][0]['metadata'])

         st.write('**Context metadata:**\n')
         st.write(results['context'][0]['metadata'])
+# adapt for memory / multi-question interaction with:
+# https://python.langchain.com/docs/tutorials/qa_chat_history/
+# Also see structured outputs.

sql.py CHANGED Viewed

@@ -4,10 +4,48 @@ from langchain_community.utilities import SQLDatabase
 from langchain_openai import ChatOpenAI
 from langchain.chains import create_sql_query_chain
 # Set up Langchain SQL access
-db = SQLDatabase.from_uri("duckdb:///tmp.db", view_support=True)
 parquet = "s3://us-west-2.opendata.source.coop/cboettig/gbif/2024-10-01/**"
-db.run(f"create or replace view gbif_h3 as select * from read_parquet('{parquet}');")
 llm = ChatOpenAI(model="llama3", temperature=0, api_key=st.secrets["LITELLM_KEY"], base_url = "https://llm.nrp-nautilus.io")
-chain = create_sql_query_chain(llm, db)

 from langchain_openai import ChatOpenAI
 from langchain.chains import create_sql_query_chain
+# +
 # Set up Langchain SQL access
 parquet = "s3://us-west-2.opendata.source.coop/cboettig/gbif/2024-10-01/**"
+db = SQLDatabase.from_uri("duckdb:///tmp.db", view_support=True)
+db.run(f"create or replace view mydata as select * from read_parquet('{parquet}');")
 llm = ChatOpenAI(model="llama3", temperature=0, api_key=st.secrets["LITELLM_KEY"], base_url = "https://llm.nrp-nautilus.io")
+# -
+from langchain_core.prompts import PromptTemplate
+template = '''
+You are a {dialect} expert. Given an input question, first create a syntactically correct {dialect} query to run, then look at the results of the query and return the answer to the input question.
+Never query for all columns from a table.
+You must query only the columns that are needed to answer the question.
+Wrap each column name in double quotes (") to denote them as delimited identifiers.
+Pay attention to use only the column names you can see in the tables below.
+Be careful to not query for columns that do not exist.
+Also, pay attention to which column is in which table.
+Pay attention to use today() function to get the current date, if the question involves "today".
+Respond with only the SQL query to run.  Do not repeat the question or explanation. Just the raw SQL query.
+Only use the following tables:
+{table_info}
+Question: {input}
+'''
+prompt = PromptTemplate.from_template(template, partial_variables = {"dialect": "duckdb", "top_k": 10})
+chain = create_sql_query_chain(llm, db, prompt)
+# +
+#print(db.dialect)
+#print(db.get_usable_table_names())
+#chain.get_prompts()[0].pretty_print()
+# -
+response = chain.invoke({"question": "Count the number of mammal occurrences in each h0 grouping"})
+response
+# %%time
+x = db.run(response)