Spaces:
Sleeping
Sleeping
import ibis | |
import os | |
import streamlit as st | |
from langchain.chains import create_sql_query_chain | |
from langchain_community.utilities import SQLDatabase | |
from langchain_core.prompts.prompt import PromptTemplate | |
from langchain_openai import ChatOpenAI | |
from query import execute_prompt | |
# from data import DATA | |
if os.path.exists("duck.db"): | |
os.remove("duck.db") | |
if os.path.exists("duck.db.wal"): | |
os.remove("duck.db.wal") | |
geoparquet = "data.parquet" | |
con = ibis.connect("duckdb://duck.db", extensions = ["spatial"]) | |
con.read_parquet(geoparquet, "crops").cast({"geometry": "geometry"}) | |
# for code, url in DATA.items(): | |
# tbl = con.read_parquet(url, code).cast({"geometry": "geometry"}) | |
st.set_page_config( | |
page_title="fiboaGPT", | |
page_icon="🦜", | |
) | |
st.title("fiboaGPT") | |
new_prompt = PromptTemplate(input_variables=['dialect', 'input', 'table_info', 'top_k'], | |
template= | |
''' | |
Given an input question, first create a syntactically correct {dialect} query to run, then look at the results of the query | |
and return the answer. Only limit for {top_k} when asked for "some" or "examples". | |
This duckdb database includes full support for spatial queries, so it will understand most PostGIS-type | |
queries as well. Remember that you must cast blob column to a geom type using ST_GeomFromWKB(geometry) AS geometry | |
before any spatial operations. Do not use ST_GeomFromWKB for non-spatial queries. | |
If you are asked to "map" or "show on a map", then be select the "geometry" column in your query. | |
If asked to show a "table", you must not include the "geometry" column from the query results. | |
Use the following format: return only the SQLQuery to run. DO NOT use the prefix with "SQLQuery:". | |
Do not include an explanation. | |
Pay close attention to use only the column names that you can see in the schema description. Be careful to | |
not query for columns that do not exist. Also, pay attention to which column is in which table. | |
Tables include {table_info}. The data you should use always comes from the table called "crops". | |
Only use that table, do not use the "testing" table. Pay close attention to this table schema. | |
The column area is in the unit hectares, you may need to convert it to other units, e.g. square meters. | |
The column perimeter is in the unit meters, you may need to convert it to other units, e.g. kilometers. | |
Question: {input} | |
''' | |
) | |
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0, api_key=st.secrets["OPENAI_API_KEY"]) | |
# Create the SQL query chain with the custom prompt | |
db = SQLDatabase.from_uri("duckdb:///duck.db", view_support=True) | |
chain = create_sql_query_chain(llm, db, prompt=new_prompt, k=100) | |
''' | |
Ask me about fiboa data! Request "a map" to get map output, or table for tabular output, e.g. | |
- Show a map with the 10 largest fields | |
- Show a table of the total area by crop type | |
- Compute the total area of all fields in km² and compute the percentage the total area of the baltic states (175015 km²) | |
''' | |
example = "Which are the 10 largest fields?" | |
with st.container(): | |
if prompt := st.chat_input(example, key="chain"): | |
st.chat_message("user").write(prompt) | |
with st.chat_message("assistant"): | |
execute_prompt(con, chain, prompt) | |
st.divider() | |
''' | |
Data sources: https://source.coop/fiboa | |
Data License: CC-BY-SA-4.0 | |
Software License: BSD | |
''' | |