# Spaces:
# Sleeping
# Sleeping
import geopandas as gpd
import ibis
import streamlit as st
from ibis import _
from langchain.chains import create_sql_query_chain
from langchain_community.llms import Ollama
from langchain_community.utilities import SQLDatabase
from langchain_openai import ChatOpenAI
# Remote GeoParquet file that backs the "crops" table.
geoparquet = "https://data.source.coop/fiboa/be-vlg/be_vlg.parquet"

# DuckDB connection on the local file "duck.db" with the spatial extension.
con = ibis.duckdb.connect("duck.db", extensions=["spatial"])

# Expose the parquet file under the name "crops" and cast its "geometry"
# column to the geometry type.
crops = con.read_parquet(geoparquet, "crops").cast({"geometry": "geometry"})

# NOTE(review): `df` is not referenced anywhere else in the visible part of
# this file; if nothing else uses it, this full materialization of the table
# can probably be dropped -- confirm before removing.
df = crops.to_pandas()
# +
#gdf = gpd.read_parquet("be_vlg.parquet")
#gdf.crs
# -
# Page metadata (browser tab title and icon), followed by the visible title.
st.set_page_config(
    page_title="fiboa chat tool",
    page_icon="🦜",
)
st.title("🚧 Early prototype 🚧")
# +
# from langchain.chains.sql_database.prompt import PROMPT # peek at the default
from langchain_core.prompts.prompt import PromptTemplate

# Custom prompt handed to the SQL query chain. It asks the model to return
# only the SQL query, and to include the "geometry" column whenever the user
# asks for a map. The template text is sent to the model verbatim, so its
# wording is left exactly as written.
new_prompt = PromptTemplate(
    input_variables=['dialect', 'input', 'table_info', 'top_k'],
    template='''
Given an input question, first create a syntactically correct {dialect} query to run, then look at the results of the query
and return the answer. Never use limit for {top_k}. You can order the results by a relevant column to return the most interesting
examples in the database. This duckdb database includes full support for spatial queries, so it will understand most PostGIS-type
queries as well.
If you are asked to "map" or "show on a map", be sure to alway select the "geometry" column in your query.
In the response, return only the SQLQuery to run.
Pay attention to use only the column names that you can see in the schema description. Be careful to
not query for columns that do not exist. Also, pay attention to which column is in which table.
Use the following format:
Question: Question here
SQLQuery: SQL Query to run
SQLResult: Result of the SQLQuery
Answer: Final answer here
Only use the following tables:
{table_info}
Question: {input}
''',
)
# -
# Chat model used to write the SQL; the API key comes from Streamlit secrets.
llm = ChatOpenAI(temperature=0, api_key=st.secrets["OPENAI_API_KEY"])
# +
# Create the SQL query chain with the custom prompt
# `db` is opened on "duck.db", the same file name the ibis connection uses;
# view_support=True lets views be reflected as well as tables.
db = SQLDatabase.from_uri("duckdb:///duck.db", view_support=True)
# NOTE(review): `k` is presumably what fills {top_k} in the prompt -- confirm
# against the LangChain documentation.
chain = create_sql_query_chain(llm, db, prompt=new_prompt, k=11)
## testing
#user_input = "Show on a map the 10 largest fields?"
#sql_query = chain.invoke({"question": user_input})
#print(sql_query)
#
# +
import lonboard


def map_layer(gdf):
    """Return a lonboard map showing the polygons of *gdf*.

    Args:
        gdf: A GeoDataFrame, passed to ``lonboard.PolygonLayer.from_geopandas``.

    Returns:
        A ``lonboard.Map`` containing a single polygon layer.
    """
    layer = lonboard.PolygonLayer.from_geopandas(
        gdf,
        get_line_width=20,  # width in default units (meters)
        line_width_min_pixels=0.2,  # minimum width when zoomed out
        get_fill_color=[204, 251, 254],  # light blue
        get_line_color=[37, 36, 34],  # dark border color
    )
    m = lonboard.Map(layer)
    return m
# -
import geopandas as gpd
from ibis import _


def as_geopandas(response):
    """Run generated SQL as the view ``testing`` and return something to display.

    The query is wrapped in ``CREATE OR REPLACE VIEW testing AS (...)`` and
    executed on the module-level connection ``con``.

    Args:
        response: SQL text of a single SELECT-style query (a string).

    Returns:
        If the view has a ``geometry`` column: the result of ``map_layer`` for
        the rows, with geometries converted from EPSG:31370 to EPSG:4326.
        Otherwise: the table object returned by ``con.table("testing")``,
        not converted to pandas.
    """
    # A trailing ";" is common in generated SQL, but inside the parentheses
    # of the CREATE VIEW statement below it is a syntax error, so drop it.
    query = response.strip().rstrip(";").rstrip()

    # NOTE(security): `query` is model-generated SQL derived from free-form
    # user input and is executed verbatim; it cannot be parameterized.
    sql_query = f"CREATE OR REPLACE VIEW testing AS ({query})"
    con.raw_sql(sql_query)

    view = con.table("testing")
    if "geometry" not in view.columns:
        return view

    gdf = (
        view
        .cast({"geometry": "geometry"})
        .mutate(geometry=_.geometry.convert("EPSG:31370", "EPSG:4326"))
        .to_pandas()
    )
    # Record the CRS the geometries were just converted to.
    gdf.set_crs(epsg=4326, inplace=True)
    return map_layer(gdf)
# +
#response = "SELECT * FROM crops LIMIT 100"
#fields = as_geopandas(response)
#fields
# -
# Text passed to the chat input box (its first argument).
example = "Which are the 10 largest fields?"

with st.container():
    if prompt := st.chat_input(example, key="chain"):
        st.chat_message("user").write(prompt)
        with st.chat_message("assistant"):
            # Ask the chain for SQL, show it, then run it and show the result.
            response = chain.invoke({"question": prompt})
            st.write(response)
            result = as_geopandas(response)
            # Explicit st.write instead of a bare `result` expression, so the
            # output does not depend on Streamlit's "magic" being enabled.
            st.write(result)

st.divider()