fiboa-demo

Sleeping

App Files Files Community

fiboa-demo / app.py

cboettig

and so it begins

66373a4 5 months ago

raw

history blame

3.72 kB

	import streamlit as st
	from langchain_openai import ChatOpenAI
	from langchain_community.llms import Ollama
	from langchain_community.utilities import SQLDatabase
	from langchain.chains import create_sql_query_chain
	import geopandas as gpd


	import ibis
	from ibis import _

	# Remote GeoParquet file that backs the demo.
	geoparquet = "https://data.source.coop/fiboa/be-vlg/be_vlg.parquet"

	# Open the local DuckDB file "duck.db" with the spatial extension requested.
	con = ibis.duckdb.connect("duck.db", extensions=["spatial"])

	# Register the parquet file under the name "crops", then cast its
	# "geometry" column to the geometry type.
	crops = con.read_parquet(geoparquet, "crops")
	crops = crops.cast({"geometry": "geometry"})

	# Execute the expression and pull the result into pandas.
	df = crops.to_pandas()

	# +
	#gdf = gpd.read_parquet("be_vlg.parquet")
	#gdf.crs
	# -

	# Browser-tab title and icon, followed by the on-page heading.
	st.set_page_config(page_title="fiboa chat tool", page_icon="🦜")
	st.title("🚧 Early prototype 🚧")

	# +
	# from langchain.chains.sql_database.prompt import PROMPT # peek at the default
	from langchain_core.prompts.prompt import PromptTemplate

	# Custom prompt handed to create_sql_query_chain below (via prompt=new_prompt).
	# The template uses four placeholders: {dialect}, {top_k}, {table_info} and
	# {input}; they must match the names listed in input_variables.
	# The text asks the model to return only the SQL query, and to include the
	# "geometry" column whenever the user asks for a map.
	new_prompt = PromptTemplate(input_variables=['dialect', 'input', 'table_info', 'top_k'],
	template=
	'''
	Given an input question, first create a syntactically correct {dialect} query to run, then look at the results of the query
	and return the answer. Never use limit for {top_k}. You can order the results by a relevant column to return the most interesting
	examples in the database. This duckdb database includes full support for spatial queries, so it will understand most PostGIS-type
	queries as well.

	If you are asked to "map" or "show on a map", be sure to alway select the "geometry" column in your query.
	In the response, return only the SQLQuery to run.

	Pay attention to use only the column names that you can see in the schema description. Be careful to
	not query for columns that do not exist. Also, pay attention to which column is in which table.

	Use the following format:
	Question: Question here
	SQLQuery: SQL Query to run
	SQLResult: Result of the SQLQuery
	Answer: Final answer here

	Only use the following tables:
	{table_info}

	Question: {input}
	'''
	)
	# -

	# Chat model used to write the SQL; the API key is read from Streamlit secrets.
	llm = ChatOpenAI(api_key=st.secrets["OPENAI_API_KEY"], temperature=0)

	# +
	# SQL query chain built on the custom prompt. The database URI points at the
	# same "duck.db" file used elsewhere in this script; view_support=True lets
	# the chain see views as well as tables.
	db = SQLDatabase.from_uri("duckdb:///duck.db", view_support=True)
	chain = create_sql_query_chain(llm, db, prompt=new_prompt, k=11)

	## testing
	#user_input = "Show on a map the 10 largest fields?"
	#sql_query = chain.invoke({"question": user_input})
	#print(sql_query)
	#




	# +
	import lonboard

	def map_layer(gdf):
	    """Return a lonboard ``Map`` with one polygon layer built from *gdf*.

	    *gdf* is passed straight to ``lonboard.PolygonLayer.from_geopandas``.
	    """
	    polygon_style = dict(
	        get_line_width=20,  # width in default units (meters)
	        line_width_min_pixels=0.2,  # minimum width when zoomed out
	        get_fill_color=[204, 251, 254],  # light blue
	        get_line_color=[37, 36, 34],  # dark border color
	    )
	    polygons = lonboard.PolygonLayer.from_geopandas(gdf, **polygon_style)
	    return lonboard.Map(polygons)


	# -

	import geopandas as gpd
	from ibis import _
	def as_geopandas(response):
	    """Run a SQL query string and return a displayable result.

	    The query is stored as the view ``testing`` on the module-level
	    connection ``con`` and then read back through ``con.table``.

	    Parameters:
	        response: SQL ``SELECT`` statement as a string. Surrounding
	            whitespace and trailing semicolons are ignored.

	    Returns:
	        If the result has a ``geometry`` column: the value of
	        ``map_layer(...)`` for the result, after converting the geometry
	        from EPSG:31370 to EPSG:4326. Otherwise: the table expression
	        returned by ``con.table("testing")``, not converted to pandas.

	    NOTE(review): the statement is executed as given, with no validation.
	    In this script it comes from the LLM chain, so treat it as untrusted.
	    """
	    # The query is wrapped in parentheses below, so a trailing ";" would
	    # produce "(SELECT ...;)", which is a syntax error. Drop it first.
	    query = response.strip().rstrip(";").rstrip()
	    sql_query = f"CREATE OR REPLACE VIEW testing AS ({query})"
	    con.raw_sql(sql_query)
	    gdf = con.table("testing")

	    if 'geometry' not in gdf.columns:
	        return gdf

	    gdf = (
	        gdf
	        .cast({"geometry": "geometry"})
	        .mutate(geometry=_.geometry.convert("EPSG:31370", "EPSG:4326"))
	        .to_pandas()
	    )
	    # Record the CRS that the conversion above produced.
	    gdf.set_crs(epsg=4326, inplace=True)
	    return map_layer(gdf)


	# +
	#response = "SELECT * FROM crops LIMIT 100"
	#fields = as_geopandas(response)
	#fields
	# -

	# Placeholder text for the chat input box.
	example = "Which are the 10 largest fields?"

	with st.container():
	    prompt = st.chat_input(example, key="chain")
	    if prompt:
	        # Echo the user's question.
	        with st.chat_message("user"):
	            st.write(prompt)
	        # Show the generated SQL, then the query result (table or map).
	        with st.chat_message("assistant"):
	            response = chain.invoke({"question": prompt})
	            st.write(response)
	            result = as_geopandas(response)
	            # Bare expression on purpose: Streamlit "magic" renders it.
	            result

	st.divider()