cboettig committed on
Commit
66373a4
·
1 Parent(s): c60afa2

and so it begins

Browse files
Files changed (2) hide show
  1. app.py +126 -0
  2. requirements.txt +9 -0
app.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from langchain_openai import ChatOpenAI
3
+ from langchain_community.llms import Ollama
4
+ from langchain_community.utilities import SQLDatabase
5
+ from langchain.chains import create_sql_query_chain
6
+ import geopandas as gpd
7
+
8
+
9
+ import ibis
10
+ from ibis import _
11
# Remote GeoParquet file that backs the chat tool.
geoparquet = "https://data.source.coop/fiboa/be-vlg/be_vlg.parquet"

# File-backed DuckDB connection with the spatial extension loaded. The same
# "duck.db" file is opened again further down through SQLDatabase.from_uri.
con = ibis.duckdb.connect("duck.db", extensions=["spatial"])

# Register the parquet file under the name "crops", then cast its
# "geometry" column to ibis' geometry type.
crops = con.read_parquet(geoparquet, "crops")
crops = crops.cast({"geometry": "geometry"})

# NOTE(review): `df` is not referenced anywhere else in the visible file, yet
# this materialises the whole table as a pandas object on every script run --
# confirm it is still needed.
df = crops.to_pandas()
17
+
18
+ # +
19
+ #gdf = gpd.read_parquet("be_vlg.parquet")
20
+ #gdf.crs
21
+ # -
22
+
23
# Browser-tab title and icon, followed by the on-page heading.
st.set_page_config(page_title="fiboa chat tool", page_icon="🦜")
st.title("🚧 Early prototype 🚧")
28
+
29
+ # +
30
+ # from langchain.chains.sql_database.prompt import PROMPT # peek at the default
31
+ from langchain_core.prompts.prompt import PromptTemplate
32
+
33
# Custom text-to-SQL prompt handed to create_sql_query_chain below. The
# placeholders {dialect}, {top_k}, {table_info} and {input} are the template's
# declared input variables; the text tells the model to reply with the SQL
# query only and to select the "geometry" column when a map is requested.
new_prompt = PromptTemplate(
    template='''
Given an input question, first create a syntactically correct {dialect} query to run, then look at the results of the query
and return the answer. Never use limit for {top_k}. You can order the results by a relevant column to return the most interesting
examples in the database. This duckdb database includes full support for spatial queries, so it will understand most PostGIS-type
queries as well.

If you are asked to "map" or "show on a map", be sure to alway select the "geometry" column in your query.
In the response, return only the SQLQuery to run.

Pay attention to use only the column names that you can see in the schema description. Be careful to
not query for columns that do not exist. Also, pay attention to which column is in which table.

Use the following format:
Question: Question here
SQLQuery: SQL Query to run
SQLResult: Result of the SQLQuery
Answer: Final answer here

Only use the following tables:
{table_info}

Question: {input}
''',
    input_variables=["dialect", "input", "table_info", "top_k"],
)
59
+ # -
60
+
61
# Chat model used to write the SQL; the API key is read from Streamlit secrets.
llm = ChatOpenAI(api_key=st.secrets["OPENAI_API_KEY"], temperature=0)

# LangChain's view of the same DuckDB file that `con` writes to, with views
# included, plus the question -> SQL chain driven by the custom prompt.
db = SQLDatabase.from_uri("duckdb:///duck.db", view_support=True)
chain = create_sql_query_chain(llm, db, prompt=new_prompt, k=11)
67
+
68
+ ## testing
69
+ #user_input = "Show on a map the 10 largest fields?"
70
+ #sql_query = chain.invoke({"question": user_input})
71
+ #print(sql_query)
72
+ #
73
+
74
+
75
+
76
+
77
+ # +
78
+ import lonboard
79
+
80
def map_layer(gdf):
    """Build a lonboard map holding one polygon layer made from ``gdf``.

    Args:
        gdf: GeoDataFrame passed to ``lonboard.PolygonLayer.from_geopandas``.

    Returns:
        A ``lonboard.Map`` wrapping the styled polygon layer.
    """
    styling = {
        "get_line_width": 20,  # width in default units (meters)
        "line_width_min_pixels": 0.2,  # minimum width when zoomed out
        "get_fill_color": [204, 251, 254],  # light blue
        "get_line_color": [37, 36, 34],  # dark border color
    }
    polygons = lonboard.PolygonLayer.from_geopandas(gdf, **styling)
    return lonboard.Map(polygons)
90
+
91
+
92
+ # -
93
+
94
+ import geopandas as gpd
95
+ from ibis import _
96
def _clean_sql(response):
    """Reduce a model reply to a bare SQL statement that can sit inside ``(...)``."""
    sql = response.strip()
    # The prompt asks for a "SQLQuery: ..." line, so the label may be echoed.
    label = "SQLQuery:"
    if sql.startswith(label):
        sql = sql[len(label):].strip()
    # Chat models often wrap code in a markdown fence such as ```sql ... ```.
    if sql.startswith("```"):
        sql = sql.strip("`").strip()
        if sql[:3].lower() == "sql" and sql[3:4].isspace():
            sql = sql[3:].strip()
    # A trailing ";" would end the statement inside the view's parentheses.
    return sql.rstrip("; \t\r\n")


def as_geopandas(response, source_crs="EPSG:31370"):
    """Run the generated SQL as the view ``testing`` and return something displayable.

    The reply is normalised first (label, markdown fence and trailing
    semicolons removed) because it is interpolated into
    ``CREATE OR REPLACE VIEW testing AS (...)`` and any of those would make
    the statement unparsable.

    Args:
        response: Text returned by the SQL query chain.
        source_crs: CRS the stored geometries are assumed to be in; they are
            converted from it to EPSG:4326. Defaults to the value that was
            previously hard-coded.

    Returns:
        The result of ``map_layer`` when the query result has a ``geometry``
        column, otherwise the (unexecuted) ibis table for the view.

    Raises:
        ValueError: If no SQL text is left after normalising ``response``.
    """
    sql = _clean_sql(response)
    if not sql:
        raise ValueError("No SQL query found in the model response.")

    # NOTE(review): the SQL is model-generated and executed as-is on the
    # shared connection; consider a read-only connection or validation.
    con.raw_sql(f"CREATE OR REPLACE VIEW testing AS ({sql})")
    table = con.table("testing")

    if "geometry" not in table.columns:
        return table

    gdf = (
        table.cast({"geometry": "geometry"})
        .mutate(geometry=_.geometry.convert(source_crs, "EPSG:4326"))
        .to_pandas()
    )
    # Label the frame with the CRS the geometries were just converted to.
    gdf.set_crs(epsg=4326, inplace=True)
    return map_layer(gdf)
108
+
109
+
110
+ # +
111
+ #response = "SELECT * FROM crops LIMIT 100"
112
+ #fields = as_geopandas(response)
113
+ #fields
114
+ # -
115
+
116
# Placeholder text shown in the chat input box.
example = "Which are the 10 largest fields?"

with st.container():
    prompt = st.chat_input(example, key="chain")
    if prompt:
        # Echo the question, then answer inside the assistant bubble.
        st.chat_message("user").write(prompt)
        with st.chat_message("assistant"):
            # The chain returns the generated SQL text, shown before it is run.
            response = chain.invoke({"question": prompt})
            st.write(response)
            result = as_geopandas(response)
            # Bare expression on purpose: display relies on Streamlit's
            # "magic" handling of standalone expressions.
            result
125
+
126
+ st.divider()
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ langchain
3
+ langchain_community
4
+ langchain_openai
5
+ duckdb_engine
6
+ duckdb
7
+ altair
8
+ ibis-framework[duckdb]
9
+ lonboard