File size: 4,468 Bytes
66373a4
 
 
 
 
 
 
 
 
 
 
 
832f5c2
66373a4
 
 
832f5c2
 
66373a4
 
 
 
 
 
 
 
 
 
832f5c2
66373a4
 
 
 
 
 
 
 
 
832f5c2
 
 
55e6cbf
 
832f5c2
 
 
 
 
 
 
66373a4
cf1a22e
66373a4
 
832f5c2
cf1a22e
66373a4
 
 
 
 
 
832f5c2
66373a4
 
 
 
 
 
 
 
 
 
 
 
 
832f5c2
66373a4
 
 
 
832f5c2
66373a4
 
832f5c2
 
 
 
66373a4
832f5c2
66373a4
 
 
 
 
 
 
832f5c2
 
 
 
66373a4
 
 
 
832f5c2
 
 
 
 
 
 
 
 
 
 
 
cf1a22e
832f5c2
 
 
 
66373a4
 
 
 
 
 
 
832f5c2
 
 
 
 
 
66373a4
832f5c2
66373a4
832f5c2
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import streamlit as st
from langchain_openai import ChatOpenAI 
from langchain_community.llms import Ollama
from langchain_community.utilities import SQLDatabase
from langchain.chains import create_sql_query_chain
import geopandas as gpd


import ibis
from ibis import _
# Remote GeoParquet file (fiboa be-vlg dataset) that backs the "crops" table.
geoparquet = "https://data.source.coop/fiboa/be-vlg/be_vlg.parquet"

# File-backed DuckDB connection opened through ibis, requesting the "spatial"
# extension.
con = ibis.duckdb.connect("duck.db", extensions=["spatial"])

# Raw-SQL alternative kept for reference:
#con.raw_sql(f'CREATE OR REPLACE VIEW crops AS SELECT *, ST_GEOMFROMWKB(geometry) AS "geometry" FROM read_parquet("{geoparquet}")')

# Register the parquet file under the name "crops", then cast its "geometry"
# column to the geometry type.
crops = con.read_parquet(geoparquet, table_name="crops")
crops = crops.cast({"geometry": "geometry"})
# df = crops.to_pandas()

# +
# df = crops.to_pandas()

# +
#gdf = gpd.read_parquet("be_vlg.parquet")
#gdf.crs
# -

# Page metadata; this is the first Streamlit call made by the script.
st.set_page_config(
    page_title="fiboa chat tool",
    page_icon="🦜",
)
# App heading. Spelled "Fiboa" to match the fiboa project name used in the
# page title and the data source URL.
st.title("FiboaGPT Prototype")

# +
# from langchain.chains.sql_database.prompt import PROMPT # peek at the default
from langchain_core.prompts.prompt import PromptTemplate

# Custom prompt for the SQL-generation chain. It asks the model to return only
# a bare SQL statement against the "crops" table, to include the "geometry"
# column for map requests and to omit it for table requests.
# The placeholders {dialect}, {input}, {table_info} and {top_k} must stay in
# sync with `input_variables`.
new_prompt = PromptTemplate(
    input_variables=['dialect', 'input', 'table_info', 'top_k'],
    template='''
Given an input question, first create a syntactically correct {dialect} query to run, then look at the results of the query
and return the answer. Only limit for {top_k} when asked for "some" or "examples".

This duckdb database includes full support for spatial queries, so it will understand most PostGIS-type
queries as well.  Remember that you must cast blob column to a geom type using ST_GeomFromWKB(geometry) AS geometry
before any spatial operations. Do not use ST_GeomFromWKB for non-spatial queries.


If you are asked to "map" or "show on a map", then be sure to select the "geometry" column in your query.
If asked to show a "table", you must not include the "geometry" column from the query results.

Use the following format: return only the SQLQuery to run. DO NOT use the prefix with "SQLQuery:".
Do not include an explanation.

Pay close attention to use only the column names that you can see in the schema description. Be careful to
not query for columns that do not exist. Also, pay attention to which column is in which table.

Tables include {table_info}. The data you should use always comes from the table called "crops".
Only use that table, do not use the "testing" table. Pay close attention to this table schema.

Question: {input}
''',
)
# -

# Chat model that turns questions into SQL. Temperature 0 is requested and the
# API key is read from Streamlit's secrets.
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0,
    api_key=st.secrets["OPENAI_API_KEY"],
)

# +
# Text-to-SQL chain over the duck.db file (views included), driven by the
# custom prompt. k=11 is handed to the chain builder; presumably it is the
# value substituted for {top_k} in the prompt.
db = SQLDatabase.from_uri("duckdb:///duck.db", view_support=True)
chain = create_sql_query_chain(llm, db, prompt=new_prompt, k=11)

## testing
#user_input = "Show on a map the 10 largest fields?"
#sql_query = chain.invoke({"question": user_input})
#print(sql_query)
# 


# -




# +
import geopandas as gpd
from ibis import _
import re
import leafmap.maplibregl as leafmap
# Module-level leafmap (maplibregl) map. The chat handler adds query results
# that contain a "geometry" column to it before rendering.
m = leafmap.Map()

# Matches a whole response wrapped in a Markdown code fence, e.g.
# "```sql\nSELECT ...\n```", capturing only the fenced text.
_FENCED_SQL = re.compile(r"^```[A-Za-z]*[ \t]*\r?\n(.*?)\s*```$", re.DOTALL)


def _clean_sql(response):
    """Reduce raw LLM output to a bare statement that can sit inside ``AS (...)``.

    Surrounding whitespace, an optional Markdown code fence and any trailing
    semicolons (with whitespace around them) are removed.
    """
    sql = response.strip()
    fenced = _FENCED_SQL.match(sql)
    if fenced:
        sql = fenced.group(1)
    # A leftover ";" would be a syntax error once the statement is wrapped in
    # parentheses by the caller.
    return sql.rstrip("; \t\r\n")


def as_geopandas(response):
    """Run a generated SQL query and return its result as a data frame.

    The query is stored as the DuckDB view ``testing`` (replacing any previous
    one) on the module-level connection ``con`` and read back through ibis.

    Parameters
    ----------
    response : str
        SQL text, typically the output of the query chain. Surrounding
        whitespace, a Markdown code fence and trailing semicolons are
        tolerated.

    Returns
    -------
    The result of ``to_pandas()`` on the view. When the view has a
    ``geometry`` column, that column is first cast to the geometry type and
    converted from EPSG:31370 to EPSG:4326, and the resulting frame has its
    CRS set to EPSG:4326.
    """
    # NOTE(review): the statement is model-generated and interpolated into DDL
    # without validation; consider a read-only connection or a statement
    # allow-list if this is exposed to untrusted users.
    sql_query = f"CREATE OR REPLACE VIEW testing AS ({_clean_sql(response)})"
    con.raw_sql(sql_query)
    view = con.table("testing")

    # Non-spatial results need no reprojection.
    if "geometry" not in view.columns:
        return view.to_pandas()

    reprojected = (
        view
        .cast({"geometry": "geometry"})
        .mutate(geometry=_.geometry.convert("EPSG:31370", "EPSG:4326"))
        .to_pandas()
    )
    # Coordinates were already converted in the query; this records the CRS on
    # the returned frame.
    return reprojected.set_crs(epsg=4326, inplace=True)


# -

# NOTE(review): looks like a leftover smoke test. The query runs every time the
# script executes and the returned frame is discarded; the only lasting effect
# visible here is (re)creating the "testing" view. Confirm it is still needed.
response = "SELECT geometry, area FROM crops ORDER BY area DESC LIMIT 10;"
as_geopandas(response)
#if 'geometry' in gdf.columns:
#    m.add_gdf(gdf)
#    m
#gdf

# +
# Intro text as a bare string literal (presumably rendered on the page by
# Streamlit "magic").
'''
Ask me about fiboa data! Request "a map" to get map output, or table for tabular output, e.g.

- "Show a map with the 10 largest fields"
- "Show a table of the total area by crop typology"
- "Compute the perimeters of all fields and determine which have the longest"

'''

# Placeholder question shown in the chat input box.
example = "Which are the 10 largest fields?"
with st.container():
    prompt = st.chat_input(example, key="chain")
    if prompt:
        # Echo the question, then answer inside an assistant bubble.
        st.chat_message("user").write(prompt)
        with st.chat_message("assistant"):
            # Show the generated SQL before running it.
            response = chain.invoke({"question": prompt})
            st.write(response)
            gdf = as_geopandas(response)
            if 'geometry' not in gdf.columns:
                # Tabular result: plain data frame widget.
                st.dataframe(gdf)
            else:
                # Spatial result: draw it on the shared map.
                m.add_gdf(gdf)
                m.to_streamlit()

# +
# Divider between the chat area and the footer text.
st.divider()

# Footer as a bare string literal (presumably rendered on the page by
# Streamlit "magic").
'''
Data sources: https://beta.source.coop/fiboa/be-vlg
Software License: BSD

'''