Matthias Mohr committed on
Commit
60b99bb
·
1 Parent(s): f859084

Fix datatype related issues

Browse files
Files changed (6) hide show
  1. .gitignore +3 -1
  2. README.md +3 -1
  3. fiboa/__init__.py +0 -0
  4. app.py → fiboa/app.py +11 -79
  5. fiboa/query.py +46 -0
  6. requirements.txt +1 -2
.gitignore CHANGED
@@ -2,4 +2,6 @@
2
  .ipynb_checkpoints
3
  .venv
4
  .streamlit
5
-
 
 
 
2
  .ipynb_checkpoints
3
  .venv
4
  .streamlit
5
+ *.pyc
6
+ duck.db.wal
7
+ __pycache__/
README.md CHANGED
@@ -5,9 +5,11 @@ colorFrom: blue
5
  colorTo: green
6
  sdk: streamlit
7
  sdk_version: 1.37.1
8
- app_file: app.py
9
  pinned: false
10
  license: bsd
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
5
  colorTo: green
6
  sdk: streamlit
7
  sdk_version: 1.37.1
8
+ app_file: fiboa/app.py
9
  pinned: false
10
  license: bsd
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
14
+
15
+ Run it: `streamlit run fiboa/app.py`
fiboa/__init__.py ADDED
File without changes
app.py → fiboa/app.py RENAMED
@@ -1,36 +1,23 @@
 
1
  import streamlit as st
2
- from langchain_openai import ChatOpenAI
3
- from langchain_community.llms import Ollama
4
- from langchain_community.utilities import SQLDatabase
5
  from langchain.chains import create_sql_query_chain
6
- import geopandas as gpd
 
 
7
 
 
8
 
9
- import ibis
10
- from ibis import _
11
  geoparquet = "https://data.source.coop/fiboa/be-vlg/be_vlg.parquet"
12
  con = ibis.duckdb.connect("duck.db", extensions = ["spatial"])
13
  #con.raw_sql(f'CREATE OR REPLACE VIEW crops AS SELECT *, ST_GEOMFROMWKB(geometry) AS "geometry" FROM read_parquet("{geoparquet}")')
14
  crops = con.read_parquet(geoparquet, "crops").cast({"geometry": "geometry"})
15
- # df = crops.to_pandas()
16
-
17
- # +
18
- # df = crops.to_pandas()
19
-
20
- # +
21
- #gdf = gpd.read_parquet("be_vlg.parquet")
22
- #gdf.crs
23
- # -
24
 
25
  st.set_page_config(
26
- page_title="fiboa chat tool",
27
  page_icon="🦜",
28
  )
29
- st.title("FiobaGPT Prototype")
30
-
31
- # +
32
- # from langchain.chains.sql_database.prompt import PROMPT # peek at the default
33
- from langchain_core.prompts.prompt import PromptTemplate
34
 
35
  new_prompt = PromptTemplate(input_variables=['dialect', 'input', 'table_info', 'top_k'],
36
  template=
@@ -58,59 +45,13 @@ Only use that table, do not use the "testing" table. Pay close attention to this
58
  Question: {input}
59
  '''
60
  )
61
- # -
62
 
63
  llm = ChatOpenAI(model="gpt-4o-mini", temperature=0, api_key=st.secrets["OPENAI_API_KEY"])
64
 
65
- # +
66
  # Create the SQL query chain with the custom prompt
67
  db = SQLDatabase.from_uri("duckdb:///duck.db", view_support=True)
68
- chain = create_sql_query_chain(llm, db, prompt=new_prompt, k= 11)
69
-
70
- ## testing
71
- #user_input = "Show on a map the 10 largest fields?"
72
- #sql_query = chain.invoke({"question": user_input})
73
- #print(sql_query)
74
- #
75
-
76
 
77
- # -
78
-
79
-
80
-
81
-
82
- # +
83
- import geopandas as gpd
84
- from ibis import _
85
- import re
86
- import leafmap.maplibregl as leafmap
87
- m = leafmap.Map()
88
-
89
- def as_geopandas(response):
90
- response = re.sub(";$", "", response)
91
- sql_query = f"CREATE OR REPLACE VIEW testing AS ({response})"
92
- con.raw_sql(sql_query)
93
- gdf = con.table("testing")
94
- if 'geometry' in gdf.columns:
95
- gdf = (gdf
96
- .cast({"geometry": "geometry"})
97
- .mutate(geometry = _.geometry.convert("EPSG:31370", "EPSG:4326"))
98
- .to_pandas()
99
- ).set_crs(epsg=4326, inplace=True)
100
- return gdf
101
- return gdf.to_pandas()
102
-
103
-
104
- # -
105
-
106
- response = "SELECT geometry, area FROM crops ORDER BY area DESC LIMIT 10;"
107
- as_geopandas(response)
108
- #if 'geometry' in gdf.columns:
109
- # m.add_gdf(gdf)
110
- # m
111
- #gdf
112
-
113
- # +
114
  '''
115
  Ask me about fiboa data! Request "a map" to get map output, or table for tabular output, e.g.
116
 
@@ -125,20 +66,11 @@ with st.container():
125
  if prompt := st.chat_input(example, key="chain"):
126
  st.chat_message("user").write(prompt)
127
  with st.chat_message("assistant"):
128
- response = chain.invoke({"question": prompt})
129
- st.write(response)
130
- gdf = as_geopandas(response)
131
- if 'geometry' in gdf.columns:
132
- m.add_gdf(gdf)
133
- m.to_streamlit()
134
- else:
135
- st.dataframe(gdf)
136
-
137
- # +
138
  st.divider()
139
 
140
  '''
141
  Data sources: https://beta.source.coop/fiboa/be-vlg
142
  Software License: BSD
143
-
144
  '''
 
1
+ import ibis
2
  import streamlit as st
3
+
 
 
4
  from langchain.chains import create_sql_query_chain
5
+ from langchain_community.utilities import SQLDatabase
6
+ from langchain_core.prompts.prompt import PromptTemplate
7
+ from langchain_openai import ChatOpenAI
8
 
9
+ from query import execute_prompt
10
 
 
 
11
  geoparquet = "https://data.source.coop/fiboa/be-vlg/be_vlg.parquet"
12
  con = ibis.duckdb.connect("duck.db", extensions = ["spatial"])
13
  #con.raw_sql(f'CREATE OR REPLACE VIEW crops AS SELECT *, ST_GEOMFROMWKB(geometry) AS "geometry" FROM read_parquet("{geoparquet}")')
14
  crops = con.read_parquet(geoparquet, "crops").cast({"geometry": "geometry"})
 
 
 
 
 
 
 
 
 
15
 
16
  st.set_page_config(
17
+ page_title="fiboaGPT",
18
  page_icon="🦜",
19
  )
20
+ st.title("fiboaGPT")
 
 
 
 
21
 
22
  new_prompt = PromptTemplate(input_variables=['dialect', 'input', 'table_info', 'top_k'],
23
  template=
 
45
  Question: {input}
46
  '''
47
  )
 
48
 
49
  llm = ChatOpenAI(model="gpt-4o-mini", temperature=0, api_key=st.secrets["OPENAI_API_KEY"])
50
 
 
51
  # Create the SQL query chain with the custom prompt
52
  db = SQLDatabase.from_uri("duckdb:///duck.db", view_support=True)
53
+ chain = create_sql_query_chain(llm, db, prompt=new_prompt, k=11)
 
 
 
 
 
 
 
54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  '''
56
  Ask me about fiboa data! Request "a map" to get map output, or table for tabular output, e.g.
57
 
 
66
  if prompt := st.chat_input(example, key="chain"):
67
  st.chat_message("user").write(prompt)
68
  with st.chat_message("assistant"):
69
+ execute_prompt(con, chain, prompt)
70
+
 
 
 
 
 
 
 
 
71
  st.divider()
72
 
73
  '''
74
  Data sources: https://beta.source.coop/fiboa/be-vlg
75
  Software License: BSD
 
76
  '''
fiboa/query.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
+ import leafmap.maplibregl as leafmap
4
+ import pandas as pd
5
+ import streamlit as st
6
+ from ibis import _
7
+
8
+
9
def execute_prompt(con, chain, prompt):
    """Answer a chat prompt and render the result in the Streamlit page.

    The prompt is sent to `chain`; the chain's response is echoed to the page
    and handed to `as_geopandas` together with `con`. A result that has a
    ``geometry`` column is drawn on a leafmap map, anything else is shown as
    a table.

    Args:
        con: Connection object forwarded unchanged to `as_geopandas`.
        chain: Object with an ``invoke`` method accepting ``{"question": ...}``.
        prompt: The question text entered by the user.
    """
    answer = chain.invoke({"question": prompt})
    st.write(answer)
    result = as_geopandas(con, answer)

    # Tabular output: no geometry to plot.
    if 'geometry' not in result.columns:
        hidden_columns = ['geometry', 'bbox', 'bbox.minx', 'bbox.maxx', 'bbox.miny', 'bbox.maxy']
        # errors='ignore' makes this a no-op for columns that are absent.
        result.drop(columns=hidden_columns, errors='ignore', inplace=True)
        table = pd.DataFrame(result)
        display_config = {
            "area": st.column_config.NumberColumn("Area (ha)", format="%.5f"),
            "perimeter": st.column_config.NumberColumn("Perimeter (m)", format="%.3f"),
            "determination_datetime": st.column_config.DatetimeColumn("Determination Date"),
        }
        st.dataframe(table, column_config=display_config)
        return

    # Map output: a fresh map is created for every prompt.
    field_map = leafmap.Map()
    field_map.add_gdf(result)
    field_map.to_streamlit()
25
+
26
def as_geopandas(con, response):
    """Materialise the query in `response` as a view and load it into pandas.

    The query text is wrapped in ``CREATE OR REPLACE VIEW testing AS (...)``,
    executed on `con`, and the resulting ``testing`` view is read back.

    Args:
        con: Connection exposing ``raw_sql`` and ``table`` (presumably the
            ibis DuckDB backend created by the caller -- confirm).
        response: Query text, optionally terminated by a semicolon.

    Returns:
        The rows of the view as returned by ``to_pandas()``. If a ``geometry``
        column is present it is converted from EPSG:31370 to EPSG:4326 and
        the frame's CRS is set to EPSG:4326. Columns whose dtype name starts
        with ``datetime64`` are converted to strings.
    """
    # NOTE(review): `response` is interpolated into SQL unescaped; it is
    # assumed to come from the LLM chain -- confirm this is acceptable.
    # Remove a trailing semicolon together with any whitespace after it: the
    # query is embedded in parentheses below, so a leftover ";" (e.g. from
    # "SELECT ...; ") would end up inside the view definition.
    response = re.sub(r";\s*$", "", response)
    sql_query = f"CREATE OR REPLACE VIEW testing AS ({response})"
    con.raw_sql(sql_query)
    gdf = con.table("testing")

    if 'geometry' in gdf.columns:
        gdf = (gdf
            .cast({"geometry": "geometry"})
            .mutate(geometry = _.geometry.convert("EPSG:31370", "EPSG:4326"))
            .to_pandas()
        ).set_crs(epsg=4326, inplace=True)
    else:
        gdf = gdf.to_pandas()

    # Datetime columns are replaced by their string form before the frame is
    # handed back (presumably so downstream rendering can serialise them --
    # TODO confirm).
    for col in gdf.columns:
        dtype = str(gdf[col].dtype)
        if dtype.startswith("datetime64"):
            gdf[col] = gdf[col].astype(str)

    return gdf
requirements.txt CHANGED
@@ -1,6 +1,5 @@
1
- duckdb==1.0.0
2
  pandas==2.2.2
3
- #git+https://github.com/eodaGmbH/py-maplibregl@feature/color-utils
4
  git+https://github.com/eodaGmbH/py-maplibregl
5
  leafmap[maplibre]
6
  ibis-framework[duckdb]==9.1.0
 
1
+ duckdb==1.1.0
2
  pandas==2.2.2
 
3
  git+https://github.com/eodaGmbH/py-maplibregl
4
  leafmap[maplibre]
5
  ibis-framework[duckdb]==9.1.0