cboettig committed on
Commit
832f5c2
1 Parent(s): 66373a4

and so it begins...

Browse files
Files changed (5) hide show
  1. .gitignore +3 -0
  2. README.md +0 -13
  3. app.py +60 -41
  4. france_eurocrops_2018_fiboa.parquet +0 -3
  5. requirements.txt +1 -1
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ *.db
2
+ .ipynb_checkpoints
3
+
README.md DELETED
@@ -1,13 +0,0 @@
1
- ---
2
- title: Fiboa
3
- emoji: 📊
4
- colorFrom: blue
5
- colorTo: green
6
- sdk: streamlit
7
- sdk_version: 1.37.1
8
- app_file: app.py
9
- pinned: false
10
- license: bsd
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py CHANGED
@@ -10,10 +10,12 @@ import ibis
10
  from ibis import _
11
  geoparquet = "https://data.source.coop/fiboa/be-vlg/be_vlg.parquet"
12
  con = ibis.duckdb.connect("duck.db", extensions = ["spatial"])
 
13
  crops = con.read_parquet(geoparquet, "crops").cast({"geometry": "geometry"})
14
  # df = crops.to_pandas()
15
 
16
- df = crops.to_pandas()
 
17
 
18
  # +
19
  #gdf = gpd.read_parquet("be_vlg.parquet")
@@ -24,7 +26,7 @@ st.set_page_config(
24
  page_title="fiboa chat tool",
25
  page_icon="🦜",
26
  )
27
- st.title("🚧 Early prototype 🚧")
28
 
29
  # +
30
  # from langchain.chains.sql_database.prompt import PROMPT # peek at the default
@@ -34,31 +36,32 @@ new_prompt = PromptTemplate(input_variables=['dialect', 'input', 'table_info', '
34
  template=
35
  '''
36
  Given an input question, first create a syntactically correct {dialect} query to run, then look at the results of the query
37
- and return the answer. Never use limit for {top_k}. You can order the results by a relevant column to return the most interesting
38
- examples in the database. This duckdb database includes full support for spatial queries, so it will understand most PostGIS-type
39
- queries as well.
 
 
 
 
 
 
 
 
 
40
 
41
- If you are asked to "map" or "show on a map", be sure to alway select the "geometry" column in your query.
42
- In the response, return only the SQLQuery to run.
43
 
44
  Pay attention to use only the column names that you can see in the schema description. Be careful to
45
  not query for columns that do not exist. Also, pay attention to which column is in which table.
46
 
47
- Use the following format:
48
- Question: Question here
49
- SQLQuery: SQL Query to run
50
- SQLResult: Result of the SQLQuery
51
- Answer: Final answer here
52
-
53
- Only use the following tables:
54
- {table_info}
55
 
56
  Question: {input}
57
  '''
58
  )
59
  # -
60
 
61
- llm = ChatOpenAI(temperature=0, api_key=st.secrets["OPENAI_API_KEY"])
62
 
63
  # +
64
  # Create the SQL query chain with the custom prompt
@@ -72,28 +75,20 @@ chain = create_sql_query_chain(llm, db, prompt=new_prompt, k= 11)
72
  #
73
 
74
 
 
75
 
76
 
77
- # +
78
- import lonboard
79
-
80
- def map_layer(gdf):
81
- layer = lonboard.PolygonLayer.from_geopandas(
82
- gdf,
83
- get_line_width=20, # width in default units (meters)
84
- line_width_min_pixels=0.2, # minimum width when zoomed out
85
- get_fill_color=[204, 251, 254], # light blue
86
- get_line_color=[37, 36, 34], # dark border color
87
- )
88
- m = lonboard.Map(layer)
89
- return m
90
 
91
 
92
- # -
93
-
94
  import geopandas as gpd
95
  from ibis import _
 
 
 
 
96
  def as_geopandas(response):
 
97
  sql_query = f"CREATE OR REPLACE VIEW testing AS ({response})"
98
  con.raw_sql(sql_query)
99
  gdf = con.table("testing")
@@ -101,18 +96,31 @@ def as_geopandas(response):
101
  gdf = (gdf
102
  .cast({"geometry": "geometry"})
103
  .mutate(geometry = _.geometry.convert("EPSG:31370", "EPSG:4326"))
104
- .to_pandas())
105
- gdf.set_crs(epsg=4326, inplace=True)
106
- return map_layer(gdf)
107
- return gdf
108
 
109
 
110
- # +
111
- #response = "SELECT * FROM crops LIMIT 100"
112
- #fields = as_geopandas(response)
113
- #fields
114
  # -
115
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  example = "Which are the 10 largest fields?"
117
  with st.container():
118
  if prompt := st.chat_input(example, key="chain"):
@@ -120,7 +128,18 @@ with st.container():
120
  with st.chat_message("assistant"):
121
  response = chain.invoke({"question": prompt})
122
  st.write(response)
123
- result = as_geopandas(response)
124
- result
 
 
 
 
125
 
 
126
  st.divider()
 
 
 
 
 
 
 
10
  from ibis import _
11
  geoparquet = "https://data.source.coop/fiboa/be-vlg/be_vlg.parquet"
12
  con = ibis.duckdb.connect("duck.db", extensions = ["spatial"])
13
+ #con.raw_sql(f'CREATE OR REPLACE VIEW crops AS SELECT *, ST_GEOMFROMWKB(geometry) AS "geometry" FROM read_parquet("{geoparquet}")')
14
  crops = con.read_parquet(geoparquet, "crops").cast({"geometry": "geometry"})
15
  # df = crops.to_pandas()
16
 
17
+ # +
18
+ # df = crops.to_pandas()
19
 
20
  # +
21
  #gdf = gpd.read_parquet("be_vlg.parquet")
 
26
  page_title="fiboa chat tool",
27
  page_icon="🦜",
28
  )
29
+ st.title("FiobaGPT Prototype")
30
 
31
  # +
32
  # from langchain.chains.sql_database.prompt import PROMPT # peek at the default
 
36
  template=
37
  '''
38
  Given an input question, first create a syntactically correct {dialect} query to run, then look at the results of the query
39
+ and return the answer. Only limit for {top_k} when asked for "some" or "examples".
40
+
41
+ This duckdb database includes full support for spatial queries, so it will understand most PostGIS-type
42
+ queries as well. Remember that you must cast blob column to a geom type using ST_GeomFromWKB(geometry)
43
+ before any spatial operations.
44
+
45
+
46
+ If you are asked to "map" or "show on a map", then be select the "geometry" column in your query.
47
+ If asked to show a "table", you must not include the "geometry" column from the query results.
48
+
49
+ Use the following format: return only the SQLQuery to run. DO NOT use the prefix with "SQLQuery:".
50
+ Do not include an explanation.
51
 
 
 
52
 
53
  Pay attention to use only the column names that you can see in the schema description. Be careful to
54
  not query for columns that do not exist. Also, pay attention to which column is in which table.
55
 
56
+ Tables include {table_info}. The data you should use always comes from the table called "crops".
57
+ Only use that table, do not use the "testing" table.
 
 
 
 
 
 
58
 
59
  Question: {input}
60
  '''
61
  )
62
  # -
63
 
64
+ llm = ChatOpenAI(model="gpt-4o-mini", temperature=0, api_key=st.secrets["OPENAI_API_KEY"])
65
 
66
  # +
67
  # Create the SQL query chain with the custom prompt
 
75
  #
76
 
77
 
78
+ # -
79
 
80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
 
83
+ # +
 
84
  import geopandas as gpd
85
  from ibis import _
86
+ import re
87
+ import leafmap.maplibregl as leafmap
88
+ m = leafmap.Map()
89
+
90
  def as_geopandas(response):
91
+ response = re.sub(";$", "", response)
92
  sql_query = f"CREATE OR REPLACE VIEW testing AS ({response})"
93
  con.raw_sql(sql_query)
94
  gdf = con.table("testing")
 
96
  gdf = (gdf
97
  .cast({"geometry": "geometry"})
98
  .mutate(geometry = _.geometry.convert("EPSG:31370", "EPSG:4326"))
99
+ .to_pandas()
100
+ ).set_crs(epsg=4326, inplace=True)
101
+ return gdf
102
+ return gdf.to_pandas()
103
 
104
 
 
 
 
 
105
  # -
106
 
107
+ response = "SELECT geometry, area FROM crops ORDER BY area DESC LIMIT 10;"
108
+ as_geopandas(response)
109
+ #if 'geometry' in gdf.columns:
110
+ # m.add_gdf(gdf)
111
+ # m
112
+ #gdf
113
+
114
+ # +
115
+ '''
116
+ Ask me about fiboa data! Request "a map" to get map output, or table for tabular output, e.g.
117
+
118
+ - "Show a map with the 10 largest fields"
119
+ - "Show a table of the total area by crop type"
120
+ - "Compute the perimeters of all fields and determine which have the longest"
121
+
122
+ '''
123
+
124
  example = "Which are the 10 largest fields?"
125
  with st.container():
126
  if prompt := st.chat_input(example, key="chain"):
 
128
  with st.chat_message("assistant"):
129
  response = chain.invoke({"question": prompt})
130
  st.write(response)
131
+ gdf = as_geopandas(response)
132
+ if 'geometry' in gdf.columns:
133
+ m.add_gdf(gdf)
134
+ m.to_streamlit()
135
+ else:
136
+ st.dataframe(gdf)
137
 
138
+ # +
139
  st.divider()
140
+
141
+ '''
142
+ Data sources: https://beta.source.coop/fiboa/be-vlg
143
+ Software License: BSD
144
+
145
+ '''
france_eurocrops_2018_fiboa.parquet DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:08e11429d0bbea61d8024cfa3c2868230fd948e355161c1f2d5c144283978c92
3
- size 2314670497
 
 
 
 
requirements.txt CHANGED
@@ -6,4 +6,4 @@ duckdb_engine
6
  duckdb
7
  altair
8
  ibis-framework[duckdb]
9
- lonboard
 
6
  duckdb
7
  altair
8
  ibis-framework[duckdb]
9
+ leafmap[maplibre]