Spaces:
Running
Running
update app.py
Browse files
app.py
CHANGED
@@ -4,7 +4,7 @@ import ee
|
|
4 |
import os
|
5 |
import pandas as pd
|
6 |
import geopandas as gpd
|
7 |
-
from datetime import datetime
|
8 |
import leafmap.foliumap as leafmap
|
9 |
import re
|
10 |
from shapely.geometry import base
|
@@ -143,68 +143,98 @@ def calculate_custom_formula(image, geometry, selected_bands, custom_formula, re
|
|
143 |
return ee.Image(0).rename('custom_result').set('error', str(e))
|
144 |
|
145 |
# Aggregation functions
|
146 |
-
def
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
|
|
|
|
|
|
|
|
|
|
154 |
return ee.ImageCollection(daily_images)
|
155 |
|
156 |
-
def aggregate_data_weekly(collection):
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
170 |
return ee.ImageCollection(weekly_images)
|
171 |
|
172 |
def aggregate_data_monthly(collection, start_date, end_date):
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
def
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
181 |
return ee.ImageCollection(monthly_images)
|
182 |
|
183 |
-
def aggregate_data_yearly(collection):
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
191 |
return ee.ImageCollection(yearly_images)
|
192 |
|
|
|
|
|
|
|
|
|
|
|
193 |
# Worker function for processing a single geometry
|
194 |
-
def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selected_bands, reducer_choice, shape_type, aggregation_period, custom_formula,
|
195 |
if shape_type.lower() == "point":
|
196 |
-
latitude = row.get(
|
197 |
-
longitude = row.get(
|
198 |
if pd.isna(latitude) or pd.isna(longitude):
|
199 |
-
return None
|
200 |
location_name = row.get('name', f"Location_{row.name}")
|
201 |
if kernel_size == "3x3 Kernel":
|
202 |
-
buffer_size = 45
|
203 |
roi = ee.Geometry.Point([longitude, latitude]).buffer(buffer_size).bounds()
|
204 |
elif kernel_size == "5x5 Kernel":
|
205 |
-
buffer_size = 75
|
206 |
roi = ee.Geometry.Point([longitude, latitude]).buffer(buffer_size).bounds()
|
207 |
-
else:
|
208 |
roi = ee.Geometry.Point([longitude, latitude])
|
209 |
elif shape_type.lower() == "polygon":
|
210 |
polygon_geometry = row.get('geometry')
|
@@ -214,47 +244,50 @@ def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selec
|
|
214 |
if not include_boundary:
|
215 |
roi = roi.buffer(-30).bounds()
|
216 |
except ValueError:
|
217 |
-
return None
|
218 |
-
|
219 |
-
collection
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
|
|
|
|
224 |
elif aggregation_period.lower() == 'weekly':
|
225 |
-
collection = aggregate_data_weekly(collection)
|
|
|
226 |
elif aggregation_period.lower() == 'monthly':
|
227 |
collection = aggregate_data_monthly(collection, start_date_str, end_date_str)
|
|
|
228 |
elif aggregation_period.lower() == 'yearly':
|
229 |
-
collection = aggregate_data_yearly(collection)
|
|
|
|
|
|
|
|
|
|
|
230 |
# Process each image in the collection
|
231 |
image_list = collection.toList(collection.size())
|
232 |
-
processed_weeks = set()
|
233 |
aggregated_results = []
|
|
|
234 |
for i in range(image_list.size().getInfo()):
|
235 |
image = ee.Image(image_list.get(i))
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
date =
|
240 |
elif aggregation_period.lower() == 'weekly':
|
241 |
-
|
242 |
-
period_label = 'Week'
|
243 |
-
date = ee.String(timestamp).getInfo()
|
244 |
-
if (pd.to_datetime(date) < pd.to_datetime(start_date_str) or
|
245 |
-
pd.to_datetime(date) > pd.to_datetime(end_date_str) or
|
246 |
-
date in processed_weeks):
|
247 |
-
continue
|
248 |
-
processed_weeks.add(date)
|
249 |
elif aggregation_period.lower() == 'monthly':
|
250 |
-
|
251 |
-
period_label = 'Month'
|
252 |
-
date = ee.Date(timestamp).format('YYYY-MM').getInfo()
|
253 |
elif aggregation_period.lower() == 'yearly':
|
254 |
-
|
255 |
-
|
256 |
-
date =
|
|
|
|
|
257 |
index_image = calculate_custom_formula(image, roi, selected_bands, custom_formula, reducer_choice, scale=30)
|
|
|
258 |
try:
|
259 |
index_value = index_image.reduceRegion(
|
260 |
reducer=get_reducer(reducer_choice),
|
@@ -262,29 +295,30 @@ def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selec
|
|
262 |
scale=30
|
263 |
).get('custom_result')
|
264 |
calculated_value = index_value.getInfo()
|
|
|
265 |
if isinstance(calculated_value, (int, float)):
|
266 |
result = {
|
267 |
'Location Name': location_name,
|
268 |
period_label: date,
|
269 |
-
'Start Date': start_date_str,
|
270 |
-
'End Date': end_date_str,
|
271 |
'Calculated Value': calculated_value
|
272 |
}
|
273 |
if shape_type.lower() == 'point':
|
274 |
-
result[
|
275 |
-
result[
|
276 |
aggregated_results.append(result)
|
277 |
except Exception as e:
|
278 |
st.error(f"Error retrieving value for {location_name}: {e}")
|
|
|
279 |
return aggregated_results
|
280 |
|
281 |
# Main processing function
|
282 |
-
def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id, selected_bands, reducer_choice, shape_type, aggregation_period, custom_formula="", kernel_size=None, include_boundary=None
|
283 |
aggregated_results = []
|
284 |
total_steps = len(locations_df)
|
285 |
progress_bar = st.progress(0)
|
286 |
progress_text = st.empty()
|
287 |
-
|
|
|
288 |
with ThreadPoolExecutor(max_workers=10) as executor:
|
289 |
futures = []
|
290 |
for idx, row in locations_df.iterrows():
|
@@ -299,12 +333,13 @@ def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id,
|
|
299 |
shape_type,
|
300 |
aggregation_period,
|
301 |
custom_formula,
|
|
|
|
|
302 |
kernel_size,
|
303 |
-
include_boundary
|
304 |
-
lat_col,
|
305 |
-
lon_col
|
306 |
)
|
307 |
futures.append(future)
|
|
|
308 |
completed = 0
|
309 |
for future in as_completed(futures):
|
310 |
result = future.result()
|
@@ -314,26 +349,14 @@ def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id,
|
|
314 |
progress_percentage = completed / total_steps
|
315 |
progress_bar.progress(progress_percentage)
|
316 |
progress_text.markdown(f"Processing: {int(progress_percentage * 100)}%")
|
317 |
-
|
318 |
end_time = time.time()
|
319 |
-
processing_time = end_time - start_time
|
|
|
320 |
if aggregated_results:
|
321 |
result_df = pd.DataFrame(aggregated_results)
|
322 |
-
|
323 |
-
|
324 |
-
'Start Date': 'first',
|
325 |
-
'End Date': 'first',
|
326 |
-
'Calculated Value': 'mean'
|
327 |
-
}
|
328 |
-
if shape_type.lower() == 'point':
|
329 |
-
agg_dict[lat_col] = 'first'
|
330 |
-
agg_dict[lon_col] = 'first'
|
331 |
-
aggregated_output = result_df.groupby('Location Name').agg(agg_dict).reset_index()
|
332 |
-
aggregated_output.rename(columns={'Calculated Value': 'Aggregated Value'}, inplace=True)
|
333 |
-
return aggregated_output.to_dict(orient='records'), processing_time # Return processing time
|
334 |
-
else:
|
335 |
-
return result_df.to_dict(orient='records'), processing_time
|
336 |
-
return [], processing_time
|
337 |
|
338 |
# Streamlit App Logic
|
339 |
st.markdown("<h5>Image Collection</h5>", unsafe_allow_html=True)
|
@@ -341,6 +364,7 @@ imagery_base = st.selectbox("Select Imagery Base", ["Sentinel", "Landsat", "MODI
|
|
341 |
|
342 |
# Initialize data as an empty dictionary
|
343 |
data = {}
|
|
|
344 |
if imagery_base == "Sentinel":
|
345 |
dataset_file = "sentinel_datasets.json"
|
346 |
try:
|
@@ -392,10 +416,12 @@ if not data:
|
|
392 |
st.stop()
|
393 |
|
394 |
st.markdown("<hr><h5><b>{}</b></h5>".format(imagery_base), unsafe_allow_html=True)
|
|
|
395 |
main_selection = st.selectbox(f"Select {imagery_base} Dataset Category", list(data.keys()))
|
396 |
|
397 |
sub_selection = None
|
398 |
dataset_id = None
|
|
|
399 |
if main_selection:
|
400 |
sub_options = data[main_selection]["sub_options"]
|
401 |
sub_selection = st.selectbox(f"Select Specific {imagery_base} Dataset ID", list(sub_options.keys()))
|
@@ -405,6 +431,7 @@ if main_selection:
|
|
405 |
dataset_id = sub_selection
|
406 |
|
407 |
st.markdown("<hr><h5><b>Earth Engine Index Calculator</b></h5>", unsafe_allow_html=True)
|
|
|
408 |
if main_selection and sub_selection:
|
409 |
dataset_bands = data[main_selection]["bands"].get(sub_selection, [])
|
410 |
st.write(f"Available Bands for {sub_options[sub_selection]}: {', '.join(dataset_bands)}")
|
@@ -414,9 +441,11 @@ if main_selection and sub_selection:
|
|
414 |
default=[dataset_bands[0]] if dataset_bands else [],
|
415 |
help=f"Select 1 or 2 bands from: {', '.join(dataset_bands)}"
|
416 |
)
|
|
|
417 |
if len(selected_bands) < 1:
|
418 |
st.warning("Please select at least one band.")
|
419 |
st.stop()
|
|
|
420 |
if selected_bands:
|
421 |
if len(selected_bands) == 1:
|
422 |
default_formula = f"{selected_bands[0]}"
|
@@ -429,6 +458,7 @@ if main_selection and sub_selection:
|
|
429 |
value=default_formula,
|
430 |
help=f"Use only these bands: {', '.join(selected_bands)}. Examples: {example}"
|
431 |
)
|
|
|
432 |
def validate_formula(formula, selected_bands):
|
433 |
allowed_chars = set(" +-*/()0123456789.")
|
434 |
terms = re.findall(r'[a-zA-Z][a-zA-Z0-9_]*', formula)
|
@@ -438,6 +468,7 @@ if main_selection and sub_selection:
|
|
438 |
if not all(char in allowed_chars or char in ''.join(selected_bands) for char in formula):
|
439 |
return False, "Formula contains invalid characters. Use only bands, numbers, and operators (+, -, *, /, ())"
|
440 |
return True, ""
|
|
|
441 |
is_valid, error_message = validate_formula(custom_formula, selected_bands)
|
442 |
if not is_valid:
|
443 |
st.error(error_message)
|
@@ -445,6 +476,7 @@ if main_selection and sub_selection:
|
|
445 |
elif not custom_formula:
|
446 |
st.warning("Please enter a custom formula to proceed.")
|
447 |
st.stop()
|
|
|
448 |
st.write(f"Custom Formula: {custom_formula}")
|
449 |
|
450 |
reducer_choice = st.selectbox(
|
@@ -453,14 +485,14 @@ reducer_choice = st.selectbox(
|
|
453 |
index=0
|
454 |
)
|
455 |
|
456 |
-
start_date = st.date_input("Start Date", value=pd.to_datetime('2024-
|
457 |
-
end_date = st.date_input("End Date", value=pd.to_datetime('2024-
|
458 |
start_date_str = start_date.strftime('%Y-%m-%d')
|
459 |
end_date_str = end_date.strftime('%Y-%m-%d')
|
460 |
|
461 |
aggregation_period = st.selectbox(
|
462 |
-
"Select Aggregation Period
|
463 |
-
["
|
464 |
index=0
|
465 |
)
|
466 |
|
@@ -471,7 +503,7 @@ include_boundary = None
|
|
471 |
|
472 |
if shape_type.lower() == "point":
|
473 |
kernel_size = st.selectbox(
|
474 |
-
"Select Calculation Area
|
475 |
["Point", "3x3 Kernel", "5x5 Kernel"],
|
476 |
index=0,
|
477 |
help="Choose 'Point' for exact point calculation, or a kernel size for area averaging."
|
@@ -485,46 +517,51 @@ elif shape_type.lower() == "polygon":
|
|
485 |
|
486 |
file_upload = st.file_uploader(f"Upload your {shape_type} data (CSV, GeoJSON, KML)", type=["csv", "geojson", "kml"])
|
487 |
locations_df = pd.DataFrame()
|
|
|
|
|
488 |
|
489 |
if file_upload is not None:
|
490 |
if shape_type.lower() == "point":
|
491 |
if file_upload.name.endswith('.csv'):
|
492 |
-
# Read the CSV file
|
493 |
locations_df = pd.read_csv(file_upload)
|
494 |
-
|
495 |
st.write("Preview of your uploaded data (first 5 rows):")
|
496 |
st.dataframe(locations_df.head())
|
497 |
-
|
498 |
-
|
|
|
499 |
col1, col2 = st.columns(2)
|
500 |
with col1:
|
501 |
-
|
502 |
-
"Select
|
503 |
-
options=
|
504 |
-
|
|
|
505 |
)
|
506 |
with col2:
|
507 |
-
|
508 |
-
"Select
|
509 |
-
options=
|
510 |
-
|
|
|
511 |
)
|
512 |
-
|
513 |
-
|
514 |
-
|
515 |
-
st.error("Error: Selected Latitude and Longitude columns must contain numeric values.")
|
516 |
st.stop()
|
517 |
-
|
|
|
|
|
|
|
|
|
|
|
518 |
elif file_upload.name.endswith('.geojson'):
|
519 |
locations_df = gpd.read_file(file_upload)
|
520 |
if 'geometry' in locations_df.columns:
|
521 |
-
|
522 |
-
locations_df['
|
523 |
-
|
524 |
-
|
525 |
-
# Preserve original column names
|
526 |
-
lat_col = 'original_latitude'
|
527 |
-
lon_col = 'original_longitude'
|
528 |
else:
|
529 |
st.error("GeoJSON file doesn't contain geometry column")
|
530 |
st.stop()
|
@@ -547,25 +584,24 @@ if file_upload is not None:
|
|
547 |
st.error("No valid Point data found in the KML file.")
|
548 |
else:
|
549 |
locations_df = gpd.GeoDataFrame(points, geometry=gpd.GeoSeries.from_wkt([p['geometry'] for p in points]), crs="EPSG:4326")
|
550 |
-
locations_df['
|
551 |
-
locations_df['
|
552 |
-
|
553 |
-
|
554 |
-
lat_col = 'original_latitude'
|
555 |
-
lon_col = 'original_longitude'
|
556 |
except Exception as e:
|
557 |
st.error(f"Error parsing KML file: {str(e)}")
|
558 |
-
|
559 |
-
if not locations_df.empty and
|
560 |
-
m = leafmap.Map(center=[locations_df[
|
561 |
for _, row in locations_df.iterrows():
|
562 |
-
latitude = row[
|
563 |
-
longitude = row[
|
564 |
if pd.isna(latitude) or pd.isna(longitude):
|
565 |
continue
|
566 |
m.add_marker(location=[latitude, longitude], popup=row.get('name', 'No Name'))
|
567 |
st.write("Map of Uploaded Points:")
|
568 |
m.to_streamlit()
|
|
|
569 |
elif shape_type.lower() == "polygon":
|
570 |
if file_upload.name.endswith('.csv'):
|
571 |
st.error("CSV upload not supported for polygons. Please upload a GeoJSON or KML file.")
|
@@ -595,7 +631,7 @@ if file_upload is not None:
|
|
595 |
locations_df = gpd.GeoDataFrame(polygons, geometry=gpd.GeoSeries.from_wkt([p['geometry'] for p in polygons]), crs="EPSG:4326")
|
596 |
except Exception as e:
|
597 |
st.error(f"Error parsing KML file: {str(e)}")
|
598 |
-
|
599 |
if not locations_df.empty and 'geometry' in locations_df.columns:
|
600 |
centroid_lat = locations_df.geometry.centroid.y.mean()
|
601 |
centroid_lon = locations_df.geometry.centroid.x.mean()
|
@@ -621,16 +657,18 @@ if st.button(f"Calculate {custom_formula}"):
|
|
621 |
reducer_choice,
|
622 |
shape_type,
|
623 |
aggregation_period,
|
|
|
|
|
624 |
custom_formula,
|
625 |
kernel_size,
|
626 |
-
include_boundary
|
627 |
-
lat_col=lat_col if shape_type.lower() == "point" else None,
|
628 |
-
lon_col=lon_col if shape_type.lower() == "point" else None
|
629 |
)
|
|
|
630 |
if results:
|
631 |
result_df = pd.DataFrame(results)
|
632 |
st.write(f"Processed Results Table ({aggregation_period}) for Formula: {custom_formula}")
|
633 |
st.dataframe(result_df)
|
|
|
634 |
filename = f"{main_selection}_{dataset_id}_{start_date.strftime('%Y%m%d')}_{end_date.strftime('%Y%m%d')}_{aggregation_period.lower()}.csv"
|
635 |
st.download_button(
|
636 |
label="Download results as CSV",
|
@@ -638,10 +676,12 @@ if st.button(f"Calculate {custom_formula}"):
|
|
638 |
file_name=filename,
|
639 |
mime='text/csv'
|
640 |
)
|
|
|
641 |
st.success(f"Processing complete! Total processing time: {processing_time:.2f} seconds.")
|
642 |
else:
|
643 |
st.warning("No results were generated. Check your inputs or formula.")
|
644 |
st.info(f"Total processing time: {processing_time:.2f} seconds.")
|
|
|
645 |
except Exception as e:
|
646 |
st.error(f"An error occurred during processing: {str(e)}")
|
647 |
else:
|
|
|
4 |
import os
|
5 |
import pandas as pd
|
6 |
import geopandas as gpd
|
7 |
+
from datetime import datetime, timedelta
|
8 |
import leafmap.foliumap as leafmap
|
9 |
import re
|
10 |
from shapely.geometry import base
|
|
|
143 |
return ee.Image(0).rename('custom_result').set('error', str(e))
|
144 |
|
145 |
# Aggregation functions
|
146 |
+
def aggregate_data_daily(collection, start_date, end_date):
    """Build one mean composite per calendar day between the two dates.

    Args:
        collection: Earth Engine image collection to aggregate.
        start_date: First day of the range (anything ``pd.date_range`` accepts).
        end_date: Last day of the range; it gets its own composite.

    Returns:
        An ``ee.ImageCollection`` with one image per day, each carrying a
        ``'date'`` property formatted as ``YYYY-MM-DD``.
    """
    def _composite_for(day):
        label = day.strftime('%Y-%m-%d')
        window_start = ee.Date(label)
        # The window runs from this day up to the start of the next one.
        window_end = ee.Date(label).advance(1, 'day')
        return collection.filterDate(window_start, window_end).mean().set('date', label)

    every_day = pd.date_range(start=start_date, end=end_date, freq='D')
    return ee.ImageCollection([_composite_for(day) for day in every_day])
|
160 |
|
161 |
+
def aggregate_data_weekly(collection, start_date, end_date):
    """Build one mean composite per 7-day week, counted from ``start_date``.

    Weeks start exactly on ``start_date`` (not on a calendar weekday) and each
    covers seven days inclusive; the final week is clipped to ``end_date``.

    Args:
        collection: Earth Engine image collection to aggregate.
        start_date: First day of the first week (anything ``pd.to_datetime``
            accepts).
        end_date: Last day to include.

    Returns:
        An ``ee.ImageCollection`` with one image per week, each carrying
        ``'week_start'`` and ``'week_end'`` properties (``YYYY-MM-DD``, both
        inclusive).
    """
    range_start = pd.to_datetime(start_date)
    range_end = pd.to_datetime(end_date)

    weekly_images = []
    week_start = range_start
    while week_start <= range_end:
        # Seven days inclusive, clipped so the last week never passes the range end.
        week_end = min(week_start + timedelta(days=6), range_end)
        start_label = week_start.strftime('%Y-%m-%d')
        end_label = week_end.strftime('%Y-%m-%d')

        # filterDate excludes its end bound, so push it one day forward;
        # otherwise the labelled last day of every week would be dropped and a
        # one-day trailing week would select nothing at all.
        window = collection.filterDate(
            ee.Date(start_label),
            ee.Date(end_label).advance(1, 'day'),
        )
        weekly_images.append(
            window.mean().set('week_start', start_label).set('week_end', end_label)
        )
        week_start = week_end + timedelta(days=1)

    return ee.ImageCollection(weekly_images)
|
184 |
|
185 |
def aggregate_data_monthly(collection, start_date, end_date):
    """Build one mean composite per calendar month touched by the date range.

    Every month that overlaps ``[start_date, end_date]`` gets a composite,
    including the month that contains a mid-month ``start_date``. Each
    composite spans the whole calendar month; it is not clipped to the range.

    Args:
        collection: Earth Engine image collection to aggregate.
        start_date: Any date inside the first month to include.
        end_date: Any date inside the last month to include.

    Returns:
        An ``ee.ImageCollection`` with one image per month, each carrying a
        ``'month'`` property formatted as ``YYYY-MM``.
    """
    # period_range enumerates the months overlapping the range. A date_range
    # with freq='MS' would only yield month starts inside the range and so
    # would skip the first month whenever start_date is not the 1st.
    months = pd.period_range(start=start_date, end=end_date, freq='M')

    monthly_images = []
    for month in months:
        month_start = month.start_time.strftime('%Y-%m-%d')
        next_month_start = (month + 1).start_time.strftime('%Y-%m-%d')
        in_month = collection.filterDate(ee.Date(month_start), ee.Date(next_month_start))
        monthly_images.append(in_month.mean().set('month', month.strftime('%Y-%m')))

    return ee.ImageCollection(monthly_images)
|
201 |
|
202 |
+
def aggregate_data_yearly(collection, start_date, end_date):
    """Build one mean composite per calendar year touched by the date range.

    Each composite spans the whole year from 1 January up to 1 January of the
    following year; it is not clipped to ``start_date`` / ``end_date``.

    Args:
        collection: Earth Engine image collection to aggregate.
        start_date: Any date inside the first year to include.
        end_date: Any date inside the last year to include.

    Returns:
        An ``ee.ImageCollection`` with one image per year, each carrying a
        ``'year'`` property holding the year as a string.
    """
    first_year = pd.to_datetime(start_date).year
    last_year = pd.to_datetime(end_date).year

    def _composite_for(year):
        year_open = ee.Date(f"{year}-01-01")
        year_close = ee.Date(f"{year+1}-01-01")
        return collection.filterDate(year_open, year_close).mean().set('year', str(year))

    return ee.ImageCollection(
        [_composite_for(year) for year in range(first_year, last_year + 1)]
    )
|
217 |
|
218 |
+
def aggregate_data_custom(collection, start_date, end_date):
    """Reduce the whole date range to a single mean composite.

    Args:
        collection: Earth Engine image collection to aggregate.
        start_date: First day to include, in a form ``ee.Date`` accepts.
        end_date: Last day to include, in a form ``ee.Date`` accepts.

    Returns:
        A single ``ee.Image`` (not a collection) carrying ``'start_date'`` and
        ``'end_date'`` properties set to the values passed in.
    """
    # filterDate excludes its end bound; advance one day so end_date itself is
    # part of the composite, matching the daily aggregation, and so that
    # start_date == end_date still selects that day.
    in_range = collection.filterDate(
        ee.Date(start_date),
        ee.Date(end_date).advance(1, 'day'),
    )
    return in_range.mean().set('start_date', start_date).set('end_date', end_date)
|
222 |
+
|
223 |
# Worker function for processing a single geometry
|
224 |
+
def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selected_bands, reducer_choice, shape_type, aggregation_period, custom_formula, original_lat_col, original_lon_col, kernel_size=None, include_boundary=None):
|
225 |
if shape_type.lower() == "point":
|
226 |
+
latitude = row.get('latitude')
|
227 |
+
longitude = row.get('longitude')
|
228 |
if pd.isna(latitude) or pd.isna(longitude):
|
229 |
+
return None
|
230 |
location_name = row.get('name', f"Location_{row.name}")
|
231 |
if kernel_size == "3x3 Kernel":
|
232 |
+
buffer_size = 45
|
233 |
roi = ee.Geometry.Point([longitude, latitude]).buffer(buffer_size).bounds()
|
234 |
elif kernel_size == "5x5 Kernel":
|
235 |
+
buffer_size = 75
|
236 |
roi = ee.Geometry.Point([longitude, latitude]).buffer(buffer_size).bounds()
|
237 |
+
else:
|
238 |
roi = ee.Geometry.Point([longitude, latitude])
|
239 |
elif shape_type.lower() == "polygon":
|
240 |
polygon_geometry = row.get('geometry')
|
|
|
244 |
if not include_boundary:
|
245 |
roi = roi.buffer(-30).bounds()
|
246 |
except ValueError:
|
247 |
+
return None
|
248 |
+
|
249 |
+
# Filter collection by location
|
250 |
+
collection = ee.ImageCollection(dataset_id).filterBounds(roi)
|
251 |
+
|
252 |
+
# Apply temporal aggregation based on selected period
|
253 |
+
if aggregation_period.lower() == 'daily':
|
254 |
+
collection = aggregate_data_daily(collection, start_date_str, end_date_str)
|
255 |
+
period_label = 'Date'
|
256 |
elif aggregation_period.lower() == 'weekly':
|
257 |
+
collection = aggregate_data_weekly(collection, start_date_str, end_date_str)
|
258 |
+
period_label = 'Week'
|
259 |
elif aggregation_period.lower() == 'monthly':
|
260 |
collection = aggregate_data_monthly(collection, start_date_str, end_date_str)
|
261 |
+
period_label = 'Month'
|
262 |
elif aggregation_period.lower() == 'yearly':
|
263 |
+
collection = aggregate_data_yearly(collection, start_date_str, end_date_str)
|
264 |
+
period_label = 'Year'
|
265 |
+
else: # Custom
|
266 |
+
collection = ee.ImageCollection([aggregate_data_custom(collection, start_date_str, end_date_str)])
|
267 |
+
period_label = 'Date Range'
|
268 |
+
|
269 |
# Process each image in the collection
|
270 |
image_list = collection.toList(collection.size())
|
|
|
271 |
aggregated_results = []
|
272 |
+
|
273 |
for i in range(image_list.size().getInfo()):
|
274 |
image = ee.Image(image_list.get(i))
|
275 |
+
|
276 |
+
# Get the appropriate date label based on aggregation period
|
277 |
+
if aggregation_period.lower() == 'daily':
|
278 |
+
date = image.get('date').getInfo()
|
279 |
elif aggregation_period.lower() == 'weekly':
|
280 |
+
date = f"{image.get('week_start').getInfo()} to {image.get('week_end').getInfo()}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
281 |
elif aggregation_period.lower() == 'monthly':
|
282 |
+
date = image.get('month').getInfo()
|
|
|
|
|
283 |
elif aggregation_period.lower() == 'yearly':
|
284 |
+
date = image.get('year').getInfo()
|
285 |
+
else: # Custom
|
286 |
+
date = f"{image.get('start_date').getInfo()} to {image.get('end_date').getInfo()}"
|
287 |
+
|
288 |
+
# Calculate the custom formula
|
289 |
index_image = calculate_custom_formula(image, roi, selected_bands, custom_formula, reducer_choice, scale=30)
|
290 |
+
|
291 |
try:
|
292 |
index_value = index_image.reduceRegion(
|
293 |
reducer=get_reducer(reducer_choice),
|
|
|
295 |
scale=30
|
296 |
).get('custom_result')
|
297 |
calculated_value = index_value.getInfo()
|
298 |
+
|
299 |
if isinstance(calculated_value, (int, float)):
|
300 |
result = {
|
301 |
'Location Name': location_name,
|
302 |
period_label: date,
|
|
|
|
|
303 |
'Calculated Value': calculated_value
|
304 |
}
|
305 |
if shape_type.lower() == 'point':
|
306 |
+
result[original_lat_col] = latitude
|
307 |
+
result[original_lon_col] = longitude
|
308 |
aggregated_results.append(result)
|
309 |
except Exception as e:
|
310 |
st.error(f"Error retrieving value for {location_name}: {e}")
|
311 |
+
|
312 |
return aggregated_results
|
313 |
|
314 |
# Main processing function
|
315 |
+
def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id, selected_bands, reducer_choice, shape_type, aggregation_period, original_lat_col, original_lon_col, custom_formula="", kernel_size=None, include_boundary=None):
|
316 |
aggregated_results = []
|
317 |
total_steps = len(locations_df)
|
318 |
progress_bar = st.progress(0)
|
319 |
progress_text = st.empty()
|
320 |
+
|
321 |
+
start_time = time.time()
|
322 |
with ThreadPoolExecutor(max_workers=10) as executor:
|
323 |
futures = []
|
324 |
for idx, row in locations_df.iterrows():
|
|
|
333 |
shape_type,
|
334 |
aggregation_period,
|
335 |
custom_formula,
|
336 |
+
original_lat_col,
|
337 |
+
original_lon_col,
|
338 |
kernel_size,
|
339 |
+
include_boundary
|
|
|
|
|
340 |
)
|
341 |
futures.append(future)
|
342 |
+
|
343 |
completed = 0
|
344 |
for future in as_completed(futures):
|
345 |
result = future.result()
|
|
|
349 |
progress_percentage = completed / total_steps
|
350 |
progress_bar.progress(progress_percentage)
|
351 |
progress_text.markdown(f"Processing: {int(progress_percentage * 100)}%")
|
352 |
+
|
353 |
end_time = time.time()
|
354 |
+
processing_time = end_time - start_time
|
355 |
+
|
356 |
if aggregated_results:
|
357 |
result_df = pd.DataFrame(aggregated_results)
|
358 |
+
return result_df.to_dict(orient='records'), processing_time
|
359 |
+
return [], processing_time
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
360 |
|
361 |
# Streamlit App Logic
|
362 |
st.markdown("<h5>Image Collection</h5>", unsafe_allow_html=True)
|
|
|
364 |
|
365 |
# Initialize data as an empty dictionary
|
366 |
data = {}
|
367 |
+
|
368 |
if imagery_base == "Sentinel":
|
369 |
dataset_file = "sentinel_datasets.json"
|
370 |
try:
|
|
|
416 |
st.stop()
|
417 |
|
418 |
st.markdown("<hr><h5><b>{}</b></h5>".format(imagery_base), unsafe_allow_html=True)
|
419 |
+
|
420 |
main_selection = st.selectbox(f"Select {imagery_base} Dataset Category", list(data.keys()))
|
421 |
|
422 |
sub_selection = None
|
423 |
dataset_id = None
|
424 |
+
|
425 |
if main_selection:
|
426 |
sub_options = data[main_selection]["sub_options"]
|
427 |
sub_selection = st.selectbox(f"Select Specific {imagery_base} Dataset ID", list(sub_options.keys()))
|
|
|
431 |
dataset_id = sub_selection
|
432 |
|
433 |
st.markdown("<hr><h5><b>Earth Engine Index Calculator</b></h5>", unsafe_allow_html=True)
|
434 |
+
|
435 |
if main_selection and sub_selection:
|
436 |
dataset_bands = data[main_selection]["bands"].get(sub_selection, [])
|
437 |
st.write(f"Available Bands for {sub_options[sub_selection]}: {', '.join(dataset_bands)}")
|
|
|
441 |
default=[dataset_bands[0]] if dataset_bands else [],
|
442 |
help=f"Select 1 or 2 bands from: {', '.join(dataset_bands)}"
|
443 |
)
|
444 |
+
|
445 |
if len(selected_bands) < 1:
|
446 |
st.warning("Please select at least one band.")
|
447 |
st.stop()
|
448 |
+
|
449 |
if selected_bands:
|
450 |
if len(selected_bands) == 1:
|
451 |
default_formula = f"{selected_bands[0]}"
|
|
|
458 |
value=default_formula,
|
459 |
help=f"Use only these bands: {', '.join(selected_bands)}. Examples: {example}"
|
460 |
)
|
461 |
+
|
462 |
def validate_formula(formula, selected_bands):
|
463 |
allowed_chars = set(" +-*/()0123456789.")
|
464 |
terms = re.findall(r'[a-zA-Z][a-zA-Z0-9_]*', formula)
|
|
|
468 |
if not all(char in allowed_chars or char in ''.join(selected_bands) for char in formula):
|
469 |
return False, "Formula contains invalid characters. Use only bands, numbers, and operators (+, -, *, /, ())"
|
470 |
return True, ""
|
471 |
+
|
472 |
is_valid, error_message = validate_formula(custom_formula, selected_bands)
|
473 |
if not is_valid:
|
474 |
st.error(error_message)
|
|
|
476 |
elif not custom_formula:
|
477 |
st.warning("Please enter a custom formula to proceed.")
|
478 |
st.stop()
|
479 |
+
|
480 |
st.write(f"Custom Formula: {custom_formula}")
|
481 |
|
482 |
reducer_choice = st.selectbox(
|
|
|
485 |
index=0
|
486 |
)
|
487 |
|
488 |
+
start_date = st.date_input("Start Date", value=pd.to_datetime('2024-01-01'))
|
489 |
+
end_date = st.date_input("End Date", value=pd.to_datetime('2024-01-31'))
|
490 |
start_date_str = start_date.strftime('%Y-%m-%d')
|
491 |
end_date_str = end_date.strftime('%Y-%m-%d')
|
492 |
|
493 |
aggregation_period = st.selectbox(
|
494 |
+
"Select Aggregation Period",
|
495 |
+
["Daily", "Weekly", "Monthly", "Yearly", "Custom (Complete Date Range)"],
|
496 |
index=0
|
497 |
)
|
498 |
|
|
|
503 |
|
504 |
if shape_type.lower() == "point":
|
505 |
kernel_size = st.selectbox(
|
506 |
+
"Select Calculation Area",
|
507 |
["Point", "3x3 Kernel", "5x5 Kernel"],
|
508 |
index=0,
|
509 |
help="Choose 'Point' for exact point calculation, or a kernel size for area averaging."
|
|
|
517 |
|
518 |
file_upload = st.file_uploader(f"Upload your {shape_type} data (CSV, GeoJSON, KML)", type=["csv", "geojson", "kml"])
|
519 |
locations_df = pd.DataFrame()
|
520 |
+
original_lat_col = None
|
521 |
+
original_lon_col = None
|
522 |
|
523 |
if file_upload is not None:
|
524 |
if shape_type.lower() == "point":
|
525 |
if file_upload.name.endswith('.csv'):
|
|
|
526 |
locations_df = pd.read_csv(file_upload)
|
527 |
+
|
528 |
st.write("Preview of your uploaded data (first 5 rows):")
|
529 |
st.dataframe(locations_df.head())
|
530 |
+
|
531 |
+
all_columns = locations_df.columns.tolist()
|
532 |
+
|
533 |
col1, col2 = st.columns(2)
|
534 |
with col1:
|
535 |
+
original_lat_col = st.selectbox(
|
536 |
+
"Select Latitude Column",
|
537 |
+
options=all_columns,
|
538 |
+
index=all_columns.index('latitude') if 'latitude' in all_columns else 0,
|
539 |
+
help="Select the column containing latitude values"
|
540 |
)
|
541 |
with col2:
|
542 |
+
original_lon_col = st.selectbox(
|
543 |
+
"Select Longitude Column",
|
544 |
+
options=all_columns,
|
545 |
+
index=all_columns.index('longitude') if 'longitude' in all_columns else 0,
|
546 |
+
help="Select the column containing longitude values"
|
547 |
)
|
548 |
+
|
549 |
+
if not pd.api.types.is_numeric_dtype(locations_df[original_lat_col]) or not pd.api.types.is_numeric_dtype(locations_df[original_lon_col]):
|
550 |
+
st.error("Error: Selected Latitude and Longitude columns must contain numeric values")
|
|
|
551 |
st.stop()
|
552 |
+
|
553 |
+
locations_df = locations_df.rename(columns={
|
554 |
+
original_lat_col: 'latitude',
|
555 |
+
original_lon_col: 'longitude'
|
556 |
+
})
|
557 |
+
|
558 |
elif file_upload.name.endswith('.geojson'):
|
559 |
locations_df = gpd.read_file(file_upload)
|
560 |
if 'geometry' in locations_df.columns:
|
561 |
+
locations_df['latitude'] = locations_df['geometry'].y
|
562 |
+
locations_df['longitude'] = locations_df['geometry'].x
|
563 |
+
original_lat_col = 'latitude'
|
564 |
+
original_lon_col = 'longitude'
|
|
|
|
|
|
|
565 |
else:
|
566 |
st.error("GeoJSON file doesn't contain geometry column")
|
567 |
st.stop()
|
|
|
584 |
st.error("No valid Point data found in the KML file.")
|
585 |
else:
|
586 |
locations_df = gpd.GeoDataFrame(points, geometry=gpd.GeoSeries.from_wkt([p['geometry'] for p in points]), crs="EPSG:4326")
|
587 |
+
locations_df['latitude'] = locations_df['geometry'].y
|
588 |
+
locations_df['longitude'] = locations_df['geometry'].x
|
589 |
+
original_lat_col = 'latitude'
|
590 |
+
original_lon_col = 'longitude'
|
|
|
|
|
591 |
except Exception as e:
|
592 |
st.error(f"Error parsing KML file: {str(e)}")
|
593 |
+
|
594 |
+
if not locations_df.empty and 'latitude' in locations_df.columns and 'longitude' in locations_df.columns:
|
595 |
+
m = leafmap.Map(center=[locations_df['latitude'].mean(), locations_df['longitude'].mean()], zoom=10)
|
596 |
for _, row in locations_df.iterrows():
|
597 |
+
latitude = row['latitude']
|
598 |
+
longitude = row['longitude']
|
599 |
if pd.isna(latitude) or pd.isna(longitude):
|
600 |
continue
|
601 |
m.add_marker(location=[latitude, longitude], popup=row.get('name', 'No Name'))
|
602 |
st.write("Map of Uploaded Points:")
|
603 |
m.to_streamlit()
|
604 |
+
|
605 |
elif shape_type.lower() == "polygon":
|
606 |
if file_upload.name.endswith('.csv'):
|
607 |
st.error("CSV upload not supported for polygons. Please upload a GeoJSON or KML file.")
|
|
|
631 |
locations_df = gpd.GeoDataFrame(polygons, geometry=gpd.GeoSeries.from_wkt([p['geometry'] for p in polygons]), crs="EPSG:4326")
|
632 |
except Exception as e:
|
633 |
st.error(f"Error parsing KML file: {str(e)}")
|
634 |
+
|
635 |
if not locations_df.empty and 'geometry' in locations_df.columns:
|
636 |
centroid_lat = locations_df.geometry.centroid.y.mean()
|
637 |
centroid_lon = locations_df.geometry.centroid.x.mean()
|
|
|
657 |
reducer_choice,
|
658 |
shape_type,
|
659 |
aggregation_period,
|
660 |
+
original_lat_col,
|
661 |
+
original_lon_col,
|
662 |
custom_formula,
|
663 |
kernel_size,
|
664 |
+
include_boundary
|
|
|
|
|
665 |
)
|
666 |
+
|
667 |
if results:
|
668 |
result_df = pd.DataFrame(results)
|
669 |
st.write(f"Processed Results Table ({aggregation_period}) for Formula: {custom_formula}")
|
670 |
st.dataframe(result_df)
|
671 |
+
|
672 |
filename = f"{main_selection}_{dataset_id}_{start_date.strftime('%Y%m%d')}_{end_date.strftime('%Y%m%d')}_{aggregation_period.lower()}.csv"
|
673 |
st.download_button(
|
674 |
label="Download results as CSV",
|
|
|
676 |
file_name=filename,
|
677 |
mime='text/csv'
|
678 |
)
|
679 |
+
|
680 |
st.success(f"Processing complete! Total processing time: {processing_time:.2f} seconds.")
|
681 |
else:
|
682 |
st.warning("No results were generated. Check your inputs or formula.")
|
683 |
st.info(f"Total processing time: {processing_time:.2f} seconds.")
|
684 |
+
|
685 |
except Exception as e:
|
686 |
st.error(f"An error occurred during processing: {str(e)}")
|
687 |
else:
|