YashMK89 committed on
Commit
a57f55b
·
verified ·
1 Parent(s): fdee337

update app.py

Browse files
Files changed (1) hide show
  1. app.py +184 -144
app.py CHANGED
@@ -4,7 +4,7 @@ import ee
4
  import os
5
  import pandas as pd
6
  import geopandas as gpd
7
- from datetime import datetime
8
  import leafmap.foliumap as leafmap
9
  import re
10
  from shapely.geometry import base
@@ -143,68 +143,98 @@ def calculate_custom_formula(image, geometry, selected_bands, custom_formula, re
143
  return ee.Image(0).rename('custom_result').set('error', str(e))
144
 
145
  # Aggregation functions
146
- def aggregate_data_custom(collection):
147
- collection = collection.map(lambda image: image.set('day', ee.Date(image.get('system:time_start')).format('YYYY-MM-dd')))
148
- grouped_by_day = collection.aggregate_array('day').distinct()
149
- def calculate_daily_mean(day):
150
- daily_collection = collection.filter(ee.Filter.eq('day', day))
151
- daily_mean = daily_collection.mean()
152
- return daily_mean.set('day', day)
153
- daily_images = ee.List(grouped_by_day.map(calculate_daily_mean))
 
 
 
 
 
154
  return ee.ImageCollection(daily_images)
155
 
156
- def aggregate_data_weekly(collection):
157
- def set_week_start(image):
158
- date = ee.Date(image.get('system:time_start'))
159
- days_since_week_start = date.getRelative('day', 'week')
160
- offset = ee.Number(days_since_week_start).multiply(-1)
161
- week_start = date.advance(offset, 'day')
162
- return image.set('week_start', week_start.format('YYYY-MM-dd'))
163
- collection = collection.map(set_week_start)
164
- grouped_by_week = collection.aggregate_array('week_start').distinct()
165
- def calculate_weekly_mean(week_start):
166
- weekly_collection = collection.filter(ee.Filter.eq('week_start', week_start))
167
- weekly_mean = weekly_collection.mean()
168
- return weekly_mean.set('week_start', week_start)
169
- weekly_images = ee.List(grouped_by_week.map(calculate_weekly_mean))
 
 
 
 
 
 
 
 
170
  return ee.ImageCollection(weekly_images)
171
 
172
  def aggregate_data_monthly(collection, start_date, end_date):
173
- collection = collection.filterDate(start_date, end_date)
174
- collection = collection.map(lambda image: image.set('month', ee.Date(image.get('system:time_start')).format('YYYY-MM')))
175
- grouped_by_month = collection.aggregate_array('month').distinct()
176
- def calculate_monthly_mean(month):
177
- monthly_collection = collection.filter(ee.Filter.eq('month', month))
178
- monthly_mean = monthly_collection.mean()
179
- return monthly_mean.set('month', month)
180
- monthly_images = ee.List(grouped_by_month.map(calculate_monthly_mean))
 
 
 
 
 
 
181
  return ee.ImageCollection(monthly_images)
182
 
183
- def aggregate_data_yearly(collection):
184
- collection = collection.map(lambda image: image.set('year', ee.Date(image.get('system:time_start')).format('YYYY')))
185
- grouped_by_year = collection.aggregate_array('year').distinct()
186
- def calculate_yearly_mean(year):
187
- yearly_collection = collection.filter(ee.Filter.eq('year', year))
188
- yearly_mean = yearly_collection.mean()
189
- return yearly_mean.set('year', year)
190
- yearly_images = ee.List(grouped_by_year.map(calculate_yearly_mean))
 
 
 
 
 
 
191
  return ee.ImageCollection(yearly_images)
192
 
 
 
 
 
 
193
  # Worker function for processing a single geometry
194
- def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selected_bands, reducer_choice, shape_type, aggregation_period, custom_formula, kernel_size=None, include_boundary=None, lat_col=None, lon_col=None):
195
  if shape_type.lower() == "point":
196
- latitude = row.get(lat_col)
197
- longitude = row.get(lon_col)
198
  if pd.isna(latitude) or pd.isna(longitude):
199
- return None # Skip invalid points
200
  location_name = row.get('name', f"Location_{row.name}")
201
  if kernel_size == "3x3 Kernel":
202
- buffer_size = 45 # 90m x 90m
203
  roi = ee.Geometry.Point([longitude, latitude]).buffer(buffer_size).bounds()
204
  elif kernel_size == "5x5 Kernel":
205
- buffer_size = 75 # 150m x 150m
206
  roi = ee.Geometry.Point([longitude, latitude]).buffer(buffer_size).bounds()
207
- else: # Point
208
  roi = ee.Geometry.Point([longitude, latitude])
209
  elif shape_type.lower() == "polygon":
210
  polygon_geometry = row.get('geometry')
@@ -214,47 +244,50 @@ def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selec
214
  if not include_boundary:
215
  roi = roi.buffer(-30).bounds()
216
  except ValueError:
217
- return None # Skip invalid polygons
218
- # Filter and aggregate the image collection
219
- collection = ee.ImageCollection(dataset_id) \
220
- .filterDate(ee.Date(start_date_str), ee.Date(end_date_str)) \
221
- .filterBounds(roi)
222
- if aggregation_period.lower() == 'custom (start date to end date)':
223
- collection = aggregate_data_custom(collection)
 
 
224
  elif aggregation_period.lower() == 'weekly':
225
- collection = aggregate_data_weekly(collection)
 
226
  elif aggregation_period.lower() == 'monthly':
227
  collection = aggregate_data_monthly(collection, start_date_str, end_date_str)
 
228
  elif aggregation_period.lower() == 'yearly':
229
- collection = aggregate_data_yearly(collection)
 
 
 
 
 
230
  # Process each image in the collection
231
  image_list = collection.toList(collection.size())
232
- processed_weeks = set()
233
  aggregated_results = []
 
234
  for i in range(image_list.size().getInfo()):
235
  image = ee.Image(image_list.get(i))
236
- if aggregation_period.lower() == 'custom (start date to end date)':
237
- timestamp = image.get('day')
238
- period_label = 'Date'
239
- date = ee.Date(timestamp).format('YYYY-MM-dd').getInfo()
240
  elif aggregation_period.lower() == 'weekly':
241
- timestamp = image.get('week_start')
242
- period_label = 'Week'
243
- date = ee.String(timestamp).getInfo()
244
- if (pd.to_datetime(date) < pd.to_datetime(start_date_str) or
245
- pd.to_datetime(date) > pd.to_datetime(end_date_str) or
246
- date in processed_weeks):
247
- continue
248
- processed_weeks.add(date)
249
  elif aggregation_period.lower() == 'monthly':
250
- timestamp = image.get('month')
251
- period_label = 'Month'
252
- date = ee.Date(timestamp).format('YYYY-MM').getInfo()
253
  elif aggregation_period.lower() == 'yearly':
254
- timestamp = image.get('year')
255
- period_label = 'Year'
256
- date = ee.Date(timestamp).format('YYYY').getInfo()
 
 
257
  index_image = calculate_custom_formula(image, roi, selected_bands, custom_formula, reducer_choice, scale=30)
 
258
  try:
259
  index_value = index_image.reduceRegion(
260
  reducer=get_reducer(reducer_choice),
@@ -262,29 +295,30 @@ def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selec
262
  scale=30
263
  ).get('custom_result')
264
  calculated_value = index_value.getInfo()
 
265
  if isinstance(calculated_value, (int, float)):
266
  result = {
267
  'Location Name': location_name,
268
  period_label: date,
269
- 'Start Date': start_date_str,
270
- 'End Date': end_date_str,
271
  'Calculated Value': calculated_value
272
  }
273
  if shape_type.lower() == 'point':
274
- result[lat_col] = latitude
275
- result[lon_col] = longitude
276
  aggregated_results.append(result)
277
  except Exception as e:
278
  st.error(f"Error retrieving value for {location_name}: {e}")
 
279
  return aggregated_results
280
 
281
  # Main processing function
282
- def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id, selected_bands, reducer_choice, shape_type, aggregation_period, custom_formula="", kernel_size=None, include_boundary=None, lat_col=None, lon_col=None):
283
  aggregated_results = []
284
  total_steps = len(locations_df)
285
  progress_bar = st.progress(0)
286
  progress_text = st.empty()
287
- start_time = time.time() # Start timing the process
 
288
  with ThreadPoolExecutor(max_workers=10) as executor:
289
  futures = []
290
  for idx, row in locations_df.iterrows():
@@ -299,12 +333,13 @@ def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id,
299
  shape_type,
300
  aggregation_period,
301
  custom_formula,
 
 
302
  kernel_size,
303
- include_boundary,
304
- lat_col,
305
- lon_col
306
  )
307
  futures.append(future)
 
308
  completed = 0
309
  for future in as_completed(futures):
310
  result = future.result()
@@ -314,26 +349,14 @@ def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id,
314
  progress_percentage = completed / total_steps
315
  progress_bar.progress(progress_percentage)
316
  progress_text.markdown(f"Processing: {int(progress_percentage * 100)}%")
317
- # End timing the process
318
  end_time = time.time()
319
- processing_time = end_time - start_time # Calculate total processing time
 
320
  if aggregated_results:
321
  result_df = pd.DataFrame(aggregated_results)
322
- if aggregation_period.lower() == 'custom (start date to end date)':
323
- agg_dict = {
324
- 'Start Date': 'first',
325
- 'End Date': 'first',
326
- 'Calculated Value': 'mean'
327
- }
328
- if shape_type.lower() == 'point':
329
- agg_dict[lat_col] = 'first'
330
- agg_dict[lon_col] = 'first'
331
- aggregated_output = result_df.groupby('Location Name').agg(agg_dict).reset_index()
332
- aggregated_output.rename(columns={'Calculated Value': 'Aggregated Value'}, inplace=True)
333
- return aggregated_output.to_dict(orient='records'), processing_time # Return processing time
334
- else:
335
- return result_df.to_dict(orient='records'), processing_time
336
- return [], processing_time
337
 
338
  # Streamlit App Logic
339
  st.markdown("<h5>Image Collection</h5>", unsafe_allow_html=True)
@@ -341,6 +364,7 @@ imagery_base = st.selectbox("Select Imagery Base", ["Sentinel", "Landsat", "MODI
341
 
342
  # Initialize data as an empty dictionary
343
  data = {}
 
344
  if imagery_base == "Sentinel":
345
  dataset_file = "sentinel_datasets.json"
346
  try:
@@ -392,10 +416,12 @@ if not data:
392
  st.stop()
393
 
394
  st.markdown("<hr><h5><b>{}</b></h5>".format(imagery_base), unsafe_allow_html=True)
 
395
  main_selection = st.selectbox(f"Select {imagery_base} Dataset Category", list(data.keys()))
396
 
397
  sub_selection = None
398
  dataset_id = None
 
399
  if main_selection:
400
  sub_options = data[main_selection]["sub_options"]
401
  sub_selection = st.selectbox(f"Select Specific {imagery_base} Dataset ID", list(sub_options.keys()))
@@ -405,6 +431,7 @@ if main_selection:
405
  dataset_id = sub_selection
406
 
407
  st.markdown("<hr><h5><b>Earth Engine Index Calculator</b></h5>", unsafe_allow_html=True)
 
408
  if main_selection and sub_selection:
409
  dataset_bands = data[main_selection]["bands"].get(sub_selection, [])
410
  st.write(f"Available Bands for {sub_options[sub_selection]}: {', '.join(dataset_bands)}")
@@ -414,9 +441,11 @@ if main_selection and sub_selection:
414
  default=[dataset_bands[0]] if dataset_bands else [],
415
  help=f"Select 1 or 2 bands from: {', '.join(dataset_bands)}"
416
  )
 
417
  if len(selected_bands) < 1:
418
  st.warning("Please select at least one band.")
419
  st.stop()
 
420
  if selected_bands:
421
  if len(selected_bands) == 1:
422
  default_formula = f"{selected_bands[0]}"
@@ -429,6 +458,7 @@ if main_selection and sub_selection:
429
  value=default_formula,
430
  help=f"Use only these bands: {', '.join(selected_bands)}. Examples: {example}"
431
  )
 
432
  def validate_formula(formula, selected_bands):
433
  allowed_chars = set(" +-*/()0123456789.")
434
  terms = re.findall(r'[a-zA-Z][a-zA-Z0-9_]*', formula)
@@ -438,6 +468,7 @@ if main_selection and sub_selection:
438
  if not all(char in allowed_chars or char in ''.join(selected_bands) for char in formula):
439
  return False, "Formula contains invalid characters. Use only bands, numbers, and operators (+, -, *, /, ())"
440
  return True, ""
 
441
  is_valid, error_message = validate_formula(custom_formula, selected_bands)
442
  if not is_valid:
443
  st.error(error_message)
@@ -445,6 +476,7 @@ if main_selection and sub_selection:
445
  elif not custom_formula:
446
  st.warning("Please enter a custom formula to proceed.")
447
  st.stop()
 
448
  st.write(f"Custom Formula: {custom_formula}")
449
 
450
  reducer_choice = st.selectbox(
@@ -453,14 +485,14 @@ reducer_choice = st.selectbox(
453
  index=0
454
  )
455
 
456
- start_date = st.date_input("Start Date", value=pd.to_datetime('2024-11-01'))
457
- end_date = st.date_input("End Date", value=pd.to_datetime('2024-12-01'))
458
  start_date_str = start_date.strftime('%Y-%m-%d')
459
  end_date_str = end_date.strftime('%Y-%m-%d')
460
 
461
  aggregation_period = st.selectbox(
462
- "Select Aggregation Period (e.g, Custom(Start Date to End Date) , Weekly , Monthly , Yearly)",
463
- ["Custom (Start Date to End Date)", "Weekly", "Monthly", "Yearly"],
464
  index=0
465
  )
466
 
@@ -471,7 +503,7 @@ include_boundary = None
471
 
472
  if shape_type.lower() == "point":
473
  kernel_size = st.selectbox(
474
- "Select Calculation Area(e.g, Point , 3x3 Kernel , 5x5 Kernel)",
475
  ["Point", "3x3 Kernel", "5x5 Kernel"],
476
  index=0,
477
  help="Choose 'Point' for exact point calculation, or a kernel size for area averaging."
@@ -485,46 +517,51 @@ elif shape_type.lower() == "polygon":
485
 
486
  file_upload = st.file_uploader(f"Upload your {shape_type} data (CSV, GeoJSON, KML)", type=["csv", "geojson", "kml"])
487
  locations_df = pd.DataFrame()
 
 
488
 
489
  if file_upload is not None:
490
  if shape_type.lower() == "point":
491
  if file_upload.name.endswith('.csv'):
492
- # Read the CSV file
493
  locations_df = pd.read_csv(file_upload)
494
- # Show the first few rows to help user identify columns
495
  st.write("Preview of your uploaded data (first 5 rows):")
496
  st.dataframe(locations_df.head())
497
-
498
- # Dynamically populate dropdown menus for latitude and longitude
 
499
  col1, col2 = st.columns(2)
500
  with col1:
501
- lat_col = st.selectbox(
502
- "Select the Latitude column",
503
- options=locations_df.columns,
504
- help="Choose the column containing latitude values."
 
505
  )
506
  with col2:
507
- lon_col = st.selectbox(
508
- "Select the Longitude column",
509
- options=locations_df.columns,
510
- help="Choose the column containing longitude values."
 
511
  )
512
-
513
- # Validate the selected columns contain numeric data
514
- if not pd.api.types.is_numeric_dtype(locations_df[lat_col]) or not pd.api.types.is_numeric_dtype(locations_df[lon_col]):
515
- st.error("Error: Selected Latitude and Longitude columns must contain numeric values.")
516
  st.stop()
517
-
 
 
 
 
 
518
  elif file_upload.name.endswith('.geojson'):
519
  locations_df = gpd.read_file(file_upload)
520
  if 'geometry' in locations_df.columns:
521
- # Extract latitude and longitude from geometry
522
- locations_df['original_latitude'] = locations_df['geometry'].y
523
- locations_df['original_longitude'] = locations_df['geometry'].x
524
-
525
- # Preserve original column names
526
- lat_col = 'original_latitude'
527
- lon_col = 'original_longitude'
528
  else:
529
  st.error("GeoJSON file doesn't contain geometry column")
530
  st.stop()
@@ -547,25 +584,24 @@ if file_upload is not None:
547
  st.error("No valid Point data found in the KML file.")
548
  else:
549
  locations_df = gpd.GeoDataFrame(points, geometry=gpd.GeoSeries.from_wkt([p['geometry'] for p in points]), crs="EPSG:4326")
550
- locations_df['original_latitude'] = locations_df['geometry'].y
551
- locations_df['original_longitude'] = locations_df['geometry'].x
552
-
553
- # Preserve original column names
554
- lat_col = 'original_latitude'
555
- lon_col = 'original_longitude'
556
  except Exception as e:
557
  st.error(f"Error parsing KML file: {str(e)}")
558
- # Display map for points if we have valid data
559
- if not locations_df.empty and lat_col in locations_df.columns and lon_col in locations_df.columns:
560
- m = leafmap.Map(center=[locations_df[lat_col].mean(), locations_df[lon_col].mean()], zoom=10)
561
  for _, row in locations_df.iterrows():
562
- latitude = row[lat_col]
563
- longitude = row[lon_col]
564
  if pd.isna(latitude) or pd.isna(longitude):
565
  continue
566
  m.add_marker(location=[latitude, longitude], popup=row.get('name', 'No Name'))
567
  st.write("Map of Uploaded Points:")
568
  m.to_streamlit()
 
569
  elif shape_type.lower() == "polygon":
570
  if file_upload.name.endswith('.csv'):
571
  st.error("CSV upload not supported for polygons. Please upload a GeoJSON or KML file.")
@@ -595,7 +631,7 @@ if file_upload is not None:
595
  locations_df = gpd.GeoDataFrame(polygons, geometry=gpd.GeoSeries.from_wkt([p['geometry'] for p in polygons]), crs="EPSG:4326")
596
  except Exception as e:
597
  st.error(f"Error parsing KML file: {str(e)}")
598
- # Display map for polygons if we have valid data
599
  if not locations_df.empty and 'geometry' in locations_df.columns:
600
  centroid_lat = locations_df.geometry.centroid.y.mean()
601
  centroid_lon = locations_df.geometry.centroid.x.mean()
@@ -621,16 +657,18 @@ if st.button(f"Calculate {custom_formula}"):
621
  reducer_choice,
622
  shape_type,
623
  aggregation_period,
 
 
624
  custom_formula,
625
  kernel_size,
626
- include_boundary,
627
- lat_col=lat_col if shape_type.lower() == "point" else None,
628
- lon_col=lon_col if shape_type.lower() == "point" else None
629
  )
 
630
  if results:
631
  result_df = pd.DataFrame(results)
632
  st.write(f"Processed Results Table ({aggregation_period}) for Formula: {custom_formula}")
633
  st.dataframe(result_df)
 
634
  filename = f"{main_selection}_{dataset_id}_{start_date.strftime('%Y%m%d')}_{end_date.strftime('%Y%m%d')}_{aggregation_period.lower()}.csv"
635
  st.download_button(
636
  label="Download results as CSV",
@@ -638,10 +676,12 @@ if st.button(f"Calculate {custom_formula}"):
638
  file_name=filename,
639
  mime='text/csv'
640
  )
 
641
  st.success(f"Processing complete! Total processing time: {processing_time:.2f} seconds.")
642
  else:
643
  st.warning("No results were generated. Check your inputs or formula.")
644
  st.info(f"Total processing time: {processing_time:.2f} seconds.")
 
645
  except Exception as e:
646
  st.error(f"An error occurred during processing: {str(e)}")
647
  else:
 
4
  import os
5
  import pandas as pd
6
  import geopandas as gpd
7
+ from datetime import datetime, timedelta
8
  import leafmap.foliumap as leafmap
9
  import re
10
  from shapely.geometry import base
 
143
  return ee.Image(0).rename('custom_result').set('error', str(e))
144
 
145
  # Aggregation functions
146
def aggregate_data_daily(collection, start_date, end_date):
    """Build one mean composite per calendar day in [start_date, end_date].

    Each output image carries a 'date' property (YYYY-MM-DD) naming its day.
    Days with no source imagery still yield an (empty-band) mean image —
    NOTE(review): presumably filtered out downstream; confirm against caller.
    """
    # Inclusive daily range; pandas handles the calendar arithmetic.
    day_range = pd.date_range(start=start_date, end=end_date, freq='D')

    def daily_composite(day):
        label = day.strftime('%Y-%m-%d')
        # filterDate's end bound is exclusive, so one day = [label, label + 1 day).
        window = collection.filterDate(ee.Date(label), ee.Date(label).advance(1, 'day'))
        return window.mean().set('date', label)

    return ee.ImageCollection([daily_composite(day) for day in day_range])
160
 
161
def aggregate_data_weekly(collection, start_date, end_date):
    """Build one mean composite per 7-day window, anchored on start_date.

    Windows are inclusive of both endpoints; the final window is truncated
    at end_date. Each image carries 'week_start' and 'week_end' properties
    (YYYY-MM-DD).

    Bug fix: ee filterDate's upper bound is exclusive, and the next interval
    begins at week_end + 1 day, so the original dropped the 7th day of every
    week from all composites. The window now advances one day past week_end.
    """
    start = pd.to_datetime(start_date)
    end = pd.to_datetime(end_date)

    # Inclusive (week_start, week_end) pairs covering the whole range.
    intervals = []
    cursor = start
    while cursor <= end:
        week_end = min(cursor + timedelta(days=6), end)
        intervals.append((cursor.strftime('%Y-%m-%d'), week_end.strftime('%Y-%m-%d')))
        cursor = week_end + timedelta(days=1)

    def weekly_composite(week_start, week_end):
        # Advance one past week_end so the week's last day is included
        # (filterDate end bound is exclusive).
        window = collection.filterDate(ee.Date(week_start),
                                       ee.Date(week_end).advance(1, 'day'))
        return window.mean().set('week_start', week_start).set('week_end', week_end)

    return ee.ImageCollection([weekly_composite(s, e) for s, e in intervals])
184
 
185
def aggregate_data_monthly(collection, start_date, end_date):
    """Build one mean composite per calendar month overlapping [start_date, end_date].

    Each image carries a 'month' property (YYYY-MM).

    Bug fixes: with freq='MS' the original skipped a partial first month
    entirely whenever start_date fell mid-month, and the final month's
    window ran to the next month-begin, pulling imagery from after
    end_date (the collection is only spatially filtered by the caller).
    Both window edges are now clamped to the requested range.
    """
    start = pd.to_datetime(start_date)
    end = pd.to_datetime(end_date)

    # Anchor on the first day of start's month so a partial first month is kept.
    month_starts = pd.date_range(start=start.replace(day=1), end=end, freq='MS')

    def monthly_composite(month_start):
        # Clamp to the requested range; filterDate's end bound is exclusive,
        # so the upper clamp is end_date + 1 day to keep end_date itself.
        lower = max(month_start, start)
        upper = min(month_start + pd.offsets.MonthBegin(1), end + pd.Timedelta(days=1))
        window = collection.filterDate(ee.Date(lower.strftime('%Y-%m-%d')),
                                       ee.Date(upper.strftime('%Y-%m-%d')))
        return window.mean().set('month', month_start.strftime('%Y-%m'))

    return ee.ImageCollection([monthly_composite(m) for m in month_starts])
201
 
202
def aggregate_data_yearly(collection, start_date, end_date):
    """Build one mean composite per calendar year overlapping [start_date, end_date].

    Each image carries a 'year' property (YYYY).

    Bug fix: the original filtered whole calendar years (Jan 1 to Jan 1),
    so the first year's composite included imagery from before start_date
    and the last year's from after end_date. Each year's window is now
    clamped to the requested range.
    """
    start = pd.to_datetime(start_date)
    end = pd.to_datetime(end_date)

    def yearly_composite(year):
        # Clamp to the requested range; filterDate's end bound is exclusive,
        # so the upper clamp is end_date + 1 day to keep end_date itself.
        lower = max(pd.Timestamp(year=year, month=1, day=1), start)
        upper = min(pd.Timestamp(year=year + 1, month=1, day=1),
                    end + pd.Timedelta(days=1))
        window = collection.filterDate(ee.Date(lower.strftime('%Y-%m-%d')),
                                       ee.Date(upper.strftime('%Y-%m-%d')))
        return window.mean().set('year', str(year))

    return ee.ImageCollection([yearly_composite(y) for y in range(start.year, end.year + 1)])
217
 
218
def aggregate_data_custom(collection, start_date, end_date):
    """Build a single mean composite over the whole [start_date, end_date] range.

    The composite carries 'start_date' and 'end_date' properties for labelling.

    Bug fix: ee filterDate's upper bound is exclusive, so the original
    silently dropped imagery from end_date itself — inconsistent with the
    daily aggregator, which is end-inclusive. The end bound is now advanced
    by one day.
    """
    window = collection.filterDate(ee.Date(start_date),
                                   ee.Date(end_date).advance(1, 'day'))
    return window.mean().set('start_date', start_date).set('end_date', end_date)
222
+
223
  # Worker function for processing a single geometry
224
+ def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selected_bands, reducer_choice, shape_type, aggregation_period, custom_formula, original_lat_col, original_lon_col, kernel_size=None, include_boundary=None):
225
  if shape_type.lower() == "point":
226
+ latitude = row.get('latitude')
227
+ longitude = row.get('longitude')
228
  if pd.isna(latitude) or pd.isna(longitude):
229
+ return None
230
  location_name = row.get('name', f"Location_{row.name}")
231
  if kernel_size == "3x3 Kernel":
232
+ buffer_size = 45
233
  roi = ee.Geometry.Point([longitude, latitude]).buffer(buffer_size).bounds()
234
  elif kernel_size == "5x5 Kernel":
235
+ buffer_size = 75
236
  roi = ee.Geometry.Point([longitude, latitude]).buffer(buffer_size).bounds()
237
+ else:
238
  roi = ee.Geometry.Point([longitude, latitude])
239
  elif shape_type.lower() == "polygon":
240
  polygon_geometry = row.get('geometry')
 
244
  if not include_boundary:
245
  roi = roi.buffer(-30).bounds()
246
  except ValueError:
247
+ return None
248
+
249
+ # Filter collection by location
250
+ collection = ee.ImageCollection(dataset_id).filterBounds(roi)
251
+
252
+ # Apply temporal aggregation based on selected period
253
+ if aggregation_period.lower() == 'daily':
254
+ collection = aggregate_data_daily(collection, start_date_str, end_date_str)
255
+ period_label = 'Date'
256
  elif aggregation_period.lower() == 'weekly':
257
+ collection = aggregate_data_weekly(collection, start_date_str, end_date_str)
258
+ period_label = 'Week'
259
  elif aggregation_period.lower() == 'monthly':
260
  collection = aggregate_data_monthly(collection, start_date_str, end_date_str)
261
+ period_label = 'Month'
262
  elif aggregation_period.lower() == 'yearly':
263
+ collection = aggregate_data_yearly(collection, start_date_str, end_date_str)
264
+ period_label = 'Year'
265
+ else: # Custom
266
+ collection = ee.ImageCollection([aggregate_data_custom(collection, start_date_str, end_date_str)])
267
+ period_label = 'Date Range'
268
+
269
  # Process each image in the collection
270
  image_list = collection.toList(collection.size())
 
271
  aggregated_results = []
272
+
273
  for i in range(image_list.size().getInfo()):
274
  image = ee.Image(image_list.get(i))
275
+
276
+ # Get the appropriate date label based on aggregation period
277
+ if aggregation_period.lower() == 'daily':
278
+ date = image.get('date').getInfo()
279
  elif aggregation_period.lower() == 'weekly':
280
+ date = f"{image.get('week_start').getInfo()} to {image.get('week_end').getInfo()}"
 
 
 
 
 
 
 
281
  elif aggregation_period.lower() == 'monthly':
282
+ date = image.get('month').getInfo()
 
 
283
  elif aggregation_period.lower() == 'yearly':
284
+ date = image.get('year').getInfo()
285
+ else: # Custom
286
+ date = f"{image.get('start_date').getInfo()} to {image.get('end_date').getInfo()}"
287
+
288
+ # Calculate the custom formula
289
  index_image = calculate_custom_formula(image, roi, selected_bands, custom_formula, reducer_choice, scale=30)
290
+
291
  try:
292
  index_value = index_image.reduceRegion(
293
  reducer=get_reducer(reducer_choice),
 
295
  scale=30
296
  ).get('custom_result')
297
  calculated_value = index_value.getInfo()
298
+
299
  if isinstance(calculated_value, (int, float)):
300
  result = {
301
  'Location Name': location_name,
302
  period_label: date,
 
 
303
  'Calculated Value': calculated_value
304
  }
305
  if shape_type.lower() == 'point':
306
+ result[original_lat_col] = latitude
307
+ result[original_lon_col] = longitude
308
  aggregated_results.append(result)
309
  except Exception as e:
310
  st.error(f"Error retrieving value for {location_name}: {e}")
311
+
312
  return aggregated_results
313
 
314
  # Main processing function
315
+ def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id, selected_bands, reducer_choice, shape_type, aggregation_period, original_lat_col, original_lon_col, custom_formula="", kernel_size=None, include_boundary=None):
316
  aggregated_results = []
317
  total_steps = len(locations_df)
318
  progress_bar = st.progress(0)
319
  progress_text = st.empty()
320
+
321
+ start_time = time.time()
322
  with ThreadPoolExecutor(max_workers=10) as executor:
323
  futures = []
324
  for idx, row in locations_df.iterrows():
 
333
  shape_type,
334
  aggregation_period,
335
  custom_formula,
336
+ original_lat_col,
337
+ original_lon_col,
338
  kernel_size,
339
+ include_boundary
 
 
340
  )
341
  futures.append(future)
342
+
343
  completed = 0
344
  for future in as_completed(futures):
345
  result = future.result()
 
349
  progress_percentage = completed / total_steps
350
  progress_bar.progress(progress_percentage)
351
  progress_text.markdown(f"Processing: {int(progress_percentage * 100)}%")
352
+
353
  end_time = time.time()
354
+ processing_time = end_time - start_time
355
+
356
  if aggregated_results:
357
  result_df = pd.DataFrame(aggregated_results)
358
+ return result_df.to_dict(orient='records'), processing_time
359
+ return [], processing_time
 
 
 
 
 
 
 
 
 
 
 
 
 
360
 
361
  # Streamlit App Logic
362
  st.markdown("<h5>Image Collection</h5>", unsafe_allow_html=True)
 
364
 
365
  # Initialize data as an empty dictionary
366
  data = {}
367
+
368
  if imagery_base == "Sentinel":
369
  dataset_file = "sentinel_datasets.json"
370
  try:
 
416
  st.stop()
417
 
418
  st.markdown("<hr><h5><b>{}</b></h5>".format(imagery_base), unsafe_allow_html=True)
419
+
420
  main_selection = st.selectbox(f"Select {imagery_base} Dataset Category", list(data.keys()))
421
 
422
  sub_selection = None
423
  dataset_id = None
424
+
425
  if main_selection:
426
  sub_options = data[main_selection]["sub_options"]
427
  sub_selection = st.selectbox(f"Select Specific {imagery_base} Dataset ID", list(sub_options.keys()))
 
431
  dataset_id = sub_selection
432
 
433
  st.markdown("<hr><h5><b>Earth Engine Index Calculator</b></h5>", unsafe_allow_html=True)
434
+
435
  if main_selection and sub_selection:
436
  dataset_bands = data[main_selection]["bands"].get(sub_selection, [])
437
  st.write(f"Available Bands for {sub_options[sub_selection]}: {', '.join(dataset_bands)}")
 
441
  default=[dataset_bands[0]] if dataset_bands else [],
442
  help=f"Select 1 or 2 bands from: {', '.join(dataset_bands)}"
443
  )
444
+
445
  if len(selected_bands) < 1:
446
  st.warning("Please select at least one band.")
447
  st.stop()
448
+
449
  if selected_bands:
450
  if len(selected_bands) == 1:
451
  default_formula = f"{selected_bands[0]}"
 
458
  value=default_formula,
459
  help=f"Use only these bands: {', '.join(selected_bands)}. Examples: {example}"
460
  )
461
+
462
  def validate_formula(formula, selected_bands):
463
  allowed_chars = set(" +-*/()0123456789.")
464
  terms = re.findall(r'[a-zA-Z][a-zA-Z0-9_]*', formula)
 
468
  if not all(char in allowed_chars or char in ''.join(selected_bands) for char in formula):
469
  return False, "Formula contains invalid characters. Use only bands, numbers, and operators (+, -, *, /, ())"
470
  return True, ""
471
+
472
  is_valid, error_message = validate_formula(custom_formula, selected_bands)
473
  if not is_valid:
474
  st.error(error_message)
 
476
  elif not custom_formula:
477
  st.warning("Please enter a custom formula to proceed.")
478
  st.stop()
479
+
480
  st.write(f"Custom Formula: {custom_formula}")
481
 
482
  reducer_choice = st.selectbox(
 
485
  index=0
486
  )
487
 
488
+ start_date = st.date_input("Start Date", value=pd.to_datetime('2024-01-01'))
489
+ end_date = st.date_input("End Date", value=pd.to_datetime('2024-01-31'))
490
  start_date_str = start_date.strftime('%Y-%m-%d')
491
  end_date_str = end_date.strftime('%Y-%m-%d')
492
 
493
  aggregation_period = st.selectbox(
494
+ "Select Aggregation Period",
495
+ ["Daily", "Weekly", "Monthly", "Yearly", "Custom (Complete Date Range)"],
496
  index=0
497
  )
498
 
 
503
 
504
  if shape_type.lower() == "point":
505
  kernel_size = st.selectbox(
506
+ "Select Calculation Area",
507
  ["Point", "3x3 Kernel", "5x5 Kernel"],
508
  index=0,
509
  help="Choose 'Point' for exact point calculation, or a kernel size for area averaging."
 
517
 
518
  file_upload = st.file_uploader(f"Upload your {shape_type} data (CSV, GeoJSON, KML)", type=["csv", "geojson", "kml"])
519
  locations_df = pd.DataFrame()
520
+ original_lat_col = None
521
+ original_lon_col = None
522
 
523
  if file_upload is not None:
524
  if shape_type.lower() == "point":
525
  if file_upload.name.endswith('.csv'):
 
526
  locations_df = pd.read_csv(file_upload)
527
+
528
  st.write("Preview of your uploaded data (first 5 rows):")
529
  st.dataframe(locations_df.head())
530
+
531
+ all_columns = locations_df.columns.tolist()
532
+
533
  col1, col2 = st.columns(2)
534
  with col1:
535
+ original_lat_col = st.selectbox(
536
+ "Select Latitude Column",
537
+ options=all_columns,
538
+ index=all_columns.index('latitude') if 'latitude' in all_columns else 0,
539
+ help="Select the column containing latitude values"
540
  )
541
  with col2:
542
+ original_lon_col = st.selectbox(
543
+ "Select Longitude Column",
544
+ options=all_columns,
545
+ index=all_columns.index('longitude') if 'longitude' in all_columns else 0,
546
+ help="Select the column containing longitude values"
547
  )
548
+
549
+ if not pd.api.types.is_numeric_dtype(locations_df[original_lat_col]) or not pd.api.types.is_numeric_dtype(locations_df[original_lon_col]):
550
+ st.error("Error: Selected Latitude and Longitude columns must contain numeric values")
 
551
  st.stop()
552
+
553
+ locations_df = locations_df.rename(columns={
554
+ original_lat_col: 'latitude',
555
+ original_lon_col: 'longitude'
556
+ })
557
+
558
  elif file_upload.name.endswith('.geojson'):
559
  locations_df = gpd.read_file(file_upload)
560
  if 'geometry' in locations_df.columns:
561
+ locations_df['latitude'] = locations_df['geometry'].y
562
+ locations_df['longitude'] = locations_df['geometry'].x
563
+ original_lat_col = 'latitude'
564
+ original_lon_col = 'longitude'
 
 
 
565
  else:
566
  st.error("GeoJSON file doesn't contain geometry column")
567
  st.stop()
 
584
  st.error("No valid Point data found in the KML file.")
585
  else:
586
  locations_df = gpd.GeoDataFrame(points, geometry=gpd.GeoSeries.from_wkt([p['geometry'] for p in points]), crs="EPSG:4326")
587
+ locations_df['latitude'] = locations_df['geometry'].y
588
+ locations_df['longitude'] = locations_df['geometry'].x
589
+ original_lat_col = 'latitude'
590
+ original_lon_col = 'longitude'
 
 
591
  except Exception as e:
592
  st.error(f"Error parsing KML file: {str(e)}")
593
+
594
+ if not locations_df.empty and 'latitude' in locations_df.columns and 'longitude' in locations_df.columns:
595
+ m = leafmap.Map(center=[locations_df['latitude'].mean(), locations_df['longitude'].mean()], zoom=10)
596
  for _, row in locations_df.iterrows():
597
+ latitude = row['latitude']
598
+ longitude = row['longitude']
599
  if pd.isna(latitude) or pd.isna(longitude):
600
  continue
601
  m.add_marker(location=[latitude, longitude], popup=row.get('name', 'No Name'))
602
  st.write("Map of Uploaded Points:")
603
  m.to_streamlit()
604
+
605
  elif shape_type.lower() == "polygon":
606
  if file_upload.name.endswith('.csv'):
607
  st.error("CSV upload not supported for polygons. Please upload a GeoJSON or KML file.")
 
631
  locations_df = gpd.GeoDataFrame(polygons, geometry=gpd.GeoSeries.from_wkt([p['geometry'] for p in polygons]), crs="EPSG:4326")
632
  except Exception as e:
633
  st.error(f"Error parsing KML file: {str(e)}")
634
+
635
  if not locations_df.empty and 'geometry' in locations_df.columns:
636
  centroid_lat = locations_df.geometry.centroid.y.mean()
637
  centroid_lon = locations_df.geometry.centroid.x.mean()
 
657
  reducer_choice,
658
  shape_type,
659
  aggregation_period,
660
+ original_lat_col,
661
+ original_lon_col,
662
  custom_formula,
663
  kernel_size,
664
+ include_boundary
 
 
665
  )
666
+
667
  if results:
668
  result_df = pd.DataFrame(results)
669
  st.write(f"Processed Results Table ({aggregation_period}) for Formula: {custom_formula}")
670
  st.dataframe(result_df)
671
+
672
  filename = f"{main_selection}_{dataset_id}_{start_date.strftime('%Y%m%d')}_{end_date.strftime('%Y%m%d')}_{aggregation_period.lower()}.csv"
673
  st.download_button(
674
  label="Download results as CSV",
 
676
  file_name=filename,
677
  mime='text/csv'
678
  )
679
+
680
  st.success(f"Processing complete! Total processing time: {processing_time:.2f} seconds.")
681
  else:
682
  st.warning("No results were generated. Check your inputs or formula.")
683
  st.info(f"Total processing time: {processing_time:.2f} seconds.")
684
+
685
  except Exception as e:
686
  st.error(f"An error occurred during processing: {str(e)}")
687
  else: