YashMK89 committed on
Commit
d805acf
·
verified ·
1 Parent(s): 848c770

update app.py

Browse files
Files changed (1) hide show
  1. app.py +99 -262
app.py CHANGED
@@ -3,7 +3,6 @@ import json
3
  import ee
4
  import os
5
  import pandas as pd
6
- import numpy as np
7
  import geopandas as gpd
8
  from datetime import datetime
9
  import leafmap.foliumap as leafmap
@@ -12,8 +11,9 @@ from shapely.geometry import base
12
  from xml.etree import ElementTree as XET
13
  from concurrent.futures import ThreadPoolExecutor, as_completed
14
  import time
15
- import matplotlib.pyplot as plt
16
- import plotly.express as px
 
17
 
18
  # Set up the page layout
19
  st.set_page_config(layout="wide")
@@ -107,49 +107,28 @@ def convert_to_ee_geometry(geometry):
107
  else:
108
  raise ValueError("Unsupported geometry input type. Supported types are Shapely, GeoJSON, and KML.")
109
 
110
- # Function to calculate custom formula with dynamic scale handling
111
- def calculate_custom_formula(image, geometry, selected_bands, custom_formula, reducer_choice, dataset_id, user_scale=None):
112
  try:
113
- # Fetch the nominal scales of the selected bands
114
- band_scales = []
115
- for band in selected_bands:
116
- band_scale = image.select(band).projection().nominalScale().getInfo()
117
- band_scales.append(band_scale)
118
- default_scale = min(band_scales) if band_scales else 30 # Default to 30m if no bands are found
119
- scale = user_scale if user_scale is not None else default_scale
120
-
121
- # Rescale all bands to the chosen scale
122
- rescaled_bands = {}
123
  for band in selected_bands:
124
- band_image = image.select(band)
125
- band_scale = band_image.projection().nominalScale().getInfo()
126
- if band_scale != scale:
127
- rescaled_band = band_image.resample('bilinear').reproject(
128
- crs=band_image.projection().crs(),
129
- scale=scale
130
- )
131
- rescaled_bands[band] = rescaled_band
132
- else:
133
- rescaled_bands[band] = band_image
134
-
135
- # Validate and extract band values
136
- reduced_values = {}
137
  reducer = get_reducer(reducer_choice)
 
138
  for band in selected_bands:
139
- value = rescaled_bands[band].reduceRegion(
140
  reducer=reducer,
141
  geometry=geometry,
142
  scale=scale
143
  ).get(band).getInfo()
144
  reduced_values[band] = float(value if value is not None else 0)
145
-
146
- # Evaluate the custom formula
147
  formula = custom_formula
148
  for band in selected_bands:
149
  formula = formula.replace(band, str(reduced_values[band]))
150
  result = eval(formula, {"__builtins__": {}}, reduced_values)
151
-
152
- # Validate the result
153
  if not isinstance(result, (int, float)):
154
  raise ValueError("Formula did not result in a numeric value.")
155
  return ee.Image.constant(result).rename('custom_result')
@@ -177,35 +156,21 @@ def aggregate_data_custom(collection):
177
  daily_images = ee.List(grouped_by_day.map(calculate_daily_mean))
178
  return ee.ImageCollection(daily_images)
179
 
180
- def aggregate_data_daily(collection):
181
- def set_day_start(image):
182
  date = ee.Date(image.get('system:time_start'))
183
- day_start = date.format('YYYY-MM-dd')
184
- return image.set('day_start', day_start)
185
- collection = collection.map(set_day_start)
186
- grouped_by_day = collection.aggregate_array('day_start').distinct()
187
- def calculate_daily_mean(day_start):
188
- daily_collection = collection.filter(ee.Filter.eq('day_start', day_start))
189
- daily_mean = daily_collection.mean()
190
- return daily_mean.set('day_start', day_start)
191
- daily_images = ee.List(grouped_by_day.map(calculate_daily_mean))
192
- return ee.ImageCollection(daily_images)
193
-
194
- def aggregate_data_weekly(collection, start_date_str, end_date_str):
195
- start_date = ee.Date(start_date_str)
196
- end_date = ee.Date(end_date_str)
197
- days_diff = end_date.difference(start_date, 'day')
198
- num_weeks = days_diff.divide(7).ceil().getInfo()
199
- weekly_images = []
200
- for week in range(num_weeks):
201
- week_start = start_date.advance(week * 7, 'day')
202
- week_end = week_start.advance(7, 'day')
203
- weekly_collection = collection.filterDate(week_start, week_end)
204
- if weekly_collection.size().getInfo() > 0:
205
- weekly_mean = weekly_collection.mean()
206
- weekly_mean = weekly_mean.set('week_start', week_start.format('YYYY-MM-dd'))
207
- weekly_images.append(weekly_mean)
208
- return ee.ImageCollection.fromImages(weekly_images)
209
 
210
  def aggregate_data_monthly(collection, start_date, end_date):
211
  collection = collection.filterDate(start_date, end_date)
@@ -228,57 +193,35 @@ def aggregate_data_yearly(collection):
228
  yearly_images = ee.List(grouped_by_year.map(calculate_yearly_mean))
229
  return ee.ImageCollection(yearly_images)
230
 
231
- # Cloud percentage calculation
232
- def calculate_cloud_percentage(image, cloud_band='QA60'):
233
- qa60 = image.select(cloud_band)
234
- opaque_clouds = qa60.bitwiseAnd(1 << 10)
235
- cirrus_clouds = qa60.bitwiseAnd(1 << 11)
236
- cloud_mask = opaque_clouds.Or(cirrus_clouds)
237
- total_pixels = qa60.reduceRegion(
238
- reducer=ee.Reducer.count(),
239
- geometry=image.geometry(),
240
- scale=60,
241
- maxPixels=1e13
242
- ).get(cloud_band)
243
- cloudy_pixels = cloud_mask.reduceRegion(
244
- reducer=ee.Reducer.sum(),
245
- geometry=image.geometry(),
246
- scale=60,
247
- maxPixels=1e13
248
- ).get(cloud_band)
249
- if total_pixels == 0:
250
- return 0
251
- return ee.Number(cloudy_pixels).divide(ee.Number(total_pixels)).multiply(100)
252
-
253
- # Preprocessing function
254
- def preprocess_collection(collection, pixel_cloud_threshold):
255
- def mask_cloudy_pixels(image):
256
- qa60 = image.select('QA60')
257
- opaque_clouds = qa60.bitwiseAnd(1 << 10)
258
- cirrus_clouds = qa60.bitwiseAnd(1 << 11)
259
- cloud_mask = opaque_clouds.Or(cirrus_clouds)
260
- clear_pixels = cloud_mask.Not()
261
- return image.updateMask(clear_pixels)
262
-
263
- if pixel_cloud_threshold > 0:
264
- return collection.map(mask_cloudy_pixels)
265
- return collection
266
-
267
- # Process single geometry
268
- def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selected_bands, reducer_choice, shape_type, aggregation_period, custom_formula, original_lat_col, original_lon_col, kernel_size=None, include_boundary=None, user_scale=None, pixel_cloud_threshold=0):
269
  if shape_type.lower() == "point":
270
  latitude = row.get('latitude')
271
  longitude = row.get('longitude')
272
  if pd.isna(latitude) or pd.isna(longitude):
273
- return None
274
  location_name = row.get('name', f"Location_{row.name}")
275
  if kernel_size == "3x3 Kernel":
276
- buffer_size = 45
277
  roi = ee.Geometry.Point([longitude, latitude]).buffer(buffer_size).bounds()
278
  elif kernel_size == "5x5 Kernel":
279
- buffer_size = 75
280
  roi = ee.Geometry.Point([longitude, latitude]).buffer(buffer_size).bounds()
281
- else:
282
  roi = ee.Geometry.Point([longitude, latitude])
283
  elif shape_type.lower() == "polygon":
284
  polygon_geometry = row.get('geometry')
@@ -288,33 +231,25 @@ def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selec
288
  if not include_boundary:
289
  roi = roi.buffer(-30).bounds()
290
  except ValueError:
291
- return None
292
-
293
- # Filter collection by date and area first
294
  collection = ee.ImageCollection(dataset_id) \
295
  .filterDate(ee.Date(start_date_str), ee.Date(end_date_str)) \
296
- .filterBounds(roi)
297
-
298
- st.write(f"After initial filtering: {collection.size().getInfo()} images")
299
-
300
- # Apply pixel cloud masking if threshold > 0
301
- if pixel_cloud_threshold > 0:
302
- collection = preprocess_collection(collection, pixel_cloud_threshold)
303
- st.write(f"After cloud masking: {collection.size().getInfo()} images")
304
-
305
  if aggregation_period.lower() == 'custom (start date to end date)':
306
  collection = aggregate_data_custom(collection)
307
  elif aggregation_period.lower() == 'daily':
308
  collection = aggregate_data_daily(collection)
309
  elif aggregation_period.lower() == 'weekly':
310
- collection = aggregate_data_weekly(collection, start_date_str, end_date_str)
311
  elif aggregation_period.lower() == 'monthly':
312
  collection = aggregate_data_monthly(collection, start_date_str, end_date_str)
313
  elif aggregation_period.lower() == 'yearly':
314
  collection = aggregate_data_yearly(collection)
315
-
316
  image_list = collection.toList(collection.size())
317
- processed_weeks = set()
318
  aggregated_results = []
319
  for i in range(image_list.size().getInfo()):
320
  image = ee.Image(image_list.get(i))
@@ -324,17 +259,12 @@ def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selec
324
  date = ee.Date(timestamp).format('YYYY-MM-dd').getInfo()
325
  elif aggregation_period.lower() == 'daily':
326
  timestamp = image.get('day_start')
327
- period_label = 'Date'
328
  date = ee.String(timestamp).getInfo()
329
  elif aggregation_period.lower() == 'weekly':
330
  timestamp = image.get('week_start')
331
  period_label = 'Week'
332
  date = ee.String(timestamp).getInfo()
333
- if (pd.to_datetime(date) < pd.to_datetime(start_date_str) or
334
- pd.to_datetime(date) > pd.to_datetime(end_date_str) or
335
- date in processed_weeks):
336
- continue
337
- processed_weeks.add(date)
338
  elif aggregation_period.lower() == 'monthly':
339
  timestamp = image.get('month')
340
  period_label = 'Month'
@@ -343,13 +273,12 @@ def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selec
343
  timestamp = image.get('year')
344
  period_label = 'Year'
345
  date = ee.Date(timestamp).format('YYYY').getInfo()
346
-
347
- index_image = calculate_custom_formula(image, roi, selected_bands, custom_formula, reducer_choice, dataset_id, user_scale=user_scale)
348
  try:
349
  index_value = index_image.reduceRegion(
350
  reducer=get_reducer(reducer_choice),
351
  geometry=roi,
352
- scale=user_scale
353
  ).get('custom_result')
354
  calculated_value = index_value.getInfo()
355
  if isinstance(calculated_value, (int, float)):
@@ -361,30 +290,20 @@ def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selec
361
  'Calculated Value': calculated_value
362
  }
363
  if shape_type.lower() == 'point':
364
- result[original_lat_col] = latitude
365
- result[original_lon_col] = longitude
366
  aggregated_results.append(result)
367
  except Exception as e:
368
  st.error(f"Error retrieving value for {location_name}: {e}")
369
  return aggregated_results
370
 
371
- # Process aggregation
372
- def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id, selected_bands, reducer_choice, shape_type, aggregation_period, original_lat_col, original_lon_col, custom_formula="", kernel_size=None, include_boundary=None, tile_cloud_threshold=0, pixel_cloud_threshold=0, user_scale=None):
373
  aggregated_results = []
374
  total_steps = len(locations_df)
375
  progress_bar = st.progress(0)
376
  progress_text = st.empty()
377
- start_time = time.time()
378
-
379
- raw_collection = ee.ImageCollection(dataset_id) \
380
- .filterDate(ee.Date(start_date_str), ee.Date(end_date_str))
381
-
382
- # st.write(f"Original Collection Size: {raw_collection.size().getInfo()}")
383
-
384
- if tile_cloud_threshold > 0 or pixel_cloud_threshold > 0:
385
- raw_collection = preprocess_collection(raw_collection, pixel_cloud_threshold)
386
- # st.write(f"Preprocessed Collection Size: {raw_collection.size().getInfo()}")
387
-
388
  with ThreadPoolExecutor(max_workers=10) as executor:
389
  futures = []
390
  for idx, row in locations_df.iterrows():
@@ -399,11 +318,8 @@ def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id,
399
  shape_type,
400
  aggregation_period,
401
  custom_formula,
402
- original_lat_col,
403
- original_lon_col,
404
  kernel_size,
405
- include_boundary,
406
- user_scale=user_scale
407
  )
408
  futures.append(future)
409
  completed = 0
@@ -415,10 +331,9 @@ def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id,
415
  progress_percentage = completed / total_steps
416
  progress_bar.progress(progress_percentage)
417
  progress_text.markdown(f"Processing: {int(progress_percentage * 100)}%")
418
-
419
  end_time = time.time()
420
- processing_time = end_time - start_time
421
-
422
  if aggregated_results:
423
  result_df = pd.DataFrame(aggregated_results)
424
  if aggregation_period.lower() == 'custom (start date to end date)':
@@ -428,19 +343,22 @@ def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id,
428
  'Calculated Value': 'mean'
429
  }
430
  if shape_type.lower() == 'point':
431
- agg_dict[original_lat_col] = 'first'
432
- agg_dict[original_lon_col] = 'first'
433
  aggregated_output = result_df.groupby('Location Name').agg(agg_dict).reset_index()
434
- aggregated_output['Date Range'] = aggregated_output['Start Date'] + " to " + aggregated_output['End Date']
435
- return aggregated_output.to_dict(orient='records'), processing_time
436
  else:
437
- return result_df.to_dict(orient='records'), processing_time
438
- return [], processing_time
439
 
440
  # Streamlit App Logic
441
  st.markdown("<h5>Image Collection</h5>", unsafe_allow_html=True)
442
  imagery_base = st.selectbox("Select Imagery Base", ["Sentinel", "Landsat", "MODIS", "VIIRS", "Custom Input"], index=0)
 
 
443
  data = {}
 
444
  if imagery_base == "Sentinel":
445
  dataset_file = "sentinel_datasets.json"
446
  try:
@@ -480,9 +398,7 @@ elif imagery_base == "Custom Input":
480
  if custom_dataset_id.startswith("ee.ImageCollection("):
481
  custom_dataset_id = custom_dataset_id.replace("ee.ImageCollection('", "").replace("')", "")
482
  collection = ee.ImageCollection(custom_dataset_id)
483
- first_image = collection.first()
484
- default_scale = first_image.projection().nominalScale().getInfo()
485
- band_names = first_image.bandNames().getInfo()
486
  data = {
487
  f"Custom Dataset: {custom_dataset_id}": {
488
  "sub_options": {custom_dataset_id: f"Custom Dataset ({custom_dataset_id})"},
@@ -497,14 +413,18 @@ elif imagery_base == "Custom Input":
497
  else:
498
  st.warning("Please enter a custom dataset ID to proceed.")
499
  data = {}
 
500
  if not data:
501
  st.error("No valid dataset available. Please check your inputs.")
502
  st.stop()
503
 
504
  st.markdown("<hr><h5><b>{}</b></h5>".format(imagery_base), unsafe_allow_html=True)
 
505
  main_selection = st.selectbox(f"Select {imagery_base} Dataset Category", list(data.keys()))
 
506
  sub_selection = None
507
  dataset_id = None
 
508
  if main_selection:
509
  sub_options = data[main_selection]["sub_options"]
510
  sub_selection = st.selectbox(f"Select Specific {imagery_base} Dataset ID", list(sub_options.keys()))
@@ -512,7 +432,6 @@ if main_selection:
512
  st.write(f"You selected: {main_selection} -> {sub_options[sub_selection]}")
513
  st.write(f"Dataset ID: {sub_selection}")
514
  dataset_id = sub_selection
515
- # Fetch the default scale for the selected dataset
516
  try:
517
  collection = ee.ImageCollection(dataset_id)
518
  first_image = collection.first()
@@ -522,58 +441,21 @@ if main_selection:
522
  st.error(f"Error fetching default scale: {str(e)}")
523
 
524
  st.markdown("<hr><h5><b>Earth Engine Index Calculator</b></h5>", unsafe_allow_html=True)
 
525
  if main_selection and sub_selection:
526
  dataset_bands = data[main_selection]["bands"].get(sub_selection, [])
527
  st.write(f"Available Bands for {sub_options[sub_selection]}: {', '.join(dataset_bands)}")
528
-
529
-
530
- # Fetch nominal scales for all bands in the selected dataset
531
- if dataset_id:
532
- try:
533
- # Fetch the first image from the collection to extract band information
534
- collection = ee.ImageCollection(dataset_id)
535
- first_image = collection.first()
536
- band_names = first_image.bandNames().getInfo()
537
-
538
- # Extract scales for all bands
539
- band_scales = []
540
- for band in band_names:
541
- band_scale = first_image.select(band).projection().nominalScale().getInfo()
542
- band_scales.append(band_scale)
543
-
544
- # Identify unique scales using np.unique
545
- unique_scales = np.unique(band_scales)
546
-
547
- # Display the unique scales to the user
548
- st.write(f"Nominal Scales for Bands: {band_scales}")
549
- st.write(f"Unique Scales in Dataset: {unique_scales}")
550
-
551
- # If there are multiple unique scales, allow the user to choose one
552
- if len(unique_scales) > 1:
553
- selected_scale = st.selectbox(
554
- "Select a Scale for Calculation (meters)",
555
- options=unique_scales,
556
- index=0,
557
- help="Choose a scale from the unique scales available in the dataset."
558
- )
559
- default_scale = selected_scale
560
- else:
561
- default_scale = unique_scales[0]
562
- st.write(f"Default Scale for Dataset: {default_scale} meters")
563
-
564
- except Exception as e:
565
- st.error(f"Error fetching band scales: {str(e)}")
566
- default_scale = 30 # Fallback to 30 meters if an error occurs
567
-
568
  selected_bands = st.multiselect(
569
- "Select 1 or 2 Bands for Calculation",
570
  options=dataset_bands,
571
  default=[dataset_bands[0]] if dataset_bands else [],
572
- help=f"Select 1 or 2 bands from: {', '.join(dataset_bands)}"
573
  )
 
574
  if len(selected_bands) < 1:
575
  st.warning("Please select at least one band.")
576
  st.stop()
 
577
  if selected_bands:
578
  if len(selected_bands) == 1:
579
  default_formula = f"{selected_bands[0]}"
@@ -586,6 +468,7 @@ if main_selection and sub_selection:
586
  value=default_formula,
587
  help=f"Use only these bands: {', '.join(selected_bands)}. Examples: {example}"
588
  )
 
589
  def validate_formula(formula, selected_bands):
590
  allowed_chars = set(" +-*/()0123456789.")
591
  terms = re.findall(r'[a-zA-Z][a-zA-Z0-9_]*', formula)
@@ -595,6 +478,7 @@ if main_selection and sub_selection:
595
  if not all(char in allowed_chars or char in ''.join(selected_bands) for char in formula):
596
  return False, "Formula contains invalid characters. Use only bands, numbers, and operators (+, -, *, /, ())"
597
  return True, ""
 
598
  is_valid, error_message = validate_formula(custom_formula, selected_bands)
599
  if not is_valid:
600
  st.error(error_message)
@@ -602,6 +486,7 @@ if main_selection and sub_selection:
602
  elif not custom_formula:
603
  st.warning("Please enter a custom formula to proceed.")
604
  st.stop()
 
605
  st.write(f"Custom Formula: {custom_formula}")
606
 
607
  reducer_choice = st.selectbox(
@@ -615,17 +500,6 @@ end_date = st.date_input("End Date", value=pd.to_datetime('2024-12-01'))
615
  start_date_str = start_date.strftime('%Y-%m-%d')
616
  end_date_str = end_date.strftime('%Y-%m-%d')
617
 
618
- if imagery_base == "Sentinel" and "Sentinel-2" in sub_options[sub_selection]:
619
- st.markdown("<h5>Cloud Filtering</h5>", unsafe_allow_html=True)
620
- pixel_cloud_threshold = st.slider(
621
- "Select Maximum Pixel-Based Cloud Coverage Threshold (%)",
622
- min_value=0,
623
- max_value=100,
624
- value=5,
625
- step=5,
626
- help="Individual pixels with cloud coverage exceeding this threshold will be masked."
627
- )
628
-
629
  aggregation_period = st.selectbox(
630
  "Select Aggregation Period (e.g, Custom(Start Date to End Date) , Daily , Weekly , Monthly , Yearly)",
631
  ["Custom (Start Date to End Date)", "Daily", "Weekly", "Monthly", "Yearly"],
@@ -633,6 +507,7 @@ aggregation_period = st.selectbox(
633
  )
634
 
635
  shape_type = st.selectbox("Do you want to process 'Point' or 'Polygon' data?", ["Point", "Polygon"])
 
636
  kernel_size = None
637
  include_boundary = None
638
 
@@ -650,23 +525,6 @@ elif shape_type.lower() == "polygon":
650
  help="Check to include pixels on the polygon boundary; uncheck to exclude them."
651
  )
652
 
653
- # st.markdown("<h5>Calculation Scale</h5>", unsafe_allow_html=True)
654
- # default_scale = ee.ImageCollection(dataset_id).first().select(0).projection().nominalScale().getInfo()
655
- # user_scale = st.number_input(
656
- # "Enter Calculation Scale (meters) [Leave blank to use dataset's default scale]",
657
- # min_value=1.0,
658
- # value=float(default_scale),
659
- # help=f"Default scale for this dataset is {default_scale} meters. Adjust if needed."
660
- # )
661
-
662
- st.markdown("<h5>Calculation Scale</h5>", unsafe_allow_html=True)
663
- user_scale = st.number_input(
664
- "Enter Calculation Scale (meters) [Leave blank to use dataset's default scale]",
665
- min_value=1.0,
666
- value=float(default_scale),
667
- help=f"Default scale for this dataset is {default_scale} meters. Adjust if needed."
668
- )
669
-
670
  file_upload = st.file_uploader(f"Upload your {shape_type} data (CSV, GeoJSON, KML)", type=["csv", "geojson", "kml"])
671
  locations_df = pd.DataFrame()
672
  original_lat_col = None
@@ -789,8 +647,10 @@ if file_upload is not None:
789
 
790
  if st.button(f"Calculate {custom_formula}"):
791
  if not locations_df.empty:
 
792
  with st.spinner("Processing Data..."):
793
  try:
 
794
  results, processing_time = process_aggregation(
795
  locations_df,
796
  start_date_str,
@@ -800,53 +660,30 @@ if st.button(f"Calculate {custom_formula}"):
800
  reducer_choice,
801
  shape_type,
802
  aggregation_period,
803
- original_lat_col,
804
- original_lon_col,
805
- custom_formula=custom_formula,
806
- kernel_size=kernel_size,
807
- include_boundary=include_boundary,
808
- pixel_cloud_threshold=pixel_cloud_threshold if "pixel_cloud_threshold" in locals() else 0,
809
- user_scale=user_scale
810
  )
 
811
  if results:
812
  result_df = pd.DataFrame(results)
813
  st.write(f"Processed Results Table ({aggregation_period}) for Formula: {custom_formula}")
814
  st.dataframe(result_df)
815
- filename = f"{main_selection}_{dataset_id}_{start_date.strftime('%Y%m%d')}_{end_date.strftime('%Y%m%d')}_{aggregation_period.lower()}.csv"
 
 
816
  st.download_button(
817
  label="Download results as CSV",
818
  data=result_df.to_csv(index=False).encode('utf-8'),
819
  file_name=filename,
820
  mime='text/csv'
821
  )
 
822
  st.success(f"Processing complete! Total processing time: {processing_time:.2f} seconds.")
823
- st.markdown("<h5>Graph Visualization</h5>", unsafe_allow_html=True)
824
- if aggregation_period.lower() == 'custom (start date to end date)':
825
- x_column = 'Date Range'
826
- elif 'Date' in result_df.columns:
827
- x_column = 'Date'
828
- elif 'Week' in result_df.columns:
829
- x_column = 'Week'
830
- elif 'Month' in result_df.columns:
831
- x_column = 'Month'
832
- elif 'Year' in result_df.columns:
833
- x_column = 'Year'
834
- else:
835
- st.warning("No valid time column found for plotting.")
836
- st.stop()
837
- y_column = 'Calculated Value'
838
- fig = px.line(
839
- result_df,
840
- x=x_column,
841
- y=y_column,
842
- color='Location Name',
843
- title=f"{custom_formula} Over Time"
844
- )
845
- st.plotly_chart(fig)
846
  else:
847
  st.warning("No results were generated. Check your inputs or formula.")
848
- st.info(f"Total processing time: {processing_time:.2f} seconds.")
849
  except Exception as e:
850
  st.error(f"An error occurred during processing: {str(e)}")
851
  else:
852
- st.warning("Please upload a valid file to proceed.")
 
3
  import ee
4
  import os
5
  import pandas as pd
 
6
  import geopandas as gpd
7
  from datetime import datetime
8
  import leafmap.foliumap as leafmap
 
11
  from xml.etree import ElementTree as XET
12
  from concurrent.futures import ThreadPoolExecutor, as_completed
13
  import time
14
+
15
+ # Define default scale for calculations (in meters)
16
+ DEFAULT_SCALE = 30
17
 
18
  # Set up the page layout
19
  st.set_page_config(layout="wide")
 
107
  else:
108
  raise ValueError("Unsupported geometry input type. Supported types are Shapely, GeoJSON, and KML.")
109
 
110
+ # Function to calculate custom formula
111
+ def calculate_custom_formula(image, geometry, selected_bands, custom_formula, reducer_choice, scale=DEFAULT_SCALE):
112
  try:
113
+ band_values = {}
114
+ band_names = image.bandNames().getInfo()
 
 
 
 
 
 
 
 
115
  for band in selected_bands:
116
+ if band not in band_names:
117
+ raise ValueError(f"Band '{band}' not found in the dataset.")
118
+ band_values[band] = image.select(band)
 
 
 
 
 
 
 
 
 
 
119
  reducer = get_reducer(reducer_choice)
120
+ reduced_values = {}
121
  for band in selected_bands:
122
+ value = band_values[band].reduceRegion(
123
  reducer=reducer,
124
  geometry=geometry,
125
  scale=scale
126
  ).get(band).getInfo()
127
  reduced_values[band] = float(value if value is not None else 0)
 
 
128
  formula = custom_formula
129
  for band in selected_bands:
130
  formula = formula.replace(band, str(reduced_values[band]))
131
  result = eval(formula, {"__builtins__": {}}, reduced_values)
 
 
132
  if not isinstance(result, (int, float)):
133
  raise ValueError("Formula did not result in a numeric value.")
134
  return ee.Image.constant(result).rename('custom_result')
 
156
  daily_images = ee.List(grouped_by_day.map(calculate_daily_mean))
157
  return ee.ImageCollection(daily_images)
158
 
159
def aggregate_data_weekly(collection):
    """Collapse an Earth Engine image collection into one mean image per week.

    Each image is tagged with a ``week_start`` property: its
    ``system:time_start`` date moved back by ``getRelative('day', 'week')``
    days and formatted as ``YYYY-MM-dd``. Images sharing the same tag are
    averaged with ``mean()``, and every resulting image carries that same
    ``week_start`` property.

    Args:
        collection: The ``ee.ImageCollection`` to aggregate.

    Returns:
        An ``ee.ImageCollection`` holding one mean image per distinct
        ``week_start`` value.
    """

    def _tag_with_week_start(img):
        acquired = ee.Date(img.get('system:time_start'))
        # Negate the day-within-week count so advance() steps backwards.
        days_back = ee.Number(acquired.getRelative('day', 'week')).multiply(-1)
        label = acquired.advance(days_back, 'day').format('YYYY-MM-dd')
        return img.set('week_start', label)

    def _mean_for_week(label):
        same_week = ee.Filter.eq('week_start', label)
        return tagged.filter(same_week).mean().set('week_start', label)

    tagged = collection.map(_tag_with_week_start)
    week_labels = tagged.aggregate_array('week_start').distinct()
    return ee.ImageCollection(ee.List(week_labels.map(_mean_for_week)))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
 
175
  def aggregate_data_monthly(collection, start_date, end_date):
176
  collection = collection.filterDate(start_date, end_date)
 
193
  yearly_images = ee.List(grouped_by_year.map(calculate_yearly_mean))
194
  return ee.ImageCollection(yearly_images)
195
 
196
def aggregate_data_daily(collection):
    """Collapse an Earth Engine image collection into one mean image per day.

    Each image is tagged with a ``day_start`` property holding its
    ``system:time_start`` date formatted as ``YYYY-MM-dd``. Images sharing
    the same tag are averaged with ``mean()``, and every resulting image
    carries that same ``day_start`` property.

    Args:
        collection: The ``ee.ImageCollection`` to aggregate.

    Returns:
        An ``ee.ImageCollection`` holding one mean image per distinct
        ``day_start`` value.
    """

    def _tag_with_day(img):
        day_label = ee.Date(img.get('system:time_start')).format('YYYY-MM-dd')
        return img.set('day_start', day_label)

    def _mean_for_day(day_label):
        same_day = ee.Filter.eq('day_start', day_label)
        return tagged.filter(same_day).mean().set('day_start', day_label)

    tagged = collection.map(_tag_with_day)
    day_labels = tagged.aggregate_array('day_start').distinct()
    return ee.ImageCollection(ee.List(day_labels.map(_mean_for_day)))
209
+
210
+ # Worker function for processing a single geometry
211
+ def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selected_bands, reducer_choice, shape_type, aggregation_period, custom_formula, kernel_size=None, include_boundary=None):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212
  if shape_type.lower() == "point":
213
  latitude = row.get('latitude')
214
  longitude = row.get('longitude')
215
  if pd.isna(latitude) or pd.isna(longitude):
216
+ return None # Skip invalid points
217
  location_name = row.get('name', f"Location_{row.name}")
218
  if kernel_size == "3x3 Kernel":
219
+ buffer_size = 45 # 90m x 90m
220
  roi = ee.Geometry.Point([longitude, latitude]).buffer(buffer_size).bounds()
221
  elif kernel_size == "5x5 Kernel":
222
+ buffer_size = 75 # 150m x 150m
223
  roi = ee.Geometry.Point([longitude, latitude]).buffer(buffer_size).bounds()
224
+ else: # Point
225
  roi = ee.Geometry.Point([longitude, latitude])
226
  elif shape_type.lower() == "polygon":
227
  polygon_geometry = row.get('geometry')
 
231
  if not include_boundary:
232
  roi = roi.buffer(-30).bounds()
233
  except ValueError:
234
+ return None # Skip invalid polygons
235
+ # Filter and aggregate the image collection
 
236
  collection = ee.ImageCollection(dataset_id) \
237
  .filterDate(ee.Date(start_date_str), ee.Date(end_date_str)) \
238
+ .filterBounds(roi) \
239
+ .select(selected_bands) # Filter bands here
 
 
 
 
 
 
 
240
  if aggregation_period.lower() == 'custom (start date to end date)':
241
  collection = aggregate_data_custom(collection)
242
  elif aggregation_period.lower() == 'daily':
243
  collection = aggregate_data_daily(collection)
244
  elif aggregation_period.lower() == 'weekly':
245
+ collection = aggregate_data_weekly(collection)
246
  elif aggregation_period.lower() == 'monthly':
247
  collection = aggregate_data_monthly(collection, start_date_str, end_date_str)
248
  elif aggregation_period.lower() == 'yearly':
249
  collection = aggregate_data_yearly(collection)
250
+ # Process each image in the collection
251
  image_list = collection.toList(collection.size())
252
+ processed_days = set()
253
  aggregated_results = []
254
  for i in range(image_list.size().getInfo()):
255
  image = ee.Image(image_list.get(i))
 
259
  date = ee.Date(timestamp).format('YYYY-MM-dd').getInfo()
260
  elif aggregation_period.lower() == 'daily':
261
  timestamp = image.get('day_start')
262
+ period_label = 'Day'
263
  date = ee.String(timestamp).getInfo()
264
  elif aggregation_period.lower() == 'weekly':
265
  timestamp = image.get('week_start')
266
  period_label = 'Week'
267
  date = ee.String(timestamp).getInfo()
 
 
 
 
 
268
  elif aggregation_period.lower() == 'monthly':
269
  timestamp = image.get('month')
270
  period_label = 'Month'
 
273
  timestamp = image.get('year')
274
  period_label = 'Year'
275
  date = ee.Date(timestamp).format('YYYY').getInfo()
276
+ index_image = calculate_custom_formula(image, roi, selected_bands, custom_formula, reducer_choice, scale=DEFAULT_SCALE)
 
277
  try:
278
  index_value = index_image.reduceRegion(
279
  reducer=get_reducer(reducer_choice),
280
  geometry=roi,
281
+ scale=DEFAULT_SCALE
282
  ).get('custom_result')
283
  calculated_value = index_value.getInfo()
284
  if isinstance(calculated_value, (int, float)):
 
290
  'Calculated Value': calculated_value
291
  }
292
  if shape_type.lower() == 'point':
293
+ result['Latitude'] = latitude
294
+ result['Longitude'] = longitude
295
  aggregated_results.append(result)
296
  except Exception as e:
297
  st.error(f"Error retrieving value for {location_name}: {e}")
298
  return aggregated_results
299
 
300
+ # Main processing function
301
+ def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id, selected_bands, reducer_choice, shape_type, aggregation_period, custom_formula="", kernel_size=None, include_boundary=None):
302
  aggregated_results = []
303
  total_steps = len(locations_df)
304
  progress_bar = st.progress(0)
305
  progress_text = st.empty()
306
+ start_time = time.time() # Start timing the process
 
 
 
 
 
 
 
 
 
 
307
  with ThreadPoolExecutor(max_workers=10) as executor:
308
  futures = []
309
  for idx, row in locations_df.iterrows():
 
318
  shape_type,
319
  aggregation_period,
320
  custom_formula,
 
 
321
  kernel_size,
322
+ include_boundary
 
323
  )
324
  futures.append(future)
325
  completed = 0
 
331
  progress_percentage = completed / total_steps
332
  progress_bar.progress(progress_percentage)
333
  progress_text.markdown(f"Processing: {int(progress_percentage * 100)}%")
334
+ # End timing the process
335
  end_time = time.time()
336
+ processing_time = end_time - start_time # Calculate total processing time
 
337
  if aggregated_results:
338
  result_df = pd.DataFrame(aggregated_results)
339
  if aggregation_period.lower() == 'custom (start date to end date)':
 
343
  'Calculated Value': 'mean'
344
  }
345
  if shape_type.lower() == 'point':
346
+ agg_dict['Latitude'] = 'first'
347
+ agg_dict['Longitude'] = 'first'
348
  aggregated_output = result_df.groupby('Location Name').agg(agg_dict).reset_index()
349
+ aggregated_output.rename(columns={'Calculated Value': 'Aggregated Value'}, inplace=True)
350
+ return aggregated_output.to_dict(orient='records'), processing_time # Return processing time
351
  else:
352
+ return result_df.to_dict(orient='records'), processing_time
353
+ return [], processing_time
354
 
355
  # Streamlit App Logic
356
  st.markdown("<h5>Image Collection</h5>", unsafe_allow_html=True)
357
  imagery_base = st.selectbox("Select Imagery Base", ["Sentinel", "Landsat", "MODIS", "VIIRS", "Custom Input"], index=0)
358
+
359
+ # Initialize data as an empty dictionary
360
  data = {}
361
+
362
  if imagery_base == "Sentinel":
363
  dataset_file = "sentinel_datasets.json"
364
  try:
 
398
  if custom_dataset_id.startswith("ee.ImageCollection("):
399
  custom_dataset_id = custom_dataset_id.replace("ee.ImageCollection('", "").replace("')", "")
400
  collection = ee.ImageCollection(custom_dataset_id)
401
+ band_names = collection.first().bandNames().getInfo()
 
 
402
  data = {
403
  f"Custom Dataset: {custom_dataset_id}": {
404
  "sub_options": {custom_dataset_id: f"Custom Dataset ({custom_dataset_id})"},
 
413
  else:
414
  st.warning("Please enter a custom dataset ID to proceed.")
415
  data = {}
416
+
417
  if not data:
418
  st.error("No valid dataset available. Please check your inputs.")
419
  st.stop()
420
 
421
  st.markdown("<hr><h5><b>{}</b></h5>".format(imagery_base), unsafe_allow_html=True)
422
+
423
  main_selection = st.selectbox(f"Select {imagery_base} Dataset Category", list(data.keys()))
424
+
425
  sub_selection = None
426
  dataset_id = None
427
+
428
  if main_selection:
429
  sub_options = data[main_selection]["sub_options"]
430
  sub_selection = st.selectbox(f"Select Specific {imagery_base} Dataset ID", list(sub_options.keys()))
 
432
  st.write(f"You selected: {main_selection} -> {sub_options[sub_selection]}")
433
  st.write(f"Dataset ID: {sub_selection}")
434
  dataset_id = sub_selection
 
435
  try:
436
  collection = ee.ImageCollection(dataset_id)
437
  first_image = collection.first()
 
441
  st.error(f"Error fetching default scale: {str(e)}")
442
 
443
  st.markdown("<hr><h5><b>Earth Engine Index Calculator</b></h5>", unsafe_allow_html=True)
444
+
445
  if main_selection and sub_selection:
446
  dataset_bands = data[main_selection]["bands"].get(sub_selection, [])
447
  st.write(f"Available Bands for {sub_options[sub_selection]}: {', '.join(dataset_bands)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
448
  selected_bands = st.multiselect(
449
+ "Select Bands for Calculation",
450
  options=dataset_bands,
451
  default=[dataset_bands[0]] if dataset_bands else [],
452
+ help=f"Select bands from: {', '.join(dataset_bands)}"
453
  )
454
+
455
  if len(selected_bands) < 1:
456
  st.warning("Please select at least one band.")
457
  st.stop()
458
+
459
  if selected_bands:
460
  if len(selected_bands) == 1:
461
  default_formula = f"{selected_bands[0]}"
 
468
  value=default_formula,
469
  help=f"Use only these bands: {', '.join(selected_bands)}. Examples: {example}"
470
  )
471
+
472
  def validate_formula(formula, selected_bands):
473
  allowed_chars = set(" +-*/()0123456789.")
474
  terms = re.findall(r'[a-zA-Z][a-zA-Z0-9_]*', formula)
 
478
  if not all(char in allowed_chars or char in ''.join(selected_bands) for char in formula):
479
  return False, "Formula contains invalid characters. Use only bands, numbers, and operators (+, -, *, /, ())"
480
  return True, ""
481
+
482
  is_valid, error_message = validate_formula(custom_formula, selected_bands)
483
  if not is_valid:
484
  st.error(error_message)
 
486
  elif not custom_formula:
487
  st.warning("Please enter a custom formula to proceed.")
488
  st.stop()
489
+
490
  st.write(f"Custom Formula: {custom_formula}")
491
 
492
  reducer_choice = st.selectbox(
 
500
  start_date_str = start_date.strftime('%Y-%m-%d')
501
  end_date_str = end_date.strftime('%Y-%m-%d')
502
 
 
 
 
 
 
 
 
 
 
 
 
503
  aggregation_period = st.selectbox(
504
  "Select Aggregation Period (e.g, Custom(Start Date to End Date) , Daily , Weekly , Monthly , Yearly)",
505
  ["Custom (Start Date to End Date)", "Daily", "Weekly", "Monthly", "Yearly"],
 
507
  )
508
 
509
  shape_type = st.selectbox("Do you want to process 'Point' or 'Polygon' data?", ["Point", "Polygon"])
510
+
511
  kernel_size = None
512
  include_boundary = None
513
 
 
525
  help="Check to include pixels on the polygon boundary; uncheck to exclude them."
526
  )
527
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
528
  file_upload = st.file_uploader(f"Upload your {shape_type} data (CSV, GeoJSON, KML)", type=["csv", "geojson", "kml"])
529
  locations_df = pd.DataFrame()
530
  original_lat_col = None
 
647
 
# "Calculate" button handler: run the aggregation over every uploaded
# location, then show the result table, a CSV download button and the
# elapsed processing time.
if st.button(f"Calculate {custom_formula}"):
    if not locations_df.empty:
        # Keep a spinner on screen while process_aggregation is running.
        with st.spinner("Processing Data..."):
            try:
                # Arguments are positional and follow the parameter order of
                # process_aggregation; it returns (records, seconds_elapsed).
                results, processing_time = process_aggregation(
                    locations_df,
                    start_date_str,
                    end_date_str,
                    dataset_id,
                    selected_bands,
                    reducer_choice,
                    shape_type,
                    aggregation_period,
                    custom_formula,
                    kernel_size,
                    include_boundary,
                )
                if results:
                    result_df = pd.DataFrame(results)
                    st.write(f"Processed Results Table ({aggregation_period}) for Formula: {custom_formula}")
                    st.dataframe(result_df)

                    # Build the download name from the selection and the date
                    # range. Dataset IDs contain '/', and the aggregation
                    # label contains spaces and parentheses, so every
                    # character outside [alphanumeric . _ -] is replaced with
                    # '_' to keep the name a single valid file name instead
                    # of something that looks like a path.
                    raw_name = "_".join([
                        str(main_selection),
                        str(dataset_id),
                        start_date_str,
                        end_date_str,
                        aggregation_period.lower(),
                    ])
                    safe_name = "".join(
                        ch if ch.isalnum() or ch in "._-" else "_"
                        for ch in raw_name
                    )
                    filename = f"{safe_name}.csv"

                    st.download_button(
                        label="Download results as CSV",
                        data=result_df.to_csv(index=False).encode('utf-8'),
                        file_name=filename,
                        mime='text/csv'
                    )
                    # Report how long the whole run took.
                    st.success(f"Processing complete! Total processing time: {processing_time:.2f} seconds.")
                else:
                    st.warning("No results were generated. Check your inputs or formula.")
                    # Show the processing time even when nothing was produced.
                    st.info(f"Total processing time: {processing_time:.2f} seconds.")
            except Exception as e:
                # Top-level UI boundary: surface any failure to the user
                # rather than crashing the Streamlit script.
                st.error(f"An error occurred during processing: {str(e)}")
    else:
        # Nothing was uploaded (or parsing produced no rows).
        st.warning("Please upload a file to proceed.")