YashMK89 committed on
Commit
fdee337
·
verified ·
1 Parent(s): 9eabcdb

update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -67
app.py CHANGED
@@ -191,10 +191,10 @@ def aggregate_data_yearly(collection):
191
  return ee.ImageCollection(yearly_images)
192
 
193
  # Worker function for processing a single geometry
194
- def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selected_bands, reducer_choice, shape_type, aggregation_period, custom_formula, kernel_size=None, include_boundary=None):
195
  if shape_type.lower() == "point":
196
- latitude = row.get('latitude')
197
- longitude = row.get('longitude')
198
  if pd.isna(latitude) or pd.isna(longitude):
199
  return None # Skip invalid points
200
  location_name = row.get('name', f"Location_{row.name}")
@@ -215,12 +215,10 @@ def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selec
215
  roi = roi.buffer(-30).bounds()
216
  except ValueError:
217
  return None # Skip invalid polygons
218
-
219
  # Filter and aggregate the image collection
220
  collection = ee.ImageCollection(dataset_id) \
221
  .filterDate(ee.Date(start_date_str), ee.Date(end_date_str)) \
222
  .filterBounds(roi)
223
-
224
  if aggregation_period.lower() == 'custom (start date to end date)':
225
  collection = aggregate_data_custom(collection)
226
  elif aggregation_period.lower() == 'weekly':
@@ -229,12 +227,10 @@ def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selec
229
  collection = aggregate_data_monthly(collection, start_date_str, end_date_str)
230
  elif aggregation_period.lower() == 'yearly':
231
  collection = aggregate_data_yearly(collection)
232
-
233
  # Process each image in the collection
234
  image_list = collection.toList(collection.size())
235
  processed_weeks = set()
236
  aggregated_results = []
237
-
238
  for i in range(image_list.size().getInfo()):
239
  image = ee.Image(image_list.get(i))
240
  if aggregation_period.lower() == 'custom (start date to end date)':
@@ -258,7 +254,6 @@ def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selec
258
  timestamp = image.get('year')
259
  period_label = 'Year'
260
  date = ee.Date(timestamp).format('YYYY').getInfo()
261
-
262
  index_image = calculate_custom_formula(image, roi, selected_bands, custom_formula, reducer_choice, scale=30)
263
  try:
264
  index_value = index_image.reduceRegion(
@@ -276,21 +271,19 @@ def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selec
276
  'Calculated Value': calculated_value
277
  }
278
  if shape_type.lower() == 'point':
279
- result['Latitude'] = latitude
280
- result['Longitude'] = longitude
281
  aggregated_results.append(result)
282
  except Exception as e:
283
  st.error(f"Error retrieving value for {location_name}: {e}")
284
-
285
  return aggregated_results
286
 
287
  # Main processing function
288
- def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id, selected_bands, reducer_choice, shape_type, aggregation_period, custom_formula="", kernel_size=None, include_boundary=None):
289
  aggregated_results = []
290
  total_steps = len(locations_df)
291
  progress_bar = st.progress(0)
292
  progress_text = st.empty()
293
-
294
  start_time = time.time() # Start timing the process
295
  with ThreadPoolExecutor(max_workers=10) as executor:
296
  futures = []
@@ -307,10 +300,11 @@ def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id,
307
  aggregation_period,
308
  custom_formula,
309
  kernel_size,
310
- include_boundary
 
 
311
  )
312
  futures.append(future)
313
-
314
  completed = 0
315
  for future in as_completed(futures):
316
  result = future.result()
@@ -320,11 +314,9 @@ def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id,
320
  progress_percentage = completed / total_steps
321
  progress_bar.progress(progress_percentage)
322
  progress_text.markdown(f"Processing: {int(progress_percentage * 100)}%")
323
-
324
  # End timing the process
325
  end_time = time.time()
326
  processing_time = end_time - start_time # Calculate total processing time
327
-
328
  if aggregated_results:
329
  result_df = pd.DataFrame(aggregated_results)
330
  if aggregation_period.lower() == 'custom (start date to end date)':
@@ -334,8 +326,8 @@ def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id,
334
  'Calculated Value': 'mean'
335
  }
336
  if shape_type.lower() == 'point':
337
- agg_dict['Latitude'] = 'first'
338
- agg_dict['Longitude'] = 'first'
339
  aggregated_output = result_df.groupby('Location Name').agg(agg_dict).reset_index()
340
  aggregated_output.rename(columns={'Calculated Value': 'Aggregated Value'}, inplace=True)
341
  return aggregated_output.to_dict(orient='records'), processing_time # Return processing time
@@ -349,7 +341,6 @@ imagery_base = st.selectbox("Select Imagery Base", ["Sentinel", "Landsat", "MODI
349
 
350
  # Initialize data as an empty dictionary
351
  data = {}
352
-
353
  if imagery_base == "Sentinel":
354
  dataset_file = "sentinel_datasets.json"
355
  try:
@@ -401,12 +392,10 @@ if not data:
401
  st.stop()
402
 
403
  st.markdown("<hr><h5><b>{}</b></h5>".format(imagery_base), unsafe_allow_html=True)
404
-
405
  main_selection = st.selectbox(f"Select {imagery_base} Dataset Category", list(data.keys()))
406
 
407
  sub_selection = None
408
  dataset_id = None
409
-
410
  if main_selection:
411
  sub_options = data[main_selection]["sub_options"]
412
  sub_selection = st.selectbox(f"Select Specific {imagery_base} Dataset ID", list(sub_options.keys()))
@@ -416,7 +405,6 @@ if main_selection:
416
  dataset_id = sub_selection
417
 
418
  st.markdown("<hr><h5><b>Earth Engine Index Calculator</b></h5>", unsafe_allow_html=True)
419
-
420
  if main_selection and sub_selection:
421
  dataset_bands = data[main_selection]["bands"].get(sub_selection, [])
422
  st.write(f"Available Bands for {sub_options[sub_selection]}: {', '.join(dataset_bands)}")
@@ -426,11 +414,9 @@ if main_selection and sub_selection:
426
  default=[dataset_bands[0]] if dataset_bands else [],
427
  help=f"Select 1 or 2 bands from: {', '.join(dataset_bands)}"
428
  )
429
-
430
  if len(selected_bands) < 1:
431
  st.warning("Please select at least one band.")
432
  st.stop()
433
-
434
  if selected_bands:
435
  if len(selected_bands) == 1:
436
  default_formula = f"{selected_bands[0]}"
@@ -443,7 +429,6 @@ if main_selection and sub_selection:
443
  value=default_formula,
444
  help=f"Use only these bands: {', '.join(selected_bands)}. Examples: {example}"
445
  )
446
-
447
  def validate_formula(formula, selected_bands):
448
  allowed_chars = set(" +-*/()0123456789.")
449
  terms = re.findall(r'[a-zA-Z][a-zA-Z0-9_]*', formula)
@@ -453,7 +438,6 @@ if main_selection and sub_selection:
453
  if not all(char in allowed_chars or char in ''.join(selected_bands) for char in formula):
454
  return False, "Formula contains invalid characters. Use only bands, numbers, and operators (+, -, *, /, ())"
455
  return True, ""
456
-
457
  is_valid, error_message = validate_formula(custom_formula, selected_bands)
458
  if not is_valid:
459
  st.error(error_message)
@@ -461,7 +445,6 @@ if main_selection and sub_selection:
461
  elif not custom_formula:
462
  st.warning("Please enter a custom formula to proceed.")
463
  st.stop()
464
-
465
  st.write(f"Custom Formula: {custom_formula}")
466
 
467
  reducer_choice = st.selectbox(
@@ -508,47 +491,40 @@ if file_upload is not None:
508
  if file_upload.name.endswith('.csv'):
509
  # Read the CSV file
510
  locations_df = pd.read_csv(file_upload)
511
-
512
  # Show the first few rows to help user identify columns
513
  st.write("Preview of your uploaded data (first 5 rows):")
514
  st.dataframe(locations_df.head())
515
-
516
- # Get all column names from the uploaded file
517
- all_columns = locations_df.columns.tolist()
518
-
519
- # Let user select latitude and longitude columns from dropdown
520
  col1, col2 = st.columns(2)
521
  with col1:
522
  lat_col = st.selectbox(
523
- "Select Latitude Column",
524
- options=all_columns,
525
- index=all_columns.index('latitude') if 'latitude' in all_columns else 0,
526
- help="Select the column containing latitude values"
527
  )
528
  with col2:
529
  lon_col = st.selectbox(
530
- "Select Longitude Column",
531
- options=all_columns,
532
- index=all_columns.index('longitude') if 'longitude' in all_columns else 0,
533
- help="Select the column containing longitude values"
534
  )
535
-
536
  # Validate the selected columns contain numeric data
537
  if not pd.api.types.is_numeric_dtype(locations_df[lat_col]) or not pd.api.types.is_numeric_dtype(locations_df[lon_col]):
538
- st.error("Error: Selected Latitude and Longitude columns must contain numeric values")
539
  st.stop()
540
-
541
- # Rename the selected columns to standard names for processing
542
- locations_df = locations_df.rename(columns={
543
- lat_col: 'latitude',
544
- lon_col: 'longitude'
545
- })
546
-
547
  elif file_upload.name.endswith('.geojson'):
548
  locations_df = gpd.read_file(file_upload)
549
  if 'geometry' in locations_df.columns:
550
- locations_df['latitude'] = locations_df['geometry'].y
551
- locations_df['longitude'] = locations_df['geometry'].x
 
 
 
 
 
552
  else:
553
  st.error("GeoJSON file doesn't contain geometry column")
554
  st.stop()
@@ -571,23 +547,25 @@ if file_upload is not None:
571
  st.error("No valid Point data found in the KML file.")
572
  else:
573
  locations_df = gpd.GeoDataFrame(points, geometry=gpd.GeoSeries.from_wkt([p['geometry'] for p in points]), crs="EPSG:4326")
574
- locations_df['latitude'] = locations_df['geometry'].y
575
- locations_df['longitude'] = locations_df['geometry'].x
 
 
 
 
576
  except Exception as e:
577
  st.error(f"Error parsing KML file: {str(e)}")
578
-
579
  # Display map for points if we have valid data
580
- if not locations_df.empty and 'latitude' in locations_df.columns and 'longitude' in locations_df.columns:
581
- m = leafmap.Map(center=[locations_df['latitude'].mean(), locations_df['longitude'].mean()], zoom=10)
582
  for _, row in locations_df.iterrows():
583
- latitude = row['latitude']
584
- longitude = row['longitude']
585
  if pd.isna(latitude) or pd.isna(longitude):
586
  continue
587
  m.add_marker(location=[latitude, longitude], popup=row.get('name', 'No Name'))
588
  st.write("Map of Uploaded Points:")
589
  m.to_streamlit()
590
-
591
  elif shape_type.lower() == "polygon":
592
  if file_upload.name.endswith('.csv'):
593
  st.error("CSV upload not supported for polygons. Please upload a GeoJSON or KML file.")
@@ -617,7 +595,6 @@ if file_upload is not None:
617
  locations_df = gpd.GeoDataFrame(polygons, geometry=gpd.GeoSeries.from_wkt([p['geometry'] for p in polygons]), crs="EPSG:4326")
618
  except Exception as e:
619
  st.error(f"Error parsing KML file: {str(e)}")
620
-
621
  # Display map for polygons if we have valid data
622
  if not locations_df.empty and 'geometry' in locations_df.columns:
623
  centroid_lat = locations_df.geometry.centroid.y.mean()
@@ -646,14 +623,14 @@ if st.button(f"Calculate {custom_formula}"):
646
  aggregation_period,
647
  custom_formula,
648
  kernel_size,
649
- include_boundary
 
 
650
  )
651
-
652
  if results:
653
  result_df = pd.DataFrame(results)
654
  st.write(f"Processed Results Table ({aggregation_period}) for Formula: {custom_formula}")
655
  st.dataframe(result_df)
656
-
657
  filename = f"{main_selection}_{dataset_id}_{start_date.strftime('%Y%m%d')}_{end_date.strftime('%Y%m%d')}_{aggregation_period.lower()}.csv"
658
  st.download_button(
659
  label="Download results as CSV",
@@ -661,12 +638,10 @@ if st.button(f"Calculate {custom_formula}"):
661
  file_name=filename,
662
  mime='text/csv'
663
  )
664
-
665
  st.success(f"Processing complete! Total processing time: {processing_time:.2f} seconds.")
666
  else:
667
  st.warning("No results were generated. Check your inputs or formula.")
668
  st.info(f"Total processing time: {processing_time:.2f} seconds.")
669
-
670
  except Exception as e:
671
  st.error(f"An error occurred during processing: {str(e)}")
672
  else:
 
191
  return ee.ImageCollection(yearly_images)
192
 
193
  # Worker function for processing a single geometry
194
+ def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selected_bands, reducer_choice, shape_type, aggregation_period, custom_formula, kernel_size=None, include_boundary=None, lat_col=None, lon_col=None):
195
  if shape_type.lower() == "point":
196
+ latitude = row.get(lat_col)
197
+ longitude = row.get(lon_col)
198
  if pd.isna(latitude) or pd.isna(longitude):
199
  return None # Skip invalid points
200
  location_name = row.get('name', f"Location_{row.name}")
 
215
  roi = roi.buffer(-30).bounds()
216
  except ValueError:
217
  return None # Skip invalid polygons
 
218
  # Filter and aggregate the image collection
219
  collection = ee.ImageCollection(dataset_id) \
220
  .filterDate(ee.Date(start_date_str), ee.Date(end_date_str)) \
221
  .filterBounds(roi)
 
222
  if aggregation_period.lower() == 'custom (start date to end date)':
223
  collection = aggregate_data_custom(collection)
224
  elif aggregation_period.lower() == 'weekly':
 
227
  collection = aggregate_data_monthly(collection, start_date_str, end_date_str)
228
  elif aggregation_period.lower() == 'yearly':
229
  collection = aggregate_data_yearly(collection)
 
230
  # Process each image in the collection
231
  image_list = collection.toList(collection.size())
232
  processed_weeks = set()
233
  aggregated_results = []
 
234
  for i in range(image_list.size().getInfo()):
235
  image = ee.Image(image_list.get(i))
236
  if aggregation_period.lower() == 'custom (start date to end date)':
 
254
  timestamp = image.get('year')
255
  period_label = 'Year'
256
  date = ee.Date(timestamp).format('YYYY').getInfo()
 
257
  index_image = calculate_custom_formula(image, roi, selected_bands, custom_formula, reducer_choice, scale=30)
258
  try:
259
  index_value = index_image.reduceRegion(
 
271
  'Calculated Value': calculated_value
272
  }
273
  if shape_type.lower() == 'point':
274
+ result[lat_col] = latitude
275
+ result[lon_col] = longitude
276
  aggregated_results.append(result)
277
  except Exception as e:
278
  st.error(f"Error retrieving value for {location_name}: {e}")
 
279
  return aggregated_results
280
 
281
  # Main processing function
282
+ def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id, selected_bands, reducer_choice, shape_type, aggregation_period, custom_formula="", kernel_size=None, include_boundary=None, lat_col=None, lon_col=None):
283
  aggregated_results = []
284
  total_steps = len(locations_df)
285
  progress_bar = st.progress(0)
286
  progress_text = st.empty()
 
287
  start_time = time.time() # Start timing the process
288
  with ThreadPoolExecutor(max_workers=10) as executor:
289
  futures = []
 
300
  aggregation_period,
301
  custom_formula,
302
  kernel_size,
303
+ include_boundary,
304
+ lat_col,
305
+ lon_col
306
  )
307
  futures.append(future)
 
308
  completed = 0
309
  for future in as_completed(futures):
310
  result = future.result()
 
314
  progress_percentage = completed / total_steps
315
  progress_bar.progress(progress_percentage)
316
  progress_text.markdown(f"Processing: {int(progress_percentage * 100)}%")
 
317
  # End timing the process
318
  end_time = time.time()
319
  processing_time = end_time - start_time # Calculate total processing time
 
320
  if aggregated_results:
321
  result_df = pd.DataFrame(aggregated_results)
322
  if aggregation_period.lower() == 'custom (start date to end date)':
 
326
  'Calculated Value': 'mean'
327
  }
328
  if shape_type.lower() == 'point':
329
+ agg_dict[lat_col] = 'first'
330
+ agg_dict[lon_col] = 'first'
331
  aggregated_output = result_df.groupby('Location Name').agg(agg_dict).reset_index()
332
  aggregated_output.rename(columns={'Calculated Value': 'Aggregated Value'}, inplace=True)
333
  return aggregated_output.to_dict(orient='records'), processing_time # Return processing time
 
341
 
342
  # Initialize data as an empty dictionary
343
  data = {}
 
344
  if imagery_base == "Sentinel":
345
  dataset_file = "sentinel_datasets.json"
346
  try:
 
392
  st.stop()
393
 
394
  st.markdown("<hr><h5><b>{}</b></h5>".format(imagery_base), unsafe_allow_html=True)
 
395
  main_selection = st.selectbox(f"Select {imagery_base} Dataset Category", list(data.keys()))
396
 
397
  sub_selection = None
398
  dataset_id = None
 
399
  if main_selection:
400
  sub_options = data[main_selection]["sub_options"]
401
  sub_selection = st.selectbox(f"Select Specific {imagery_base} Dataset ID", list(sub_options.keys()))
 
405
  dataset_id = sub_selection
406
 
407
  st.markdown("<hr><h5><b>Earth Engine Index Calculator</b></h5>", unsafe_allow_html=True)
 
408
  if main_selection and sub_selection:
409
  dataset_bands = data[main_selection]["bands"].get(sub_selection, [])
410
  st.write(f"Available Bands for {sub_options[sub_selection]}: {', '.join(dataset_bands)}")
 
414
  default=[dataset_bands[0]] if dataset_bands else [],
415
  help=f"Select 1 or 2 bands from: {', '.join(dataset_bands)}"
416
  )
 
417
  if len(selected_bands) < 1:
418
  st.warning("Please select at least one band.")
419
  st.stop()
 
420
  if selected_bands:
421
  if len(selected_bands) == 1:
422
  default_formula = f"{selected_bands[0]}"
 
429
  value=default_formula,
430
  help=f"Use only these bands: {', '.join(selected_bands)}. Examples: {example}"
431
  )
 
432
  def validate_formula(formula, selected_bands):
433
  allowed_chars = set(" +-*/()0123456789.")
434
  terms = re.findall(r'[a-zA-Z][a-zA-Z0-9_]*', formula)
 
438
  if not all(char in allowed_chars or char in ''.join(selected_bands) for char in formula):
439
  return False, "Formula contains invalid characters. Use only bands, numbers, and operators (+, -, *, /, ())"
440
  return True, ""
 
441
  is_valid, error_message = validate_formula(custom_formula, selected_bands)
442
  if not is_valid:
443
  st.error(error_message)
 
445
  elif not custom_formula:
446
  st.warning("Please enter a custom formula to proceed.")
447
  st.stop()
 
448
  st.write(f"Custom Formula: {custom_formula}")
449
 
450
  reducer_choice = st.selectbox(
 
491
  if file_upload.name.endswith('.csv'):
492
  # Read the CSV file
493
  locations_df = pd.read_csv(file_upload)
 
494
  # Show the first few rows to help user identify columns
495
  st.write("Preview of your uploaded data (first 5 rows):")
496
  st.dataframe(locations_df.head())
497
+
498
+ # Dynamically populate dropdown menus for latitude and longitude
 
 
 
499
  col1, col2 = st.columns(2)
500
  with col1:
501
  lat_col = st.selectbox(
502
+ "Select the Latitude column",
503
+ options=locations_df.columns,
504
+ help="Choose the column containing latitude values."
 
505
  )
506
  with col2:
507
  lon_col = st.selectbox(
508
+ "Select the Longitude column",
509
+ options=locations_df.columns,
510
+ help="Choose the column containing longitude values."
 
511
  )
512
+
513
  # Validate the selected columns contain numeric data
514
  if not pd.api.types.is_numeric_dtype(locations_df[lat_col]) or not pd.api.types.is_numeric_dtype(locations_df[lon_col]):
515
+ st.error("Error: Selected Latitude and Longitude columns must contain numeric values.")
516
  st.stop()
517
+
 
 
 
 
 
 
518
  elif file_upload.name.endswith('.geojson'):
519
  locations_df = gpd.read_file(file_upload)
520
  if 'geometry' in locations_df.columns:
521
+ # Extract latitude and longitude from geometry
522
+ locations_df['original_latitude'] = locations_df['geometry'].y
523
+ locations_df['original_longitude'] = locations_df['geometry'].x
524
+
525
+ # Preserve original column names
526
+ lat_col = 'original_latitude'
527
+ lon_col = 'original_longitude'
528
  else:
529
  st.error("GeoJSON file doesn't contain geometry column")
530
  st.stop()
 
547
  st.error("No valid Point data found in the KML file.")
548
  else:
549
  locations_df = gpd.GeoDataFrame(points, geometry=gpd.GeoSeries.from_wkt([p['geometry'] for p in points]), crs="EPSG:4326")
550
+ locations_df['original_latitude'] = locations_df['geometry'].y
551
+ locations_df['original_longitude'] = locations_df['geometry'].x
552
+
553
+ # Preserve original column names
554
+ lat_col = 'original_latitude'
555
+ lon_col = 'original_longitude'
556
  except Exception as e:
557
  st.error(f"Error parsing KML file: {str(e)}")
 
558
  # Display map for points if we have valid data
559
+ if not locations_df.empty and lat_col in locations_df.columns and lon_col in locations_df.columns:
560
+ m = leafmap.Map(center=[locations_df[lat_col].mean(), locations_df[lon_col].mean()], zoom=10)
561
  for _, row in locations_df.iterrows():
562
+ latitude = row[lat_col]
563
+ longitude = row[lon_col]
564
  if pd.isna(latitude) or pd.isna(longitude):
565
  continue
566
  m.add_marker(location=[latitude, longitude], popup=row.get('name', 'No Name'))
567
  st.write("Map of Uploaded Points:")
568
  m.to_streamlit()
 
569
  elif shape_type.lower() == "polygon":
570
  if file_upload.name.endswith('.csv'):
571
  st.error("CSV upload not supported for polygons. Please upload a GeoJSON or KML file.")
 
595
  locations_df = gpd.GeoDataFrame(polygons, geometry=gpd.GeoSeries.from_wkt([p['geometry'] for p in polygons]), crs="EPSG:4326")
596
  except Exception as e:
597
  st.error(f"Error parsing KML file: {str(e)}")
 
598
  # Display map for polygons if we have valid data
599
  if not locations_df.empty and 'geometry' in locations_df.columns:
600
  centroid_lat = locations_df.geometry.centroid.y.mean()
 
623
  aggregation_period,
624
  custom_formula,
625
  kernel_size,
626
+ include_boundary,
627
+ lat_col=lat_col if shape_type.lower() == "point" else None,
628
+ lon_col=lon_col if shape_type.lower() == "point" else None
629
  )
 
630
  if results:
631
  result_df = pd.DataFrame(results)
632
  st.write(f"Processed Results Table ({aggregation_period}) for Formula: {custom_formula}")
633
  st.dataframe(result_df)
 
634
  filename = f"{main_selection}_{dataset_id}_{start_date.strftime('%Y%m%d')}_{end_date.strftime('%Y%m%d')}_{aggregation_period.lower()}.csv"
635
  st.download_button(
636
  label="Download results as CSV",
 
638
  file_name=filename,
639
  mime='text/csv'
640
  )
 
641
  st.success(f"Processing complete! Total processing time: {processing_time:.2f} seconds.")
642
  else:
643
  st.warning("No results were generated. Check your inputs or formula.")
644
  st.info(f"Total processing time: {processing_time:.2f} seconds.")
 
645
  except Exception as e:
646
  st.error(f"An error occurred during processing: {str(e)}")
647
  else: