YashMK89 committed on
Commit
609ffa3
·
verified ·
1 Parent(s): c84291c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -98
app.py CHANGED
@@ -11,8 +11,6 @@ from shapely.geometry import base
11
  from xml.etree import ElementTree as XET
12
  from concurrent.futures import ThreadPoolExecutor, as_completed
13
  import time
14
- import matplotlib.pyplot as plt
15
- import matplotlib.dates as mdates
16
 
17
  # Set up the page layout
18
  st.set_page_config(layout="wide")
@@ -145,9 +143,6 @@ def calculate_custom_formula(image, geometry, selected_bands, custom_formula, re
145
  return ee.Image(0).rename('custom_result').set('error', str(e))
146
 
147
  # Aggregation functions
148
- def aggregate_data_daily(collection):
149
- return collection
150
-
151
  def aggregate_data_custom(collection):
152
  collection = collection.map(lambda image: image.set('day', ee.Date(image.get('system:time_start')).format('YYYY-MM-dd')))
153
  grouped_by_day = collection.aggregate_array('day').distinct()
@@ -158,27 +153,21 @@ def aggregate_data_custom(collection):
158
  daily_images = ee.List(grouped_by_day.map(calculate_daily_mean))
159
  return ee.ImageCollection(daily_images)
160
 
161
- def aggregate_data_weekly(collection, start_date_str, end_date_str):
162
- start_date = ee.Date(start_date_str)
163
- end_date = ee.Date(end_date_str)
164
-
165
- # Create weekly intervals from start date to end date
166
- n_weeks = end_date.difference(start_date, 'week').ceil()
167
- week_starts = ee.List.sequence(0, n_weeks).map(lambda week: start_date.advance(week, 'week'))
168
-
 
169
  def calculate_weekly_mean(week_start):
170
- week_end = ee.Date(week_start).advance(1, 'week')
171
- weekly_collection = collection.filterDate(week_start, week_end)
172
  weekly_mean = weekly_collection.mean()
173
-
174
- return weekly_mean.set({
175
- 'week_start': week_start,
176
- 'week_end': week_end,
177
- 'system:time_start': ee.Date(week_start).millis()
178
- })
179
-
180
- weekly_images = ee.ImageCollection(week_starts.map(calculate_weekly_mean))
181
- return weekly_images.filter(ee.Filter.lte('system:time_start', end_date.millis()))
182
 
183
  def aggregate_data_monthly(collection, start_date, end_date):
184
  collection = collection.filterDate(start_date, end_date)
@@ -207,15 +196,15 @@ def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selec
207
  latitude = row.get('latitude')
208
  longitude = row.get('longitude')
209
  if pd.isna(latitude) or pd.isna(longitude):
210
- return None
211
  location_name = row.get('name', f"Location_{row.name}")
212
  if kernel_size == "3x3 Kernel":
213
- buffer_size = 45
214
  roi = ee.Geometry.Point([longitude, latitude]).buffer(buffer_size).bounds()
215
  elif kernel_size == "5x5 Kernel":
216
- buffer_size = 75
217
  roi = ee.Geometry.Point([longitude, latitude]).buffer(buffer_size).bounds()
218
- else:
219
  roi = ee.Geometry.Point([longitude, latitude])
220
  elif shape_type.lower() == "polygon":
221
  polygon_geometry = row.get('geometry')
@@ -225,46 +214,42 @@ def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selec
225
  if not include_boundary:
226
  roi = roi.buffer(-30).bounds()
227
  except ValueError:
228
- return None
229
 
 
230
  collection = ee.ImageCollection(dataset_id) \
231
  .filterDate(ee.Date(start_date_str), ee.Date(end_date_str)) \
232
  .filterBounds(roi)
233
 
234
- if aggregation_period.lower() == 'daily':
235
- collection = aggregate_data_daily(collection)
236
- elif aggregation_period.lower() == 'custom (start date to end date)':
237
  collection = aggregate_data_custom(collection)
238
  elif aggregation_period.lower() == 'weekly':
239
- collection = aggregate_data_weekly(collection, start_date_str, end_date_str)
240
  elif aggregation_period.lower() == 'monthly':
241
  collection = aggregate_data_monthly(collection, start_date_str, end_date_str)
242
  elif aggregation_period.lower() == 'yearly':
243
  collection = aggregate_data_yearly(collection)
244
 
 
245
  image_list = collection.toList(collection.size())
246
- processed_periods = set()
247
  aggregated_results = []
248
 
249
  for i in range(image_list.size().getInfo()):
250
  image = ee.Image(image_list.get(i))
251
-
252
- if aggregation_period.lower() == 'daily':
253
- timestamp = image.get('system:time_start')
254
- period_label = 'Date'
255
- date = ee.Date(timestamp).format('YYYY-MM-dd').getInfo()
256
- elif aggregation_period.lower() == 'custom (start date to end date)':
257
  timestamp = image.get('day')
258
  period_label = 'Date'
259
  date = ee.Date(timestamp).format('YYYY-MM-dd').getInfo()
260
  elif aggregation_period.lower() == 'weekly':
261
- week_start = image.get('week_start').getInfo()
262
- week_end = image.get('week_end').getInfo()
263
  period_label = 'Week'
264
- date = f"{week_start} to {week_end}"
265
- if date in processed_periods:
 
 
266
  continue
267
- processed_periods.add(date)
268
  elif aggregation_period.lower() == 'monthly':
269
  timestamp = image.get('month')
270
  period_label = 'Month'
@@ -288,12 +273,11 @@ def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selec
288
  period_label: date,
289
  'Start Date': start_date_str,
290
  'End Date': end_date_str,
291
- 'Calculated Value': calculated_value,
292
- 'Date': pd.to_datetime(date.split(' to ')[0]) if ' to ' in date else pd.to_datetime(date)
293
  }
294
  if shape_type.lower() == 'point':
295
- result[original_lat_col] = latitude
296
- result[original_lon_col] = longitude
297
  aggregated_results.append(result)
298
  except Exception as e:
299
  st.error(f"Error retrieving value for {location_name}: {e}")
@@ -307,7 +291,7 @@ def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id,
307
  progress_bar = st.progress(0)
308
  progress_text = st.empty()
309
 
310
- start_time = time.time()
311
  with ThreadPoolExecutor(max_workers=10) as executor:
312
  futures = []
313
  for idx, row in locations_df.iterrows():
@@ -339,8 +323,9 @@ def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id,
339
  progress_bar.progress(progress_percentage)
340
  progress_text.markdown(f"Processing: {int(progress_percentage * 100)}%")
341
 
 
342
  end_time = time.time()
343
- processing_time = end_time - start_time
344
 
345
  if aggregated_results:
346
  result_df = pd.DataFrame(aggregated_results)
@@ -355,47 +340,10 @@ def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id,
355
  agg_dict[original_lon_col] = 'first'
356
  aggregated_output = result_df.groupby('Location Name').agg(agg_dict).reset_index()
357
  aggregated_output.rename(columns={'Calculated Value': 'Aggregated Value'}, inplace=True)
358
- return aggregated_output, processing_time
359
  else:
360
- return result_df, processing_time
361
- return pd.DataFrame(), processing_time
362
-
363
- # Visualization function
364
- def plot_results(result_df, aggregation_period):
365
- if not result_df.empty and 'Date' in result_df.columns:
366
- fig, ax = plt.subplots(figsize=(12, 6))
367
-
368
- # Group by location and plot each line
369
- for name, group in result_df.groupby('Location Name'):
370
- group = group.sort_values('Date')
371
- if aggregation_period.lower() == 'weekly':
372
- # For weekly data, use the midpoint of the week for plotting
373
- dates = group['Date'] + pd.Timedelta(days=3)
374
- ax.plot(dates, group['Calculated Value'], 'o-', label=name)
375
- else:
376
- ax.plot(group['Date'], group['Calculated Value'], 'o-', label=name)
377
-
378
- # Format x-axis based on time period
379
- if aggregation_period.lower() == 'daily':
380
- ax.xaxis.set_major_locator(mdates.DayLocator(interval=2))
381
- ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
382
- elif aggregation_period.lower() == 'weekly':
383
- ax.xaxis.set_major_locator(mdates.WeekdayLocator(byweekday=mdates.MO))
384
- ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
385
- elif aggregation_period.lower() == 'monthly':
386
- ax.xaxis.set_major_locator(mdates.MonthLocator())
387
- ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
388
- elif aggregation_period.lower() == 'yearly':
389
- ax.xaxis.set_major_locator(mdates.YearLocator())
390
- ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
391
-
392
- plt.xticks(rotation=45)
393
- plt.xlabel('Date')
394
- plt.ylabel('Calculated Value')
395
- plt.title(f'{custom_formula} Values Over Time ({aggregation_period})')
396
- plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
397
- plt.tight_layout()
398
- st.pyplot(fig)
399
 
400
  # Streamlit App Logic
401
  st.markdown("<h5>Image Collection</h5>", unsafe_allow_html=True)
@@ -530,8 +478,8 @@ start_date_str = start_date.strftime('%Y-%m-%d')
530
  end_date_str = end_date.strftime('%Y-%m-%d')
531
 
532
  aggregation_period = st.selectbox(
533
- "Select Aggregation Period",
534
- ["Daily", "Custom (Start Date to End Date)", "Weekly", "Monthly", "Yearly"],
535
  index=0
536
  )
537
 
@@ -562,12 +510,17 @@ original_lon_col = None
562
  if file_upload is not None:
563
  if shape_type.lower() == "point":
564
  if file_upload.name.endswith('.csv'):
 
565
  locations_df = pd.read_csv(file_upload)
 
 
566
  st.write("Preview of your uploaded data (first 5 rows):")
567
  st.dataframe(locations_df.head())
568
 
 
569
  all_columns = locations_df.columns.tolist()
570
 
 
571
  col1, col2 = st.columns(2)
572
  with col1:
573
  original_lat_col = st.selectbox(
@@ -584,10 +537,12 @@ if file_upload is not None:
584
  help="Select the column containing longitude values"
585
  )
586
 
 
587
  if not pd.api.types.is_numeric_dtype(locations_df[original_lat_col]) or not pd.api.types.is_numeric_dtype(locations_df[original_lon_col]):
588
  st.error("Error: Selected Latitude and Longitude columns must contain numeric values")
589
  st.stop()
590
 
 
591
  locations_df = locations_df.rename(columns={
592
  original_lat_col: 'latitude',
593
  original_lon_col: 'longitude'
@@ -629,6 +584,7 @@ if file_upload is not None:
629
  except Exception as e:
630
  st.error(f"Error parsing KML file: {str(e)}")
631
 
 
632
  if not locations_df.empty and 'latitude' in locations_df.columns and 'longitude' in locations_df.columns:
633
  m = leafmap.Map(center=[locations_df['latitude'].mean(), locations_df['longitude'].mean()], zoom=10)
634
  for _, row in locations_df.iterrows():
@@ -670,6 +626,7 @@ if file_upload is not None:
670
  except Exception as e:
671
  st.error(f"Error parsing KML file: {str(e)}")
672
 
 
673
  if not locations_df.empty and 'geometry' in locations_df.columns:
674
  centroid_lat = locations_df.geometry.centroid.y.mean()
675
  centroid_lon = locations_df.geometry.centroid.x.mean()
@@ -686,7 +643,7 @@ if st.button(f"Calculate {custom_formula}"):
686
  if not locations_df.empty:
687
  with st.spinner("Processing Data..."):
688
  try:
689
- result_df, processing_time = process_aggregation(
690
  locations_df,
691
  start_date_str,
692
  end_date_str,
@@ -702,13 +659,11 @@ if st.button(f"Calculate {custom_formula}"):
702
  include_boundary
703
  )
704
 
705
- if not result_df.empty:
 
706
  st.write(f"Processed Results Table ({aggregation_period}) for Formula: {custom_formula}")
707
  st.dataframe(result_df)
708
 
709
- # Plot the results
710
- plot_results(result_df, aggregation_period)
711
-
712
  filename = f"{main_selection}_{dataset_id}_{start_date.strftime('%Y%m%d')}_{end_date.strftime('%Y%m%d')}_{aggregation_period.lower()}.csv"
713
  st.download_button(
714
  label="Download results as CSV",
 
11
  from xml.etree import ElementTree as XET
12
  from concurrent.futures import ThreadPoolExecutor, as_completed
13
  import time
 
 
14
 
15
  # Set up the page layout
16
  st.set_page_config(layout="wide")
 
143
  return ee.Image(0).rename('custom_result').set('error', str(e))
144
 
145
  # Aggregation functions
 
 
 
146
  def aggregate_data_custom(collection):
147
  collection = collection.map(lambda image: image.set('day', ee.Date(image.get('system:time_start')).format('YYYY-MM-dd')))
148
  grouped_by_day = collection.aggregate_array('day').distinct()
 
153
  daily_images = ee.List(grouped_by_day.map(calculate_daily_mean))
154
  return ee.ImageCollection(daily_images)
155
 
156
def aggregate_data_weekly(collection):
    """Collapse an ee.ImageCollection into one mean composite per week.

    Every image is tagged with the date string of the first day of its week
    (computed server-side with ee.Date.getRelative('day', 'week')); images
    sharing a 'week_start' tag are then averaged into a single image that
    keeps the tag, so callers can label each weekly composite.
    """
    def tag_with_week_start(img):
        # Rewind the acquisition timestamp to the start of its week.
        acq_date = ee.Date(img.get('system:time_start'))
        day_offset = ee.Number(acq_date.getRelative('day', 'week')).multiply(-1)
        return img.set('week_start', acq_date.advance(day_offset, 'day').format('YYYY-MM-dd'))

    tagged = collection.map(tag_with_week_start)
    week_labels = tagged.aggregate_array('week_start').distinct()

    def mean_for_week(label):
        # Average all images carrying this week label; keep the label on the result.
        return tagged.filter(ee.Filter.eq('week_start', label)).mean().set('week_start', label)

    return ee.ImageCollection(ee.List(week_labels.map(mean_for_week)))
 
 
 
 
 
 
171
 
172
  def aggregate_data_monthly(collection, start_date, end_date):
173
  collection = collection.filterDate(start_date, end_date)
 
196
  latitude = row.get('latitude')
197
  longitude = row.get('longitude')
198
  if pd.isna(latitude) or pd.isna(longitude):
199
+ return None # Skip invalid points
200
  location_name = row.get('name', f"Location_{row.name}")
201
  if kernel_size == "3x3 Kernel":
202
+ buffer_size = 45 # 90m x 90m
203
  roi = ee.Geometry.Point([longitude, latitude]).buffer(buffer_size).bounds()
204
  elif kernel_size == "5x5 Kernel":
205
+ buffer_size = 75 # 150m x 150m
206
  roi = ee.Geometry.Point([longitude, latitude]).buffer(buffer_size).bounds()
207
+ else: # Point
208
  roi = ee.Geometry.Point([longitude, latitude])
209
  elif shape_type.lower() == "polygon":
210
  polygon_geometry = row.get('geometry')
 
214
  if not include_boundary:
215
  roi = roi.buffer(-30).bounds()
216
  except ValueError:
217
+ return None # Skip invalid polygons
218
 
219
+ # Filter and aggregate the image collection
220
  collection = ee.ImageCollection(dataset_id) \
221
  .filterDate(ee.Date(start_date_str), ee.Date(end_date_str)) \
222
  .filterBounds(roi)
223
 
224
+ if aggregation_period.lower() == 'custom (start date to end date)':
 
 
225
  collection = aggregate_data_custom(collection)
226
  elif aggregation_period.lower() == 'weekly':
227
+ collection = aggregate_data_weekly(collection)
228
  elif aggregation_period.lower() == 'monthly':
229
  collection = aggregate_data_monthly(collection, start_date_str, end_date_str)
230
  elif aggregation_period.lower() == 'yearly':
231
  collection = aggregate_data_yearly(collection)
232
 
233
+ # Process each image in the collection
234
  image_list = collection.toList(collection.size())
235
+ processed_weeks = set()
236
  aggregated_results = []
237
 
238
  for i in range(image_list.size().getInfo()):
239
  image = ee.Image(image_list.get(i))
240
+ if aggregation_period.lower() == 'custom (start date to end date)':
 
 
 
 
 
241
  timestamp = image.get('day')
242
  period_label = 'Date'
243
  date = ee.Date(timestamp).format('YYYY-MM-dd').getInfo()
244
  elif aggregation_period.lower() == 'weekly':
245
+ timestamp = image.get('week_start')
 
246
  period_label = 'Week'
247
+ date = ee.String(timestamp).getInfo()
248
+ if (pd.to_datetime(date) < pd.to_datetime(start_date_str) or
249
+ pd.to_datetime(date) > pd.to_datetime(end_date_str) or
250
+ date in processed_weeks):
251
  continue
252
+ processed_weeks.add(date)
253
  elif aggregation_period.lower() == 'monthly':
254
  timestamp = image.get('month')
255
  period_label = 'Month'
 
273
  period_label: date,
274
  'Start Date': start_date_str,
275
  'End Date': end_date_str,
276
+ 'Calculated Value': calculated_value
 
277
  }
278
  if shape_type.lower() == 'point':
279
+ result[original_lat_col] = latitude # Use original column name
280
+ result[original_lon_col] = longitude # Use original column name
281
  aggregated_results.append(result)
282
  except Exception as e:
283
  st.error(f"Error retrieving value for {location_name}: {e}")
 
291
  progress_bar = st.progress(0)
292
  progress_text = st.empty()
293
 
294
+ start_time = time.time() # Start timing the process
295
  with ThreadPoolExecutor(max_workers=10) as executor:
296
  futures = []
297
  for idx, row in locations_df.iterrows():
 
323
  progress_bar.progress(progress_percentage)
324
  progress_text.markdown(f"Processing: {int(progress_percentage * 100)}%")
325
 
326
+ # End timing the process
327
  end_time = time.time()
328
+ processing_time = end_time - start_time # Calculate total processing time
329
 
330
  if aggregated_results:
331
  result_df = pd.DataFrame(aggregated_results)
 
340
  agg_dict[original_lon_col] = 'first'
341
  aggregated_output = result_df.groupby('Location Name').agg(agg_dict).reset_index()
342
  aggregated_output.rename(columns={'Calculated Value': 'Aggregated Value'}, inplace=True)
343
+ return aggregated_output.to_dict(orient='records'), processing_time
344
  else:
345
+ return result_df.to_dict(orient='records'), processing_time
346
+ return [], processing_time
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
347
 
348
  # Streamlit App Logic
349
  st.markdown("<h5>Image Collection</h5>", unsafe_allow_html=True)
 
478
  end_date_str = end_date.strftime('%Y-%m-%d')
479
 
480
  aggregation_period = st.selectbox(
481
+ "Select Aggregation Period (e.g, Custom(Start Date to End Date) , Weekly , Monthly , Yearly)",
482
+ ["Custom (Start Date to End Date)", "Weekly", "Monthly", "Yearly"],
483
  index=0
484
  )
485
 
 
510
  if file_upload is not None:
511
  if shape_type.lower() == "point":
512
  if file_upload.name.endswith('.csv'):
513
+ # Read the CSV file
514
  locations_df = pd.read_csv(file_upload)
515
+
516
+ # Show the first few rows to help user identify columns
517
  st.write("Preview of your uploaded data (first 5 rows):")
518
  st.dataframe(locations_df.head())
519
 
520
+ # Get all column names from the uploaded file
521
  all_columns = locations_df.columns.tolist()
522
 
523
+ # Let user select latitude and longitude columns from dropdown
524
  col1, col2 = st.columns(2)
525
  with col1:
526
  original_lat_col = st.selectbox(
 
537
  help="Select the column containing longitude values"
538
  )
539
 
540
+ # Validate the selected columns contain numeric data
541
  if not pd.api.types.is_numeric_dtype(locations_df[original_lat_col]) or not pd.api.types.is_numeric_dtype(locations_df[original_lon_col]):
542
  st.error("Error: Selected Latitude and Longitude columns must contain numeric values")
543
  st.stop()
544
 
545
+ # Rename the selected columns to standard names for processing
546
  locations_df = locations_df.rename(columns={
547
  original_lat_col: 'latitude',
548
  original_lon_col: 'longitude'
 
584
  except Exception as e:
585
  st.error(f"Error parsing KML file: {str(e)}")
586
 
587
+ # Display map for points if we have valid data
588
  if not locations_df.empty and 'latitude' in locations_df.columns and 'longitude' in locations_df.columns:
589
  m = leafmap.Map(center=[locations_df['latitude'].mean(), locations_df['longitude'].mean()], zoom=10)
590
  for _, row in locations_df.iterrows():
 
626
  except Exception as e:
627
  st.error(f"Error parsing KML file: {str(e)}")
628
 
629
+ # Display map for polygons if we have valid data
630
  if not locations_df.empty and 'geometry' in locations_df.columns:
631
  centroid_lat = locations_df.geometry.centroid.y.mean()
632
  centroid_lon = locations_df.geometry.centroid.x.mean()
 
643
  if not locations_df.empty:
644
  with st.spinner("Processing Data..."):
645
  try:
646
+ results, processing_time = process_aggregation(
647
  locations_df,
648
  start_date_str,
649
  end_date_str,
 
659
  include_boundary
660
  )
661
 
662
+ if results:
663
+ result_df = pd.DataFrame(results)
664
  st.write(f"Processed Results Table ({aggregation_period}) for Formula: {custom_formula}")
665
  st.dataframe(result_df)
666
 
 
 
 
667
  filename = f"{main_selection}_{dataset_id}_{start_date.strftime('%Y%m%d')}_{end_date.strftime('%Y%m%d')}_{aggregation_period.lower()}.csv"
668
  st.download_button(
669
  label="Download results as CSV",