YashMK89 committed on
Commit
6f6b853
·
verified ·
1 Parent(s): 94ad9c1

update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -8
app.py CHANGED
@@ -213,6 +213,53 @@ def aggregate_data_yearly(collection):
213
  yearly_images = ee.List(grouped_by_year.map(calculate_yearly_mean))
214
  return ee.ImageCollection(yearly_images)
215
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
 
217
  # Worker function for processing a single geometry
218
  def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selected_bands, reducer_choice, shape_type, aggregation_period, custom_formula, original_lat_col, original_lon_col, kernel_size=None, include_boundary=None):
@@ -314,12 +361,21 @@ def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selec
314
 
315
 
316
  # Main processing function
317
- def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id, selected_bands, reducer_choice, shape_type, aggregation_period, original_lat_col, original_lon_col, custom_formula="", kernel_size=None, include_boundary=None):
318
  aggregated_results = []
319
  total_steps = len(locations_df)
320
  progress_bar = st.progress(0)
321
  progress_text = st.empty()
322
  start_time = time.time() # Start timing the process
 
 
 
 
 
 
 
 
 
323
  with ThreadPoolExecutor(max_workers=10) as executor:
324
  futures = []
325
  for idx, row in locations_df.iterrows():
@@ -349,9 +405,11 @@ def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id,
349
  progress_percentage = completed / total_steps
350
  progress_bar.progress(progress_percentage)
351
  progress_text.markdown(f"Processing: {int(progress_percentage * 100)}%")
 
352
  # End timing the process
353
  end_time = time.time()
354
  processing_time = end_time - start_time # Calculate total processing time
 
355
  if aggregated_results:
356
  result_df = pd.DataFrame(aggregated_results)
357
  if aggregation_period.lower() == 'custom (start date to end date)':
@@ -491,6 +549,16 @@ end_date = st.date_input("End Date", value=pd.to_datetime('2024-12-01'))
491
  start_date_str = start_date.strftime('%Y-%m-%d')
492
  end_date_str = end_date.strftime('%Y-%m-%d')
493
 
 
 
 
 
 
 
 
 
 
 
494
  aggregation_period = st.selectbox(
495
  "Select Aggregation Period (e.g, Custom(Start Date to End Date) , Daily , Weekly , Monthly , Yearly)",
496
  ["Custom (Start Date to End Date)", "Daily", "Weekly", "Monthly", "Yearly"],
@@ -699,16 +767,13 @@ if st.button(f"Calculate {custom_formula}"):
699
  original_lon_col,
700
  custom_formula,
701
  kernel_size,
702
- include_boundary
 
703
  )
704
  if results:
705
  result_df = pd.DataFrame(results)
706
-
707
- # Display processed results table
708
  st.write(f"Processed Results Table ({aggregation_period}) for Formula: {custom_formula}")
709
  st.dataframe(result_df)
710
-
711
- # Download button for results
712
  filename = f"{main_selection}_{dataset_id}_{start_date.strftime('%Y%m%d')}_{end_date.strftime('%Y%m%d')}_{aggregation_period.lower()}.csv"
713
  st.download_button(
714
  label="Download results as CSV",
@@ -716,8 +781,6 @@ if st.button(f"Calculate {custom_formula}"):
716
  file_name=filename,
717
  mime='text/csv'
718
  )
719
-
720
- # Success message
721
  st.success(f"Processing complete! Total processing time: {processing_time:.2f} seconds.")
722
 
723
  # Graph Visualization Section
 
213
  yearly_images = ee.List(grouped_by_year.map(calculate_yearly_mean))
214
  return ee.ImageCollection(yearly_images)
215
 
216
def calculate_cloud_percentage(image, cloud_band='MSK_CLDPRB', probability_threshold=50, scale=30):
    """
    Calculate the percentage of cloud-covered pixels in an image.

    Args:
        image: ee.Image containing a cloud probability band.
        cloud_band: Name of the cloud probability band (default 'MSK_CLDPRB').
        probability_threshold: Pixels whose cloud probability is strictly
            greater than this value are counted as cloudy (default 50).
        scale: Scale passed to ``reduceRegion`` for both reductions
            (default 30).

    Returns:
        ee.Number: (cloudy pixel count / counted pixel count) * 100,
        evaluated over ``image.geometry()``.
    """
    probability = image.select(cloud_band)
    region = image.geometry()

    def _reduce_band(band_image, reducer):
        # Both reductions must share geometry/scale/maxPixels so that the
        # numerator and denominator are computed over the same pixels.
        return band_image.reduceRegion(
            reducer=reducer,
            geometry=region,
            scale=scale,
            maxPixels=1e13
        ).get(cloud_band)

    # Denominator: number of pixels counted in the probability band.
    total_pixels = _reduce_band(probability, ee.Reducer.count())

    # Numerator: the comparison yields 1 for cloudy pixels and 0 otherwise,
    # so summing it counts the cloudy pixels.
    cloudy_pixels = _reduce_band(probability.gt(probability_threshold), ee.Reducer.sum())

    return ee.Number(cloudy_pixels).divide(ee.Number(total_pixels)).multiply(100)
237
+
238
+ # Preprocessing function with cloud filtering
239
def preprocess_collection(collection, cloud_threshold, cloud_band='MSK_CLDPRB'):
    """
    Apply cloud filtering to the image collection.

    - Tile-based filtering: images whose cloud percentage is not strictly
      below ``cloud_threshold`` are removed from the collection.
    - Pixel-based filtering: in the remaining images, pixels with a cloud
      probability above 50 are masked out.

    Args:
        collection: ee.ImageCollection whose images contain ``cloud_band``.
        cloud_threshold: Maximum allowed cloud percentage per image
            (exclusive upper bound).
        cloud_band: Name of the cloud probability band (default 'MSK_CLDPRB').

    Returns:
        ee.ImageCollection: the retained images, each carrying a
        'cloud_percentage' property and with cloudy pixels masked.
    """
    # Pixels at or below this cloud probability are treated as clear.
    clear_probability_max = 50

    def tag_cloud_percentage(image):
        # Store the per-image cloud percentage as a property so the
        # collection can be filtered on it.
        cloud_percentage = calculate_cloud_percentage(image, cloud_band)
        return image.set('cloud_percentage', cloud_percentage)

    def mask_cloudy_pixels(image):
        # Keep only pixels whose cloud probability marks them as clear.
        clear_mask = image.select(cloud_band).lte(clear_probability_max)
        return image.updateMask(clear_mask)

    # Step 1: tile-based filtering. Drop over-threshold images instead of
    # masking them, so they no longer appear in the collection at all.
    tile_filtered = collection.map(tag_cloud_percentage).filter(
        ee.Filter.lt('cloud_percentage', cloud_threshold)
    )

    # Step 2: pixel-based filtering on the images that remain.
    return tile_filtered.map(mask_cloudy_pixels)
263
 
264
  # Worker function for processing a single geometry
265
  def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selected_bands, reducer_choice, shape_type, aggregation_period, custom_formula, original_lat_col, original_lon_col, kernel_size=None, include_boundary=None):
 
361
 
362
 
363
  # Main processing function
364
+ def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id, selected_bands, reducer_choice, shape_type, aggregation_period, original_lat_col, original_lon_col, custom_formula="", kernel_size=None, include_boundary=None, cloud_threshold=0):
365
  aggregated_results = []
366
  total_steps = len(locations_df)
367
  progress_bar = st.progress(0)
368
  progress_text = st.empty()
369
  start_time = time.time() # Start timing the process
370
+
371
+ # Preprocess the image collection with cloud filtering
372
+ raw_collection = ee.ImageCollection(dataset_id) \
373
+ .filterDate(ee.Date(start_date_str), ee.Date(end_date_str))
374
+
375
+ # Apply cloud filtering if threshold > 0
376
+ if cloud_threshold > 0:
377
+ raw_collection = preprocess_collection(raw_collection, cloud_threshold)
378
+
379
  with ThreadPoolExecutor(max_workers=10) as executor:
380
  futures = []
381
  for idx, row in locations_df.iterrows():
 
405
  progress_percentage = completed / total_steps
406
  progress_bar.progress(progress_percentage)
407
  progress_text.markdown(f"Processing: {int(progress_percentage * 100)}%")
408
+
409
  # End timing the process
410
  end_time = time.time()
411
  processing_time = end_time - start_time # Calculate total processing time
412
+
413
  if aggregated_results:
414
  result_df = pd.DataFrame(aggregated_results)
415
  if aggregation_period.lower() == 'custom (start date to end date)':
 
549
  start_date_str = start_date.strftime('%Y-%m-%d')
550
  end_date_str = end_date.strftime('%Y-%m-%d')
551
 
552
+ st.markdown("<h5>Cloud Filtering</h5>", unsafe_allow_html=True)
553
+ cloud_threshold = st.slider(
554
+ "Select Maximum Cloud Coverage Threshold (%)",
555
+ min_value=0,
556
+ max_value=50,
557
+ value=20,
558
+ step=5,
559
+ help="Tiles with cloud coverage exceeding this threshold will be excluded. Individual cloudy pixels will also be masked."
560
+ )
561
+
562
  aggregation_period = st.selectbox(
563
  "Select Aggregation Period (e.g, Custom(Start Date to End Date) , Daily , Weekly , Monthly , Yearly)",
564
  ["Custom (Start Date to End Date)", "Daily", "Weekly", "Monthly", "Yearly"],
 
767
  original_lon_col,
768
  custom_formula,
769
  kernel_size,
770
+ include_boundary,
771
+ cloud_threshold=cloud_threshold
772
  )
773
  if results:
774
  result_df = pd.DataFrame(results)
 
 
775
  st.write(f"Processed Results Table ({aggregation_period}) for Formula: {custom_formula}")
776
  st.dataframe(result_df)
 
 
777
  filename = f"{main_selection}_{dataset_id}_{start_date.strftime('%Y%m%d')}_{end_date.strftime('%Y%m%d')}_{aggregation_period.lower()}.csv"
778
  st.download_button(
779
  label="Download results as CSV",
 
781
  file_name=filename,
782
  mime='text/csv'
783
  )
 
 
784
  st.success(f"Processing complete! Total processing time: {processing_time:.2f} seconds.")
785
 
786
  # Graph Visualization Section