Spaces:

AgricultureLab2024
/

SATRANG

Running

App Files Files Community

YashMK89 committed 3 days ago

Commit

6f6b853

verified ·

1 Parent(s): 94ad9c1

update app.py

Browse files

Files changed (1) hide show

app.py +71 -8

app.py CHANGED Viewed

@@ -213,6 +213,53 @@ def aggregate_data_yearly(collection):
     yearly_images = ee.List(grouped_by_year.map(calculate_yearly_mean))
     return ee.ImageCollection(yearly_images)
 # Worker function for processing a single geometry
 def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selected_bands, reducer_choice, shape_type, aggregation_period, custom_formula, original_lat_col, original_lon_col, kernel_size=None, include_boundary=None):
@@ -314,12 +361,21 @@ def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selec
 # Main processing function
-def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id, selected_bands, reducer_choice, shape_type, aggregation_period, original_lat_col, original_lon_col, custom_formula="", kernel_size=None, include_boundary=None):
     aggregated_results = []
     total_steps = len(locations_df)
     progress_bar = st.progress(0)
     progress_text = st.empty()
     start_time = time.time()  # Start timing the process
     with ThreadPoolExecutor(max_workers=10) as executor:
         futures = []
         for idx, row in locations_df.iterrows():
@@ -349,9 +405,11 @@ def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id,
             progress_percentage = completed / total_steps
             progress_bar.progress(progress_percentage)
             progress_text.markdown(f"Processing: {int(progress_percentage * 100)}%")
     # End timing the process
     end_time = time.time()
     processing_time = end_time - start_time  # Calculate total processing time
     if aggregated_results:
         result_df = pd.DataFrame(aggregated_results)
         if aggregation_period.lower() == 'custom (start date to end date)':
@@ -491,6 +549,16 @@ end_date = st.date_input("End Date", value=pd.to_datetime('2024-12-01'))
 start_date_str = start_date.strftime('%Y-%m-%d')
 end_date_str = end_date.strftime('%Y-%m-%d')
 aggregation_period = st.selectbox(
     "Select Aggregation Period (e.g, Custom(Start Date to End Date) , Daily , Weekly , Monthly , Yearly)",
     ["Custom (Start Date to End Date)", "Daily", "Weekly", "Monthly", "Yearly"],
@@ -699,16 +767,13 @@ if st.button(f"Calculate {custom_formula}"):
                     original_lon_col,
                     custom_formula,
                     kernel_size,
-                    include_boundary
                 )
                 if results:
                     result_df = pd.DataFrame(results)
-                    # Display processed results table
                     st.write(f"Processed Results Table ({aggregation_period}) for Formula: {custom_formula}")
                     st.dataframe(result_df)
-                    # Download button for results
                     filename = f"{main_selection}_{dataset_id}_{start_date.strftime('%Y%m%d')}_{end_date.strftime('%Y%m%d')}_{aggregation_period.lower()}.csv"
                     st.download_button(
                         label="Download results as CSV",
@@ -716,8 +781,6 @@ if st.button(f"Calculate {custom_formula}"):
                         file_name=filename,
                         mime='text/csv'
                     )
-                    # Success message
                     st.success(f"Processing complete! Total processing time: {processing_time:.2f} seconds.")
                     # Graph Visualization Section

     yearly_images = ee.List(grouped_by_year.map(calculate_yearly_mean))
     return ee.ImageCollection(yearly_images)
+def calculate_cloud_percentage(image, cloud_band='MSK_CLDPRB'):
+    """
+    Calculate the percentage of cloud-covered pixels in an image.
+    Assumes the presence of a cloud probability band (e.g., 'MSK_CLDPRB').
+    """
+    cloud_mask = image.select(cloud_band).gt(50)  # Pixels with cloud probability > 50% are considered cloudy
+    total_pixels = image.select(cloud_band).reduceRegion(
+        reducer=ee.Reducer.count(),
+        geometry=image.geometry(),
+        scale=30,
+        maxPixels=1e13
+    ).get(cloud_band)
+    cloudy_pixels = cloud_mask.reduceRegion(
+        reducer=ee.Reducer.sum(),
+        geometry=image.geometry(),
+        scale=30,
+        maxPixels=1e13
+    ).get(cloud_band)
+    return ee.Number(cloudy_pixels).divide(ee.Number(total_pixels)).multiply(100)
+# Preprocessing function with cloud filtering
+def preprocess_collection(collection, cloud_threshold):
+    """
+    Apply cloud filtering to the image collection.
+    - Tile-based filtering: Exclude tiles with cloud coverage exceeding the selected threshold.
+    - Pixel-based filtering: Mask out individual cloudy pixels.
+    """
+    def filter_tile(image):
+        # Calculate cloud percentage for the tile
+        cloud_percentage = calculate_cloud_percentage(image)
+        # Keep the tile only if cloud percentage is below the threshold
+        return image.set('cloud_percentage', cloud_percentage).updateMask(cloud_percentage.lt(cloud_threshold))
+    def mask_cloudy_pixels(image):
+        # Mask out individual cloudy pixels based on cloud probability
+        cloud_mask = image.select('MSK_CLDPRB').lte(50)  # Pixels with cloud probability <= 50% are clear
+        return image.updateMask(cloud_mask)
+    # Step 1: Apply tile-based filtering
+    filtered_collection = collection.map(filter_tile)
+    # Step 2: Apply pixel-based filtering
+    masked_collection = filtered_collection.map(mask_cloudy_pixels)
+    return masked_collection
 # Worker function for processing a single geometry
 def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selected_bands, reducer_choice, shape_type, aggregation_period, custom_formula, original_lat_col, original_lon_col, kernel_size=None, include_boundary=None):
 # Main processing function
+def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id, selected_bands, reducer_choice, shape_type, aggregation_period, original_lat_col, original_lon_col, custom_formula="", kernel_size=None, include_boundary=None, cloud_threshold=0):
     aggregated_results = []
     total_steps = len(locations_df)
     progress_bar = st.progress(0)
     progress_text = st.empty()
     start_time = time.time()  # Start timing the process
+    # Preprocess the image collection with cloud filtering
+    raw_collection = ee.ImageCollection(dataset_id) \
+        .filterDate(ee.Date(start_date_str), ee.Date(end_date_str))
+    # Apply cloud filtering if threshold > 0
+    if cloud_threshold > 0:
+        raw_collection = preprocess_collection(raw_collection, cloud_threshold)
     with ThreadPoolExecutor(max_workers=10) as executor:
         futures = []
         for idx, row in locations_df.iterrows():
             progress_percentage = completed / total_steps
             progress_bar.progress(progress_percentage)
             progress_text.markdown(f"Processing: {int(progress_percentage * 100)}%")
     # End timing the process
     end_time = time.time()
     processing_time = end_time - start_time  # Calculate total processing time
     if aggregated_results:
         result_df = pd.DataFrame(aggregated_results)
         if aggregation_period.lower() == 'custom (start date to end date)':
 start_date_str = start_date.strftime('%Y-%m-%d')
 end_date_str = end_date.strftime('%Y-%m-%d')
+st.markdown("<h5>Cloud Filtering</h5>", unsafe_allow_html=True)
+cloud_threshold = st.slider(
+    "Select Maximum Cloud Coverage Threshold (%)",
+    min_value=0,
+    max_value=50,
+    value=20,
+    step=5,
+    help="Tiles with cloud coverage exceeding this threshold will be excluded. Individual cloudy pixels will also be masked."
+)
 aggregation_period = st.selectbox(
     "Select Aggregation Period (e.g, Custom(Start Date to End Date) , Daily , Weekly , Monthly , Yearly)",
     ["Custom (Start Date to End Date)", "Daily", "Weekly", "Monthly", "Yearly"],
                     original_lon_col,
                     custom_formula,
                     kernel_size,
+                    include_boundary,
+                    cloud_threshold=cloud_threshold
                 )
                 if results:
                     result_df = pd.DataFrame(results)
                     st.write(f"Processed Results Table ({aggregation_period}) for Formula: {custom_formula}")
                     st.dataframe(result_df)
                     filename = f"{main_selection}_{dataset_id}_{start_date.strftime('%Y%m%d')}_{end_date.strftime('%Y%m%d')}_{aggregation_period.lower()}.csv"
                     st.download_button(
                         label="Download results as CSV",
                         file_name=filename,
                         mime='text/csv'
                     )
                     st.success(f"Processing complete! Total processing time: {processing_time:.2f} seconds.")
                     # Graph Visualization Section