Spaces:

AgricultureLab2024
/

SATRANG

Running

App Files Community

YashMK89 committed on Apr 24

Commit

d805acf

verified ·

1 Parent(s): 848c770

update app.py

Browse files

Files changed (1) hide show

app.py +99 -262

app.py CHANGED Viewed

@@ -3,7 +3,6 @@ import json
 import ee
 import os
 import pandas as pd
-import numpy as np
 import geopandas as gpd
 from datetime import datetime
 import leafmap.foliumap as leafmap
@@ -12,8 +11,9 @@ from shapely.geometry import base
 from xml.etree import ElementTree as XET
 from concurrent.futures import ThreadPoolExecutor, as_completed
 import time
-import matplotlib.pyplot as plt
-import plotly.express as px
 # Set up the page layout
 st.set_page_config(layout="wide")
@@ -107,49 +107,28 @@ def convert_to_ee_geometry(geometry):
     else:
         raise ValueError("Unsupported geometry input type. Supported types are Shapely, GeoJSON, and KML.")
-# Function to calculate custom formula with dynamic scale handling
-def calculate_custom_formula(image, geometry, selected_bands, custom_formula, reducer_choice, dataset_id, user_scale=None):
     try:
-        # Fetch the nominal scales of the selected bands
-        band_scales = []
-        for band in selected_bands:
-            band_scale = image.select(band).projection().nominalScale().getInfo()
-            band_scales.append(band_scale)
-        default_scale = min(band_scales) if band_scales else 30  # Default to 30m if no bands are found
-        scale = user_scale if user_scale is not None else default_scale
-        # Rescale all bands to the chosen scale
-        rescaled_bands = {}
         for band in selected_bands:
-            band_image = image.select(band)
-            band_scale = band_image.projection().nominalScale().getInfo()
-            if band_scale != scale:
-                rescaled_band = band_image.resample('bilinear').reproject(
-                    crs=band_image.projection().crs(),
-                    scale=scale
-                )
-                rescaled_bands[band] = rescaled_band
-            else:
-                rescaled_bands[band] = band_image
-        # Validate and extract band values
-        reduced_values = {}
         reducer = get_reducer(reducer_choice)
         for band in selected_bands:
-            value = rescaled_bands[band].reduceRegion(
                 reducer=reducer,
                 geometry=geometry,
                 scale=scale
             ).get(band).getInfo()
             reduced_values[band] = float(value if value is not None else 0)
-        # Evaluate the custom formula
         formula = custom_formula
         for band in selected_bands:
             formula = formula.replace(band, str(reduced_values[band]))
         result = eval(formula, {"__builtins__": {}}, reduced_values)
-        # Validate the result
         if not isinstance(result, (int, float)):
             raise ValueError("Formula did not result in a numeric value.")
         return ee.Image.constant(result).rename('custom_result')
@@ -177,35 +156,21 @@ def aggregate_data_custom(collection):
     daily_images = ee.List(grouped_by_day.map(calculate_daily_mean))
     return ee.ImageCollection(daily_images)
-def aggregate_data_daily(collection):
-    def set_day_start(image):
         date = ee.Date(image.get('system:time_start'))
-        day_start = date.format('YYYY-MM-dd')
-        return image.set('day_start', day_start)
-    collection = collection.map(set_day_start)
-    grouped_by_day = collection.aggregate_array('day_start').distinct()
-    def calculate_daily_mean(day_start):
-        daily_collection = collection.filter(ee.Filter.eq('day_start', day_start))
-        daily_mean = daily_collection.mean()
-        return daily_mean.set('day_start', day_start)
-    daily_images = ee.List(grouped_by_day.map(calculate_daily_mean))
-    return ee.ImageCollection(daily_images)
-def aggregate_data_weekly(collection, start_date_str, end_date_str):
-    start_date = ee.Date(start_date_str)
-    end_date = ee.Date(end_date_str)
-    days_diff = end_date.difference(start_date, 'day')
-    num_weeks = days_diff.divide(7).ceil().getInfo()
-    weekly_images = []
-    for week in range(num_weeks):
-        week_start = start_date.advance(week * 7, 'day')
-        week_end = week_start.advance(7, 'day')
-        weekly_collection = collection.filterDate(week_start, week_end)
-        if weekly_collection.size().getInfo() > 0:
-            weekly_mean = weekly_collection.mean()
-            weekly_mean = weekly_mean.set('week_start', week_start.format('YYYY-MM-dd'))
-            weekly_images.append(weekly_mean)
-    return ee.ImageCollection.fromImages(weekly_images)
 def aggregate_data_monthly(collection, start_date, end_date):
     collection = collection.filterDate(start_date, end_date)
@@ -228,57 +193,35 @@ def aggregate_data_yearly(collection):
     yearly_images = ee.List(grouped_by_year.map(calculate_yearly_mean))
     return ee.ImageCollection(yearly_images)
-# Cloud percentage calculation
-def calculate_cloud_percentage(image, cloud_band='QA60'):
-    qa60 = image.select(cloud_band)
-    opaque_clouds = qa60.bitwiseAnd(1 << 10)
-    cirrus_clouds = qa60.bitwiseAnd(1 << 11)
-    cloud_mask = opaque_clouds.Or(cirrus_clouds)
-    total_pixels = qa60.reduceRegion(
-        reducer=ee.Reducer.count(),
-        geometry=image.geometry(),
-        scale=60,
-        maxPixels=1e13
-    ).get(cloud_band)
-    cloudy_pixels = cloud_mask.reduceRegion(
-        reducer=ee.Reducer.sum(),
-        geometry=image.geometry(),
-        scale=60,
-        maxPixels=1e13
-    ).get(cloud_band)
-    if total_pixels == 0:
-        return 0
-    return ee.Number(cloudy_pixels).divide(ee.Number(total_pixels)).multiply(100)
-# Preprocessing function
-def preprocess_collection(collection, pixel_cloud_threshold):
-    def mask_cloudy_pixels(image):
-        qa60 = image.select('QA60')
-        opaque_clouds = qa60.bitwiseAnd(1 << 10)
-        cirrus_clouds = qa60.bitwiseAnd(1 << 11)
-        cloud_mask = opaque_clouds.Or(cirrus_clouds)
-        clear_pixels = cloud_mask.Not()
-        return image.updateMask(clear_pixels)
-    if pixel_cloud_threshold > 0:
-        return collection.map(mask_cloudy_pixels)
-    return collection
-# Process single geometry
-def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selected_bands, reducer_choice, shape_type, aggregation_period, custom_formula, original_lat_col, original_lon_col, kernel_size=None, include_boundary=None, user_scale=None, pixel_cloud_threshold=0):
     if shape_type.lower() == "point":
         latitude = row.get('latitude')
         longitude = row.get('longitude')
         if pd.isna(latitude) or pd.isna(longitude):
-            return None
         location_name = row.get('name', f"Location_{row.name}")
         if kernel_size == "3x3 Kernel":
-            buffer_size = 45
             roi = ee.Geometry.Point([longitude, latitude]).buffer(buffer_size).bounds()
         elif kernel_size == "5x5 Kernel":
-            buffer_size = 75
             roi = ee.Geometry.Point([longitude, latitude]).buffer(buffer_size).bounds()
-        else:
             roi = ee.Geometry.Point([longitude, latitude])
     elif shape_type.lower() == "polygon":
         polygon_geometry = row.get('geometry')
@@ -288,33 +231,25 @@ def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selec
             if not include_boundary:
                 roi = roi.buffer(-30).bounds()
         except ValueError:
-            return None
-    # Filter collection by date and area first
     collection = ee.ImageCollection(dataset_id) \
         .filterDate(ee.Date(start_date_str), ee.Date(end_date_str)) \
-        .filterBounds(roi)
-    st.write(f"After initial filtering: {collection.size().getInfo()} images")
-    # Apply pixel cloud masking if threshold > 0
-    if pixel_cloud_threshold > 0:
-        collection = preprocess_collection(collection, pixel_cloud_threshold)
-        st.write(f"After cloud masking: {collection.size().getInfo()} images")
     if aggregation_period.lower() == 'custom (start date to end date)':
         collection = aggregate_data_custom(collection)
     elif aggregation_period.lower() == 'daily':
         collection = aggregate_data_daily(collection)
     elif aggregation_period.lower() == 'weekly':
-        collection = aggregate_data_weekly(collection, start_date_str, end_date_str)
     elif aggregation_period.lower() == 'monthly':
         collection = aggregate_data_monthly(collection, start_date_str, end_date_str)
     elif aggregation_period.lower() == 'yearly':
         collection = aggregate_data_yearly(collection)
     image_list = collection.toList(collection.size())
-    processed_weeks = set()
     aggregated_results = []
     for i in range(image_list.size().getInfo()):
         image = ee.Image(image_list.get(i))
@@ -324,17 +259,12 @@ def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selec
             date = ee.Date(timestamp).format('YYYY-MM-dd').getInfo()
         elif aggregation_period.lower() == 'daily':
             timestamp = image.get('day_start')
-            period_label = 'Date'
             date = ee.String(timestamp).getInfo()
         elif aggregation_period.lower() == 'weekly':
             timestamp = image.get('week_start')
             period_label = 'Week'
             date = ee.String(timestamp).getInfo()
-            if (pd.to_datetime(date) < pd.to_datetime(start_date_str) or
-                pd.to_datetime(date) > pd.to_datetime(end_date_str) or
-                date in processed_weeks):
-                continue
-            processed_weeks.add(date)
         elif aggregation_period.lower() == 'monthly':
             timestamp = image.get('month')
             period_label = 'Month'
@@ -343,13 +273,12 @@ def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selec
             timestamp = image.get('year')
             period_label = 'Year'
             date = ee.Date(timestamp).format('YYYY').getInfo()
-        index_image = calculate_custom_formula(image, roi, selected_bands, custom_formula, reducer_choice, dataset_id, user_scale=user_scale)
         try:
             index_value = index_image.reduceRegion(
                 reducer=get_reducer(reducer_choice),
                 geometry=roi,
-                scale=user_scale
             ).get('custom_result')
             calculated_value = index_value.getInfo()
             if isinstance(calculated_value, (int, float)):
@@ -361,30 +290,20 @@ def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selec
                     'Calculated Value': calculated_value
                 }
                 if shape_type.lower() == 'point':
-                    result[original_lat_col] = latitude
-                    result[original_lon_col] = longitude
                 aggregated_results.append(result)
         except Exception as e:
             st.error(f"Error retrieving value for {location_name}: {e}")
     return aggregated_results
-# Process aggregation
-def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id, selected_bands, reducer_choice, shape_type, aggregation_period, original_lat_col, original_lon_col, custom_formula="", kernel_size=None, include_boundary=None, tile_cloud_threshold=0, pixel_cloud_threshold=0, user_scale=None):
     aggregated_results = []
     total_steps = len(locations_df)
     progress_bar = st.progress(0)
     progress_text = st.empty()
-    start_time = time.time()
-    raw_collection = ee.ImageCollection(dataset_id) \
-        .filterDate(ee.Date(start_date_str), ee.Date(end_date_str))
-    # st.write(f"Original Collection Size: {raw_collection.size().getInfo()}")
-    if tile_cloud_threshold > 0 or pixel_cloud_threshold > 0:
-        raw_collection = preprocess_collection(raw_collection, pixel_cloud_threshold)
-        # st.write(f"Preprocessed Collection Size: {raw_collection.size().getInfo()}")
     with ThreadPoolExecutor(max_workers=10) as executor:
         futures = []
         for idx, row in locations_df.iterrows():
@@ -399,11 +318,8 @@ def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id,
                 shape_type,
                 aggregation_period,
                 custom_formula,
-                original_lat_col,
-                original_lon_col,
                 kernel_size,
-                include_boundary,
-                user_scale=user_scale
             )
             futures.append(future)
         completed = 0
@@ -415,10 +331,9 @@ def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id,
             progress_percentage = completed / total_steps
             progress_bar.progress(progress_percentage)
             progress_text.markdown(f"Processing: {int(progress_percentage * 100)}%")
     end_time = time.time()
-    processing_time = end_time - start_time
     if aggregated_results:
         result_df = pd.DataFrame(aggregated_results)
         if aggregation_period.lower() == 'custom (start date to end date)':
@@ -428,19 +343,22 @@ def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id,
                 'Calculated Value': 'mean'
             }
             if shape_type.lower() == 'point':
-                agg_dict[original_lat_col] = 'first'
-                agg_dict[original_lon_col] = 'first'
             aggregated_output = result_df.groupby('Location Name').agg(agg_dict).reset_index()
-            aggregated_output['Date Range'] = aggregated_output['Start Date'] + " to " + aggregated_output['End Date']
-            return aggregated_output.to_dict(orient='records'), processing_time
         else:
-            return result_df.to_dict(orient='records'), processing_time
-    return [], processing_time
 # Streamlit App Logic
 st.markdown("<h5>Image Collection</h5>", unsafe_allow_html=True)
 imagery_base = st.selectbox("Select Imagery Base", ["Sentinel", "Landsat", "MODIS", "VIIRS", "Custom Input"], index=0)
 data = {}
 if imagery_base == "Sentinel":
     dataset_file = "sentinel_datasets.json"
     try:
@@ -480,9 +398,7 @@ elif imagery_base == "Custom Input":
             if custom_dataset_id.startswith("ee.ImageCollection("):
                 custom_dataset_id = custom_dataset_id.replace("ee.ImageCollection('", "").replace("')", "")
             collection = ee.ImageCollection(custom_dataset_id)
-            first_image = collection.first()
-            default_scale = first_image.projection().nominalScale().getInfo()
-            band_names = first_image.bandNames().getInfo()
             data = {
                 f"Custom Dataset: {custom_dataset_id}": {
                     "sub_options": {custom_dataset_id: f"Custom Dataset ({custom_dataset_id})"},
@@ -497,14 +413,18 @@ elif imagery_base == "Custom Input":
     else:
         st.warning("Please enter a custom dataset ID to proceed.")
         data = {}
 if not data:
     st.error("No valid dataset available. Please check your inputs.")
     st.stop()
 st.markdown("<hr><h5><b>{}</b></h5>".format(imagery_base), unsafe_allow_html=True)
 main_selection = st.selectbox(f"Select {imagery_base} Dataset Category", list(data.keys()))
 sub_selection = None
 dataset_id = None
 if main_selection:
     sub_options = data[main_selection]["sub_options"]
     sub_selection = st.selectbox(f"Select Specific {imagery_base} Dataset ID", list(sub_options.keys()))
@@ -512,7 +432,6 @@ if main_selection:
         st.write(f"You selected: {main_selection} -> {sub_options[sub_selection]}")
         st.write(f"Dataset ID: {sub_selection}")
         dataset_id = sub_selection
-        # Fetch the default scale for the selected dataset
         try:
             collection = ee.ImageCollection(dataset_id)
             first_image = collection.first()
@@ -522,58 +441,21 @@ if main_selection:
             st.error(f"Error fetching default scale: {str(e)}")
 st.markdown("<hr><h5><b>Earth Engine Index Calculator</b></h5>", unsafe_allow_html=True)
 if main_selection and sub_selection:
     dataset_bands = data[main_selection]["bands"].get(sub_selection, [])
     st.write(f"Available Bands for {sub_options[sub_selection]}: {', '.join(dataset_bands)}")
-    # Fetch nominal scales for all bands in the selected dataset
-    if dataset_id:
-        try:
-            # Fetch the first image from the collection to extract band information
-            collection = ee.ImageCollection(dataset_id)
-            first_image = collection.first()
-            band_names = first_image.bandNames().getInfo()
-            # Extract scales for all bands
-            band_scales = []
-            for band in band_names:
-                band_scale = first_image.select(band).projection().nominalScale().getInfo()
-                band_scales.append(band_scale)
-            # Identify unique scales using np.unique
-            unique_scales = np.unique(band_scales)
-            # Display the unique scales to the user
-            st.write(f"Nominal Scales for Bands: {band_scales}")
-            st.write(f"Unique Scales in Dataset: {unique_scales}")
-            # If there are multiple unique scales, allow the user to choose one
-            if len(unique_scales) > 1:
-                selected_scale = st.selectbox(
-                    "Select a Scale for Calculation (meters)",
-                    options=unique_scales,
-                    index=0,
-                    help="Choose a scale from the unique scales available in the dataset."
-                )
-                default_scale = selected_scale
-            else:
-                default_scale = unique_scales[0]
-                st.write(f"Default Scale for Dataset: {default_scale} meters")
-        except Exception as e:
-            st.error(f"Error fetching band scales: {str(e)}")
-            default_scale = 30  # Fallback to 30 meters if an error occurs
     selected_bands = st.multiselect(
-        "Select 1 or 2 Bands for Calculation",
         options=dataset_bands,
         default=[dataset_bands[0]] if dataset_bands else [],
-        help=f"Select 1 or 2 bands from: {', '.join(dataset_bands)}"
     )
     if len(selected_bands) < 1:
         st.warning("Please select at least one band.")
         st.stop()
     if selected_bands:
         if len(selected_bands) == 1:
             default_formula = f"{selected_bands[0]}"
@@ -586,6 +468,7 @@ if main_selection and sub_selection:
             value=default_formula,
             help=f"Use only these bands: {', '.join(selected_bands)}. Examples: {example}"
         )
         def validate_formula(formula, selected_bands):
             allowed_chars = set(" +-*/()0123456789.")
             terms = re.findall(r'[a-zA-Z][a-zA-Z0-9_]*', formula)
@@ -595,6 +478,7 @@ if main_selection and sub_selection:
             if not all(char in allowed_chars or char in ''.join(selected_bands) for char in formula):
                 return False, "Formula contains invalid characters. Use only bands, numbers, and operators (+, -, *, /, ())"
             return True, ""
         is_valid, error_message = validate_formula(custom_formula, selected_bands)
         if not is_valid:
             st.error(error_message)
@@ -602,6 +486,7 @@ if main_selection and sub_selection:
         elif not custom_formula:
             st.warning("Please enter a custom formula to proceed.")
             st.stop()
         st.write(f"Custom Formula: {custom_formula}")
 reducer_choice = st.selectbox(
@@ -615,17 +500,6 @@ end_date = st.date_input("End Date", value=pd.to_datetime('2024-12-01'))
 start_date_str = start_date.strftime('%Y-%m-%d')
 end_date_str = end_date.strftime('%Y-%m-%d')
-if imagery_base == "Sentinel" and "Sentinel-2" in sub_options[sub_selection]:
-    st.markdown("<h5>Cloud Filtering</h5>", unsafe_allow_html=True)
-    pixel_cloud_threshold = st.slider(
-        "Select Maximum Pixel-Based Cloud Coverage Threshold (%)",
-        min_value=0,
-        max_value=100,
-        value=5,
-        step=5,
-        help="Individual pixels with cloud coverage exceeding this threshold will be masked."
-    )
 aggregation_period = st.selectbox(
     "Select Aggregation Period (e.g, Custom(Start Date to End Date) , Daily , Weekly , Monthly , Yearly)",
     ["Custom (Start Date to End Date)", "Daily", "Weekly", "Monthly", "Yearly"],
@@ -633,6 +507,7 @@ aggregation_period = st.selectbox(
 )
 shape_type = st.selectbox("Do you want to process 'Point' or 'Polygon' data?", ["Point", "Polygon"])
 kernel_size = None
 include_boundary = None
@@ -650,23 +525,6 @@ elif shape_type.lower() == "polygon":
         help="Check to include pixels on the polygon boundary; uncheck to exclude them."
     )
-# st.markdown("<h5>Calculation Scale</h5>", unsafe_allow_html=True)
-# default_scale = ee.ImageCollection(dataset_id).first().select(0).projection().nominalScale().getInfo()
-# user_scale = st.number_input(
-#     "Enter Calculation Scale (meters) [Leave blank to use dataset's default scale]",
-#     min_value=1.0,
-#     value=float(default_scale),
-#     help=f"Default scale for this dataset is {default_scale} meters. Adjust if needed."
-# )
-st.markdown("<h5>Calculation Scale</h5>", unsafe_allow_html=True)
-user_scale = st.number_input(
-    "Enter Calculation Scale (meters) [Leave blank to use dataset's default scale]",
-    min_value=1.0,
-    value=float(default_scale),
-    help=f"Default scale for this dataset is {default_scale} meters. Adjust if needed."
-)
 file_upload = st.file_uploader(f"Upload your {shape_type} data (CSV, GeoJSON, KML)", type=["csv", "geojson", "kml"])
 locations_df = pd.DataFrame()
 original_lat_col = None
@@ -789,8 +647,10 @@ if file_upload is not None:
 if st.button(f"Calculate {custom_formula}"):
     if not locations_df.empty:
         with st.spinner("Processing Data..."):
             try:
                 results, processing_time = process_aggregation(
                     locations_df,
                     start_date_str,
@@ -800,53 +660,30 @@ if st.button(f"Calculate {custom_formula}"):
                     reducer_choice,
                     shape_type,
                     aggregation_period,
-                    original_lat_col,
-                    original_lon_col,
-                    custom_formula=custom_formula,
-                    kernel_size=kernel_size,
-                    include_boundary=include_boundary,
-                    pixel_cloud_threshold=pixel_cloud_threshold if "pixel_cloud_threshold" in locals() else 0,
-                    user_scale=user_scale
                 )
                 if results:
                     result_df = pd.DataFrame(results)
                     st.write(f"Processed Results Table ({aggregation_period}) for Formula: {custom_formula}")
                     st.dataframe(result_df)
-                    filename = f"{main_selection}_{dataset_id}_{start_date.strftime('%Y%m%d')}_{end_date.strftime('%Y%m%d')}_{aggregation_period.lower()}.csv"
                     st.download_button(
                         label="Download results as CSV",
                         data=result_df.to_csv(index=False).encode('utf-8'),
                         file_name=filename,
                         mime='text/csv'
                     )
                     st.success(f"Processing complete! Total processing time: {processing_time:.2f} seconds.")
-                    st.markdown("<h5>Graph Visualization</h5>", unsafe_allow_html=True)
-                    if aggregation_period.lower() == 'custom (start date to end date)':
-                        x_column = 'Date Range'
-                    elif 'Date' in result_df.columns:
-                        x_column = 'Date'
-                    elif 'Week' in result_df.columns:
-                        x_column = 'Week'
-                    elif 'Month' in result_df.columns:
-                        x_column = 'Month'
-                    elif 'Year' in result_df.columns:
-                        x_column = 'Year'
-                    else:
-                        st.warning("No valid time column found for plotting.")
-                        st.stop()
-                    y_column = 'Calculated Value'
-                    fig = px.line(
-                        result_df,
-                        x=x_column,
-                        y=y_column,
-                        color='Location Name',
-                        title=f"{custom_formula} Over Time"
-                    )
-                    st.plotly_chart(fig)
                 else:
                     st.warning("No results were generated. Check your inputs or formula.")
-                    st.info(f"Total processing time: {processing_time:.2f} seconds.")
             except Exception as e:
                 st.error(f"An error occurred during processing: {str(e)}")
     else:
-        st.warning("Please upload a valid file to proceed.")

 import ee
 import os
 import pandas as pd
 import geopandas as gpd
 from datetime import datetime
 import leafmap.foliumap as leafmap
 from xml.etree import ElementTree as XET
 from concurrent.futures import ThreadPoolExecutor, as_completed
 import time
+# Define default scale for calculations (in meters)
+DEFAULT_SCALE = 30
 # Set up the page layout
 st.set_page_config(layout="wide")
     else:
         raise ValueError("Unsupported geometry input type. Supported types are Shapely, GeoJSON, and KML.")
+# Function to calculate custom formula
+def calculate_custom_formula(image, geometry, selected_bands, custom_formula, reducer_choice, scale=DEFAULT_SCALE):
     try:
+        band_values = {}
+        band_names = image.bandNames().getInfo()
         for band in selected_bands:
+            if band not in band_names:
+                raise ValueError(f"Band '{band}' not found in the dataset.")
+            band_values[band] = image.select(band)
         reducer = get_reducer(reducer_choice)
+        reduced_values = {}
         for band in selected_bands:
+            value = band_values[band].reduceRegion(
                 reducer=reducer,
                 geometry=geometry,
                 scale=scale
             ).get(band).getInfo()
             reduced_values[band] = float(value if value is not None else 0)
         formula = custom_formula
         for band in selected_bands:
             formula = formula.replace(band, str(reduced_values[band]))
         result = eval(formula, {"__builtins__": {}}, reduced_values)
         if not isinstance(result, (int, float)):
             raise ValueError("Formula did not result in a numeric value.")
         return ee.Image.constant(result).rename('custom_result')
     daily_images = ee.List(grouped_by_day.map(calculate_daily_mean))
     return ee.ImageCollection(daily_images)
+def aggregate_data_weekly(collection):
+    def set_week_start(image):
         date = ee.Date(image.get('system:time_start'))
+        days_since_week_start = date.getRelative('day', 'week')
+        offset = ee.Number(days_since_week_start).multiply(-1)
+        week_start = date.advance(offset, 'day')
+        return image.set('week_start', week_start.format('YYYY-MM-dd'))
+    collection = collection.map(set_week_start)
+    grouped_by_week = collection.aggregate_array('week_start').distinct()
+    def calculate_weekly_mean(week_start):
+        weekly_collection = collection.filter(ee.Filter.eq('week_start', week_start))
+        weekly_mean = weekly_collection.mean()
+        return weekly_mean.set('week_start', week_start)
+    weekly_images = ee.List(grouped_by_week.map(calculate_weekly_mean))
+    return ee.ImageCollection(weekly_images)
 def aggregate_data_monthly(collection, start_date, end_date):
     collection = collection.filterDate(start_date, end_date)
     yearly_images = ee.List(grouped_by_year.map(calculate_yearly_mean))
     return ee.ImageCollection(yearly_images)
+def aggregate_data_daily(collection):
+    def set_day_start(image):
+        date = ee.Date(image.get('system:time_start'))
+        day_start = date.format('YYYY-MM-dd')
+        return image.set('day_start', day_start)
+    collection = collection.map(set_day_start)
+    grouped_by_day = collection.aggregate_array('day_start').distinct()
+    def calculate_daily_mean(day_start):
+        daily_collection = collection.filter(ee.Filter.eq('day_start', day_start))
+        daily_mean = daily_collection.mean()
+        return daily_mean.set('day_start', day_start)
+    daily_images = ee.List(grouped_by_day.map(calculate_daily_mean))
+    return ee.ImageCollection(daily_images)
+# Worker function for processing a single geometry
+def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selected_bands, reducer_choice, shape_type, aggregation_period, custom_formula, kernel_size=None, include_boundary=None):
     if shape_type.lower() == "point":
         latitude = row.get('latitude')
         longitude = row.get('longitude')
         if pd.isna(latitude) or pd.isna(longitude):
+            return None  # Skip invalid points
         location_name = row.get('name', f"Location_{row.name}")
         if kernel_size == "3x3 Kernel":
+            buffer_size = 45  # 90m x 90m
             roi = ee.Geometry.Point([longitude, latitude]).buffer(buffer_size).bounds()
         elif kernel_size == "5x5 Kernel":
+            buffer_size = 75  # 150m x 150m
             roi = ee.Geometry.Point([longitude, latitude]).buffer(buffer_size).bounds()
+        else:  # Point
             roi = ee.Geometry.Point([longitude, latitude])
     elif shape_type.lower() == "polygon":
         polygon_geometry = row.get('geometry')
             if not include_boundary:
                 roi = roi.buffer(-30).bounds()
         except ValueError:
+            return None  # Skip invalid polygons
+    # Filter and aggregate the image collection
     collection = ee.ImageCollection(dataset_id) \
         .filterDate(ee.Date(start_date_str), ee.Date(end_date_str)) \
+        .filterBounds(roi) \
+        .select(selected_bands)  # Filter bands here
     if aggregation_period.lower() == 'custom (start date to end date)':
         collection = aggregate_data_custom(collection)
     elif aggregation_period.lower() == 'daily':
         collection = aggregate_data_daily(collection)
     elif aggregation_period.lower() == 'weekly':
+        collection = aggregate_data_weekly(collection)
     elif aggregation_period.lower() == 'monthly':
         collection = aggregate_data_monthly(collection, start_date_str, end_date_str)
     elif aggregation_period.lower() == 'yearly':
         collection = aggregate_data_yearly(collection)
+    # Process each image in the collection
     image_list = collection.toList(collection.size())
+    processed_days = set()
     aggregated_results = []
     for i in range(image_list.size().getInfo()):
         image = ee.Image(image_list.get(i))
             date = ee.Date(timestamp).format('YYYY-MM-dd').getInfo()
         elif aggregation_period.lower() == 'daily':
             timestamp = image.get('day_start')
+            period_label = 'Day'
             date = ee.String(timestamp).getInfo()
         elif aggregation_period.lower() == 'weekly':
             timestamp = image.get('week_start')
             period_label = 'Week'
             date = ee.String(timestamp).getInfo()
         elif aggregation_period.lower() == 'monthly':
             timestamp = image.get('month')
             period_label = 'Month'
             timestamp = image.get('year')
             period_label = 'Year'
             date = ee.Date(timestamp).format('YYYY').getInfo()
+        index_image = calculate_custom_formula(image, roi, selected_bands, custom_formula, reducer_choice, scale=DEFAULT_SCALE)
         try:
             index_value = index_image.reduceRegion(
                 reducer=get_reducer(reducer_choice),
                 geometry=roi,
+                scale=DEFAULT_SCALE
             ).get('custom_result')
             calculated_value = index_value.getInfo()
             if isinstance(calculated_value, (int, float)):
                     'Calculated Value': calculated_value
                 }
                 if shape_type.lower() == 'point':
+                    result['Latitude'] = latitude
+                    result['Longitude'] = longitude
                 aggregated_results.append(result)
         except Exception as e:
             st.error(f"Error retrieving value for {location_name}: {e}")
     return aggregated_results
+# Main processing function
+def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id, selected_bands, reducer_choice, shape_type, aggregation_period, custom_formula="", kernel_size=None, include_boundary=None):
     # Runs the per-location work for every row of `locations_df` on a thread pool,
     # reports progress in the Streamlit UI, and returns a 2-tuple:
     #   (list of record dicts, elapsed wall-clock seconds).
     # NOTE(review): the call submitted for each row and the loop that gathers the
     # futures into `aggregated_results` are not visible in this excerpt - confirm
     # against the full file before relying on the details below.
     aggregated_results = []
     total_steps = len(locations_df)
     progress_bar = st.progress(0)
     progress_text = st.empty()
+    start_time = time.time()  # Start timing the process
     # At most 10 rows are processed concurrently.
     with ThreadPoolExecutor(max_workers=10) as executor:
         futures = []
         for idx, row in locations_df.iterrows():
                 shape_type,
                 aggregation_period,
                 custom_formula,
                 kernel_size,
+                include_boundary
             )
             futures.append(future)
         completed = 0
             # Progress is the fraction of locations finished so far (0.0 - 1.0).
             progress_percentage = completed / total_steps
             progress_bar.progress(progress_percentage)
             progress_text.markdown(f"Processing: {int(progress_percentage * 100)}%")
+    # End timing the process
     end_time = time.time()
+    processing_time = end_time - start_time  # Calculate total processing time
     if aggregated_results:
         result_df = pd.DataFrame(aggregated_results)
         # For the custom date-range period the rows are collapsed to one row per
         # 'Location Name'; every other period returns the rows unchanged.
         if aggregation_period.lower() == 'custom (start date to end date)':
                 'Calculated Value': 'mean'
             }
             # Point inputs keep the first coordinates seen for each location.
             if shape_type.lower() == 'point':
+                agg_dict['Latitude'] = 'first'
+                agg_dict['Longitude'] = 'first'
             aggregated_output = result_df.groupby('Location Name').agg(agg_dict).reset_index()
             # The averaged column is exposed to callers as 'Aggregated Value'.
+            aggregated_output.rename(columns={'Calculated Value': 'Aggregated Value'}, inplace=True)
+            return aggregated_output.to_dict(orient='records'), processing_time  # Return processing time
         else:
+            return result_df.to_dict(orient='records'), processing_time
     # Nothing was collected: return an empty list but still report the elapsed time.
+    return [], processing_time
 # Streamlit App Logic
 st.markdown("<h5>Image Collection</h5>", unsafe_allow_html=True)
 imagery_base = st.selectbox("Select Imagery Base", ["Sentinel", "Landsat", "MODIS", "VIIRS", "Custom Input"], index=0)
+# Initialize data as an empty dictionary
 data = {}
 if imagery_base == "Sentinel":
     dataset_file = "sentinel_datasets.json"
     try:
             if custom_dataset_id.startswith("ee.ImageCollection("):
                 custom_dataset_id = custom_dataset_id.replace("ee.ImageCollection('", "").replace("')", "")
             collection = ee.ImageCollection(custom_dataset_id)
+            band_names = collection.first().bandNames().getInfo()
             data = {
                 f"Custom Dataset: {custom_dataset_id}": {
                     "sub_options": {custom_dataset_id: f"Custom Dataset ({custom_dataset_id})"},
     else:
         st.warning("Please enter a custom dataset ID to proceed.")
         data = {}
 if not data:
     st.error("No valid dataset available. Please check your inputs.")
     st.stop()
 st.markdown("<hr><h5><b>{}</b></h5>".format(imagery_base), unsafe_allow_html=True)
 main_selection = st.selectbox(f"Select {imagery_base} Dataset Category", list(data.keys()))
 sub_selection = None
 dataset_id = None
 if main_selection:
     sub_options = data[main_selection]["sub_options"]
     sub_selection = st.selectbox(f"Select Specific {imagery_base} Dataset ID", list(sub_options.keys()))
         st.write(f"You selected: {main_selection} -> {sub_options[sub_selection]}")
         st.write(f"Dataset ID: {sub_selection}")
         dataset_id = sub_selection
         try:
             collection = ee.ImageCollection(dataset_id)
             first_image = collection.first()
             st.error(f"Error fetching default scale: {str(e)}")
 st.markdown("<hr><h5><b>Earth Engine Index Calculator</b></h5>", unsafe_allow_html=True)
 if main_selection and sub_selection:
     dataset_bands = data[main_selection]["bands"].get(sub_selection, [])
     st.write(f"Available Bands for {sub_options[sub_selection]}: {', '.join(dataset_bands)}")
     selected_bands = st.multiselect(
+        "Select Bands for Calculation",
         options=dataset_bands,
         default=[dataset_bands[0]] if dataset_bands else [],
+        help=f"Select bands from: {', '.join(dataset_bands)}"
     )
     if len(selected_bands) < 1:
         st.warning("Please select at least one band.")
         st.stop()
     if selected_bands:
         if len(selected_bands) == 1:
             default_formula = f"{selected_bands[0]}"
             value=default_formula,
             help=f"Use only these bands: {', '.join(selected_bands)}. Examples: {example}"
         )
         def validate_formula(formula, selected_bands):
             """Check that a formula uses only selected bands, numbers and operators.

             Args:
                 formula: Expression text entered by the user.
                 selected_bands: Band names the formula is allowed to reference.

             Returns:
                 A ``(is_valid, error_message)`` tuple; ``error_message`` is an
                 empty string when the formula is accepted.
             """
             allowed_chars = set(" +-*/()0123456789.")
             # Compare whole tokens against the selected bands. A per-character
             # test alone accepts names such as "B48" when "B4" and "B8" are
             # selected, because every character occurs in some band name.
             tokens = re.findall(r'[A-Za-z0-9_]+', formula)
             unknown_terms = list(dict.fromkeys(
                 token for token in tokens
                 if not token.isdigit() and token not in selected_bands
             ))
             if unknown_terms:
                 return False, (
                     f"Invalid terms in formula: {', '.join(unknown_terms)}. "
                     f"Use only these bands: {', '.join(selected_bands)}"
                 )
             # Everything outside band names must be a digit, '.', space,
             # parenthesis or one of + - * /.
             band_chars = set(''.join(selected_bands))
             if not all(char in allowed_chars or char in band_chars for char in formula):
                 return False, "Formula contains invalid characters. Use only bands, numbers, and operators (+, -, *, /, ())"
             return True, ""
         is_valid, error_message = validate_formula(custom_formula, selected_bands)
         if not is_valid:
             st.error(error_message)
         elif not custom_formula:
             st.warning("Please enter a custom formula to proceed.")
             st.stop()
         st.write(f"Custom Formula: {custom_formula}")
 reducer_choice = st.selectbox(
 start_date_str = start_date.strftime('%Y-%m-%d')
 end_date_str = end_date.strftime('%Y-%m-%d')
 aggregation_period = st.selectbox(
     "Select Aggregation Period (e.g, Custom(Start Date to End Date) , Daily , Weekly , Monthly , Yearly)",
     ["Custom (Start Date to End Date)", "Daily", "Weekly", "Monthly", "Yearly"],
 )
 shape_type = st.selectbox("Do you want to process 'Point' or 'Polygon' data?", ["Point", "Polygon"])
 kernel_size = None
 include_boundary = None
         help="Check to include pixels on the polygon boundary; uncheck to exclude them."
     )
 file_upload = st.file_uploader(f"Upload your {shape_type} data (CSV, GeoJSON, KML)", type=["csv", "geojson", "kml"])
 locations_df = pd.DataFrame()
 original_lat_col = None
 # Runs when the user presses the "Calculate" button: aggregates the uploaded
 # locations, shows the result table, and offers it as a CSV download.
 if st.button(f"Calculate {custom_formula}"):
     # Processing only starts once `locations_df` holds at least one row.
     if not locations_df.empty:
+        # Use a spinner to indicate data processing
         with st.spinner("Processing Data..."):
             try:
+                # Call the aggregation function and capture results and processing time
                 # NOTE(review): some positional arguments between start_date_str and
                 # reducer_choice are not visible in this excerpt - confirm the full
                 # argument list against process_aggregation's signature.
                 results, processing_time = process_aggregation(
                     locations_df,
                     start_date_str,
                     reducer_choice,
                     shape_type,
                     aggregation_period,
+                    custom_formula,
+                    kernel_size,
+                    include_boundary
                 )
+                # Check if results were generated
                 if results:
                     result_df = pd.DataFrame(results)
                     st.write(f"Processed Results Table ({aggregation_period}) for Formula: {custom_formula}")
                     st.dataframe(result_df)
+                    # Generate a downloadable CSV file
                     # NOTE(review): the '%Y/%m/%d' date format puts '/' characters in the
                     # file name, and dataset_id may contain them too - consider a
                     # separator that is safe in file names.
+                    filename = f"{main_selection}_{dataset_id}_{start_date.strftime('%Y/%m/%d')}_{end_date.strftime('%Y/%m/%d')}_{aggregation_period.lower()}.csv"
                     st.download_button(
                         label="Download results as CSV",
                         data=result_df.to_csv(index=False).encode('utf-8'),
                         file_name=filename,
                         mime='text/csv'
                     )
+                    # Display processing time
                     st.success(f"Processing complete! Total processing time: {processing_time:.2f} seconds.")
                 else:
                     st.warning("No results were generated. Check your inputs or formula.")
+                    st.info(f"Total processing time: {processing_time:.2f} seconds.")  # Show processing time even if no results
             # Any failure during processing is shown in the UI instead of crashing the app.
             except Exception as e:
                 st.error(f"An error occurred during processing: {str(e)}")
     else:
+        st.warning("Please upload a file to proceed.")