YashMK89 committed on
Commit
a57f55b
·
verified ·
1 Parent(s): fdee337

update app.py

Browse files
Files changed (1) hide show
  1. app.py +184 -144
app.py CHANGED
@@ -4,7 +4,7 @@ import ee
4
  import os
5
  import pandas as pd
6
  import geopandas as gpd
7
- from datetime import datetime
8
  import leafmap.foliumap as leafmap
9
  import re
10
  from shapely.geometry import base
@@ -143,68 +143,98 @@ def calculate_custom_formula(image, geometry, selected_bands, custom_formula, re
143
  return ee.Image(0).rename('custom_result').set('error', str(e))
144
 
145
  # Aggregation functions
146
- def aggregate_data_custom(collection):
147
- collection = collection.map(lambda image: image.set('day', ee.Date(image.get('system:time_start')).format('YYYY-MM-dd')))
148
- grouped_by_day = collection.aggregate_array('day').distinct()
149
- def calculate_daily_mean(day):
150
- daily_collection = collection.filter(ee.Filter.eq('day', day))
151
- daily_mean = daily_collection.mean()
152
- return daily_mean.set('day', day)
153
- daily_images = ee.List(grouped_by_day.map(calculate_daily_mean))
 
 
 
 
 
154
  return ee.ImageCollection(daily_images)
155
 
156
- def aggregate_data_weekly(collection):
157
- def set_week_start(image):
158
- date = ee.Date(image.get('system:time_start'))
159
- days_since_week_start = date.getRelative('day', 'week')
160
- offset = ee.Number(days_since_week_start).multiply(-1)
161
- week_start = date.advance(offset, 'day')
162
- return image.set('week_start', week_start.format('YYYY-MM-dd'))
163
- collection = collection.map(set_week_start)
164
- grouped_by_week = collection.aggregate_array('week_start').distinct()
165
- def calculate_weekly_mean(week_start):
166
- weekly_collection = collection.filter(ee.Filter.eq('week_start', week_start))
167
- weekly_mean = weekly_collection.mean()
168
- return weekly_mean.set('week_start', week_start)
169
- weekly_images = ee.List(grouped_by_week.map(calculate_weekly_mean))
 
 
 
 
 
 
 
 
170
  return ee.ImageCollection(weekly_images)
171
 
172
  def aggregate_data_monthly(collection, start_date, end_date):
173
- collection = collection.filterDate(start_date, end_date)
174
- collection = collection.map(lambda image: image.set('month', ee.Date(image.get('system:time_start')).format('YYYY-MM')))
175
- grouped_by_month = collection.aggregate_array('month').distinct()
176
- def calculate_monthly_mean(month):
177
- monthly_collection = collection.filter(ee.Filter.eq('month', month))
178
- monthly_mean = monthly_collection.mean()
179
- return monthly_mean.set('month', month)
180
- monthly_images = ee.List(grouped_by_month.map(calculate_monthly_mean))
 
 
 
 
 
 
181
  return ee.ImageCollection(monthly_images)
182
 
183
- def aggregate_data_yearly(collection):
184
- collection = collection.map(lambda image: image.set('year', ee.Date(image.get('system:time_start')).format('YYYY')))
185
- grouped_by_year = collection.aggregate_array('year').distinct()
186
- def calculate_yearly_mean(year):
187
- yearly_collection = collection.filter(ee.Filter.eq('year', year))
188
- yearly_mean = yearly_collection.mean()
189
- return yearly_mean.set('year', year)
190
- yearly_images = ee.List(grouped_by_year.map(calculate_yearly_mean))
 
 
 
 
 
 
191
  return ee.ImageCollection(yearly_images)
192
 
 
 
 
 
 
193
  # Worker function for processing a single geometry
194
- def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selected_bands, reducer_choice, shape_type, aggregation_period, custom_formula, kernel_size=None, include_boundary=None, lat_col=None, lon_col=None):
195
  if shape_type.lower() == "point":
196
- latitude = row.get(lat_col)
197
- longitude = row.get(lon_col)
198
  if pd.isna(latitude) or pd.isna(longitude):
199
- return None # Skip invalid points
200
  location_name = row.get('name', f"Location_{row.name}")
201
  if kernel_size == "3x3 Kernel":
202
- buffer_size = 45 # 90m x 90m
203
  roi = ee.Geometry.Point([longitude, latitude]).buffer(buffer_size).bounds()
204
  elif kernel_size == "5x5 Kernel":
205
- buffer_size = 75 # 150m x 150m
206
  roi = ee.Geometry.Point([longitude, latitude]).buffer(buffer_size).bounds()
207
- else: # Point
208
  roi = ee.Geometry.Point([longitude, latitude])
209
  elif shape_type.lower() == "polygon":
210
  polygon_geometry = row.get('geometry')
@@ -214,47 +244,50 @@ def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selec
214
  if not include_boundary:
215
  roi = roi.buffer(-30).bounds()
216
  except ValueError:
217
- return None # Skip invalid polygons
218
- # Filter and aggregate the image collection
219
- collection = ee.ImageCollection(dataset_id) \
220
- .filterDate(ee.Date(start_date_str), ee.Date(end_date_str)) \
221
- .filterBounds(roi)
222
- if aggregation_period.lower() == 'custom (start date to end date)':
223
- collection = aggregate_data_custom(collection)
 
 
224
  elif aggregation_period.lower() == 'weekly':
225
- collection = aggregate_data_weekly(collection)
 
226
  elif aggregation_period.lower() == 'monthly':
227
  collection = aggregate_data_monthly(collection, start_date_str, end_date_str)
 
228
  elif aggregation_period.lower() == 'yearly':
229
- collection = aggregate_data_yearly(collection)
 
 
 
 
 
230
  # Process each image in the collection
231
  image_list = collection.toList(collection.size())
232
- processed_weeks = set()
233
  aggregated_results = []
 
234
  for i in range(image_list.size().getInfo()):
235
  image = ee.Image(image_list.get(i))
236
- if aggregation_period.lower() == 'custom (start date to end date)':
237
- timestamp = image.get('day')
238
- period_label = 'Date'
239
- date = ee.Date(timestamp).format('YYYY-MM-dd').getInfo()
240
  elif aggregation_period.lower() == 'weekly':
241
- timestamp = image.get('week_start')
242
- period_label = 'Week'
243
- date = ee.String(timestamp).getInfo()
244
- if (pd.to_datetime(date) < pd.to_datetime(start_date_str) or
245
- pd.to_datetime(date) > pd.to_datetime(end_date_str) or
246
- date in processed_weeks):
247
- continue
248
- processed_weeks.add(date)
249
  elif aggregation_period.lower() == 'monthly':
250
- timestamp = image.get('month')
251
- period_label = 'Month'
252
- date = ee.Date(timestamp).format('YYYY-MM').getInfo()
253
  elif aggregation_period.lower() == 'yearly':
254
- timestamp = image.get('year')
255
- period_label = 'Year'
256
- date = ee.Date(timestamp).format('YYYY').getInfo()
 
 
257
  index_image = calculate_custom_formula(image, roi, selected_bands, custom_formula, reducer_choice, scale=30)
 
258
  try:
259
  index_value = index_image.reduceRegion(
260
  reducer=get_reducer(reducer_choice),
@@ -262,29 +295,30 @@ def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selec
262
  scale=30
263
  ).get('custom_result')
264
  calculated_value = index_value.getInfo()
 
265
  if isinstance(calculated_value, (int, float)):
266
  result = {
267
  'Location Name': location_name,
268
  period_label: date,
269
- 'Start Date': start_date_str,
270
- 'End Date': end_date_str,
271
  'Calculated Value': calculated_value
272
  }
273
  if shape_type.lower() == 'point':
274
- result[lat_col] = latitude
275
- result[lon_col] = longitude
276
  aggregated_results.append(result)
277
  except Exception as e:
278
  st.error(f"Error retrieving value for {location_name}: {e}")
 
279
  return aggregated_results
280
 
281
  # Main processing function
282
- def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id, selected_bands, reducer_choice, shape_type, aggregation_period, custom_formula="", kernel_size=None, include_boundary=None, lat_col=None, lon_col=None):
283
  aggregated_results = []
284
  total_steps = len(locations_df)
285
  progress_bar = st.progress(0)
286
  progress_text = st.empty()
287
- start_time = time.time() # Start timing the process
 
288
  with ThreadPoolExecutor(max_workers=10) as executor:
289
  futures = []
290
  for idx, row in locations_df.iterrows():
@@ -299,12 +333,13 @@ def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id,
299
  shape_type,
300
  aggregation_period,
301
  custom_formula,
 
 
302
  kernel_size,
303
- include_boundary,
304
- lat_col,
305
- lon_col
306
  )
307
  futures.append(future)
 
308
  completed = 0
309
  for future in as_completed(futures):
310
  result = future.result()
@@ -314,26 +349,14 @@ def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id,
314
  progress_percentage = completed / total_steps
315
  progress_bar.progress(progress_percentage)
316
  progress_text.markdown(f"Processing: {int(progress_percentage * 100)}%")
317
- # End timing the process
318
  end_time = time.time()
319
- processing_time = end_time - start_time # Calculate total processing time
 
320
  if aggregated_results:
321
  result_df = pd.DataFrame(aggregated_results)
322
- if aggregation_period.lower() == 'custom (start date to end date)':
323
- agg_dict = {
324
- 'Start Date': 'first',
325
- 'End Date': 'first',
326
- 'Calculated Value': 'mean'
327
- }
328
- if shape_type.lower() == 'point':
329
- agg_dict[lat_col] = 'first'
330
- agg_dict[lon_col] = 'first'
331
- aggregated_output = result_df.groupby('Location Name').agg(agg_dict).reset_index()
332
- aggregated_output.rename(columns={'Calculated Value': 'Aggregated Value'}, inplace=True)
333
- return aggregated_output.to_dict(orient='records'), processing_time # Return processing time
334
- else:
335
- return result_df.to_dict(orient='records'), processing_time
336
- return [], processing_time
337
 
338
  # Streamlit App Logic
339
  st.markdown("<h5>Image Collection</h5>", unsafe_allow_html=True)
@@ -341,6 +364,7 @@ imagery_base = st.selectbox("Select Imagery Base", ["Sentinel", "Landsat", "MODI
341
 
342
  # Initialize data as an empty dictionary
343
  data = {}
 
344
  if imagery_base == "Sentinel":
345
  dataset_file = "sentinel_datasets.json"
346
  try:
@@ -392,10 +416,12 @@ if not data:
392
  st.stop()
393
 
394
  st.markdown("<hr><h5><b>{}</b></h5>".format(imagery_base), unsafe_allow_html=True)
 
395
  main_selection = st.selectbox(f"Select {imagery_base} Dataset Category", list(data.keys()))
396
 
397
  sub_selection = None
398
  dataset_id = None
 
399
  if main_selection:
400
  sub_options = data[main_selection]["sub_options"]
401
  sub_selection = st.selectbox(f"Select Specific {imagery_base} Dataset ID", list(sub_options.keys()))
@@ -405,6 +431,7 @@ if main_selection:
405
  dataset_id = sub_selection
406
 
407
  st.markdown("<hr><h5><b>Earth Engine Index Calculator</b></h5>", unsafe_allow_html=True)
 
408
  if main_selection and sub_selection:
409
  dataset_bands = data[main_selection]["bands"].get(sub_selection, [])
410
  st.write(f"Available Bands for {sub_options[sub_selection]}: {', '.join(dataset_bands)}")
@@ -414,9 +441,11 @@ if main_selection and sub_selection:
414
  default=[dataset_bands[0]] if dataset_bands else [],
415
  help=f"Select 1 or 2 bands from: {', '.join(dataset_bands)}"
416
  )
 
417
  if len(selected_bands) < 1:
418
  st.warning("Please select at least one band.")
419
  st.stop()
 
420
  if selected_bands:
421
  if len(selected_bands) == 1:
422
  default_formula = f"{selected_bands[0]}"
@@ -429,6 +458,7 @@ if main_selection and sub_selection:
429
  value=default_formula,
430
  help=f"Use only these bands: {', '.join(selected_bands)}. Examples: {example}"
431
  )
 
432
  def validate_formula(formula, selected_bands):
433
  allowed_chars = set(" +-*/()0123456789.")
434
  terms = re.findall(r'[a-zA-Z][a-zA-Z0-9_]*', formula)
@@ -438,6 +468,7 @@ if main_selection and sub_selection:
438
  if not all(char in allowed_chars or char in ''.join(selected_bands) for char in formula):
439
  return False, "Formula contains invalid characters. Use only bands, numbers, and operators (+, -, *, /, ())"
440
  return True, ""
 
441
  is_valid, error_message = validate_formula(custom_formula, selected_bands)
442
  if not is_valid:
443
  st.error(error_message)
@@ -445,6 +476,7 @@ if main_selection and sub_selection:
445
  elif not custom_formula:
446
  st.warning("Please enter a custom formula to proceed.")
447
  st.stop()
 
448
  st.write(f"Custom Formula: {custom_formula}")
449
 
450
  reducer_choice = st.selectbox(
@@ -453,14 +485,14 @@ reducer_choice = st.selectbox(
453
  index=0
454
  )
455
 
456
- start_date = st.date_input("Start Date", value=pd.to_datetime('2024-11-01'))
457
- end_date = st.date_input("End Date", value=pd.to_datetime('2024-12-01'))
458
  start_date_str = start_date.strftime('%Y-%m-%d')
459
  end_date_str = end_date.strftime('%Y-%m-%d')
460
 
461
  aggregation_period = st.selectbox(
462
- "Select Aggregation Period (e.g, Custom(Start Date to End Date) , Weekly , Monthly , Yearly)",
463
- ["Custom (Start Date to End Date)", "Weekly", "Monthly", "Yearly"],
464
  index=0
465
  )
466
 
@@ -471,7 +503,7 @@ include_boundary = None
471
 
472
  if shape_type.lower() == "point":
473
  kernel_size = st.selectbox(
474
- "Select Calculation Area(e.g, Point , 3x3 Kernel , 5x5 Kernel)",
475
  ["Point", "3x3 Kernel", "5x5 Kernel"],
476
  index=0,
477
  help="Choose 'Point' for exact point calculation, or a kernel size for area averaging."
@@ -485,46 +517,51 @@ elif shape_type.lower() == "polygon":
485
 
486
  file_upload = st.file_uploader(f"Upload your {shape_type} data (CSV, GeoJSON, KML)", type=["csv", "geojson", "kml"])
487
  locations_df = pd.DataFrame()
 
 
488
 
489
  if file_upload is not None:
490
  if shape_type.lower() == "point":
491
  if file_upload.name.endswith('.csv'):
492
- # Read the CSV file
493
  locations_df = pd.read_csv(file_upload)
494
- # Show the first few rows to help user identify columns
495
  st.write("Preview of your uploaded data (first 5 rows):")
496
  st.dataframe(locations_df.head())
497
-
498
- # Dynamically populate dropdown menus for latitude and longitude
 
499
  col1, col2 = st.columns(2)
500
  with col1:
501
- lat_col = st.selectbox(
502
- "Select the Latitude column",
503
- options=locations_df.columns,
504
- help="Choose the column containing latitude values."
 
505
  )
506
  with col2:
507
- lon_col = st.selectbox(
508
- "Select the Longitude column",
509
- options=locations_df.columns,
510
- help="Choose the column containing longitude values."
 
511
  )
512
-
513
- # Validate the selected columns contain numeric data
514
- if not pd.api.types.is_numeric_dtype(locations_df[lat_col]) or not pd.api.types.is_numeric_dtype(locations_df[lon_col]):
515
- st.error("Error: Selected Latitude and Longitude columns must contain numeric values.")
516
  st.stop()
517
-
 
 
 
 
 
518
  elif file_upload.name.endswith('.geojson'):
519
  locations_df = gpd.read_file(file_upload)
520
  if 'geometry' in locations_df.columns:
521
- # Extract latitude and longitude from geometry
522
- locations_df['original_latitude'] = locations_df['geometry'].y
523
- locations_df['original_longitude'] = locations_df['geometry'].x
524
-
525
- # Preserve original column names
526
- lat_col = 'original_latitude'
527
- lon_col = 'original_longitude'
528
  else:
529
  st.error("GeoJSON file doesn't contain geometry column")
530
  st.stop()
@@ -547,25 +584,24 @@ if file_upload is not None:
547
  st.error("No valid Point data found in the KML file.")
548
  else:
549
  locations_df = gpd.GeoDataFrame(points, geometry=gpd.GeoSeries.from_wkt([p['geometry'] for p in points]), crs="EPSG:4326")
550
- locations_df['original_latitude'] = locations_df['geometry'].y
551
- locations_df['original_longitude'] = locations_df['geometry'].x
552
-
553
- # Preserve original column names
554
- lat_col = 'original_latitude'
555
- lon_col = 'original_longitude'
556
  except Exception as e:
557
  st.error(f"Error parsing KML file: {str(e)}")
558
- # Display map for points if we have valid data
559
- if not locations_df.empty and lat_col in locations_df.columns and lon_col in locations_df.columns:
560
- m = leafmap.Map(center=[locations_df[lat_col].mean(), locations_df[lon_col].mean()], zoom=10)
561
  for _, row in locations_df.iterrows():
562
- latitude = row[lat_col]
563
- longitude = row[lon_col]
564
  if pd.isna(latitude) or pd.isna(longitude):
565
  continue
566
  m.add_marker(location=[latitude, longitude], popup=row.get('name', 'No Name'))
567
  st.write("Map of Uploaded Points:")
568
  m.to_streamlit()
 
569
  elif shape_type.lower() == "polygon":
570
  if file_upload.name.endswith('.csv'):
571
  st.error("CSV upload not supported for polygons. Please upload a GeoJSON or KML file.")
@@ -595,7 +631,7 @@ if file_upload is not None:
595
  locations_df = gpd.GeoDataFrame(polygons, geometry=gpd.GeoSeries.from_wkt([p['geometry'] for p in polygons]), crs="EPSG:4326")
596
  except Exception as e:
597
  st.error(f"Error parsing KML file: {str(e)}")
598
- # Display map for polygons if we have valid data
599
  if not locations_df.empty and 'geometry' in locations_df.columns:
600
  centroid_lat = locations_df.geometry.centroid.y.mean()
601
  centroid_lon = locations_df.geometry.centroid.x.mean()
@@ -621,16 +657,18 @@ if st.button(f"Calculate {custom_formula}"):
621
  reducer_choice,
622
  shape_type,
623
  aggregation_period,
 
 
624
  custom_formula,
625
  kernel_size,
626
- include_boundary,
627
- lat_col=lat_col if shape_type.lower() == "point" else None,
628
- lon_col=lon_col if shape_type.lower() == "point" else None
629
  )
 
630
  if results:
631
  result_df = pd.DataFrame(results)
632
  st.write(f"Processed Results Table ({aggregation_period}) for Formula: {custom_formula}")
633
  st.dataframe(result_df)
 
634
  filename = f"{main_selection}_{dataset_id}_{start_date.strftime('%Y%m%d')}_{end_date.strftime('%Y%m%d')}_{aggregation_period.lower()}.csv"
635
  st.download_button(
636
  label="Download results as CSV",
@@ -638,10 +676,12 @@ if st.button(f"Calculate {custom_formula}"):
638
  file_name=filename,
639
  mime='text/csv'
640
  )
 
641
  st.success(f"Processing complete! Total processing time: {processing_time:.2f} seconds.")
642
  else:
643
  st.warning("No results were generated. Check your inputs or formula.")
644
  st.info(f"Total processing time: {processing_time:.2f} seconds.")
 
645
  except Exception as e:
646
  st.error(f"An error occurred during processing: {str(e)}")
647
  else:
 
4
  import os
5
  import pandas as pd
6
  import geopandas as gpd
7
+ from datetime import datetime, timedelta
8
  import leafmap.foliumap as leafmap
9
  import re
10
  from shapely.geometry import base
 
143
  return ee.Image(0).rename('custom_result').set('error', str(e))
144
 
145
  # Aggregation functions
146
def aggregate_data_daily(collection, start_date, end_date):
    """Build one mean composite per calendar day in [start_date, end_date].

    Each output image carries a 'date' property (YYYY-MM-DD) naming its day.
    Days with no source imagery still yield an (empty-band) mean image —
    NOTE(review): presumably filtered out downstream; confirm against caller.
    """
    # Inclusive daily range; pandas handles the calendar arithmetic.
    day_range = pd.date_range(start=start_date, end=end_date, freq='D')

    def daily_composite(day):
        label = day.strftime('%Y-%m-%d')
        # filterDate's end bound is exclusive, so one day = [label, label + 1 day).
        window = collection.filterDate(ee.Date(label), ee.Date(label).advance(1, 'day'))
        return window.mean().set('date', label)

    return ee.ImageCollection([daily_composite(day) for day in day_range])
160
 
161
def aggregate_data_weekly(collection, start_date, end_date):
    """Build one mean composite per 7-day window, anchored on start_date.

    Windows are inclusive of both endpoints; the final window is truncated
    at end_date. Each image carries 'week_start' and 'week_end' properties
    (YYYY-MM-DD).

    Bug fix: ee filterDate's upper bound is exclusive, and the next interval
    begins at week_end + 1 day, so the original dropped the 7th day of every
    week from all composites. The window now advances one day past week_end.
    """
    start = pd.to_datetime(start_date)
    end = pd.to_datetime(end_date)

    # Inclusive (week_start, week_end) pairs covering the whole range.
    intervals = []
    cursor = start
    while cursor <= end:
        week_end = min(cursor + timedelta(days=6), end)
        intervals.append((cursor.strftime('%Y-%m-%d'), week_end.strftime('%Y-%m-%d')))
        cursor = week_end + timedelta(days=1)

    def weekly_composite(week_start, week_end):
        # Advance one past week_end so the week's last day is included
        # (filterDate end bound is exclusive).
        window = collection.filterDate(ee.Date(week_start),
                                       ee.Date(week_end).advance(1, 'day'))
        return window.mean().set('week_start', week_start).set('week_end', week_end)

    return ee.ImageCollection([weekly_composite(s, e) for s, e in intervals])
184
 
185
def aggregate_data_monthly(collection, start_date, end_date):
    """Build one mean composite per calendar month overlapping [start_date, end_date].

    Each image carries a 'month' property (YYYY-MM).

    Bug fixes: with freq='MS' the original skipped a partial first month
    entirely whenever start_date fell mid-month, and the final month's
    window ran to the next month-begin, pulling imagery from after
    end_date (the collection is only spatially filtered by the caller).
    Both window edges are now clamped to the requested range.
    """
    start = pd.to_datetime(start_date)
    end = pd.to_datetime(end_date)

    # Anchor on the first day of start's month so a partial first month is kept.
    month_starts = pd.date_range(start=start.replace(day=1), end=end, freq='MS')

    def monthly_composite(month_start):
        # Clamp to the requested range; filterDate's end bound is exclusive,
        # so the upper clamp is end_date + 1 day to keep end_date itself.
        lower = max(month_start, start)
        upper = min(month_start + pd.offsets.MonthBegin(1), end + pd.Timedelta(days=1))
        window = collection.filterDate(ee.Date(lower.strftime('%Y-%m-%d')),
                                       ee.Date(upper.strftime('%Y-%m-%d')))
        return window.mean().set('month', month_start.strftime('%Y-%m'))

    return ee.ImageCollection([monthly_composite(m) for m in month_starts])
201
 
202
def aggregate_data_yearly(collection, start_date, end_date):
    """Build one mean composite per calendar year overlapping [start_date, end_date].

    Each image carries a 'year' property (YYYY).

    Bug fix: the original filtered whole calendar years (Jan 1 to Jan 1),
    so the first year's composite included imagery from before start_date
    and the last year's from after end_date. Each year's window is now
    clamped to the requested range.
    """
    start = pd.to_datetime(start_date)
    end = pd.to_datetime(end_date)

    def yearly_composite(year):
        # Clamp to the requested range; filterDate's end bound is exclusive,
        # so the upper clamp is end_date + 1 day to keep end_date itself.
        lower = max(pd.Timestamp(year=year, month=1, day=1), start)
        upper = min(pd.Timestamp(year=year + 1, month=1, day=1),
                    end + pd.Timedelta(days=1))
        window = collection.filterDate(ee.Date(lower.strftime('%Y-%m-%d')),
                                       ee.Date(upper.strftime('%Y-%m-%d')))
        return window.mean().set('year', str(year))

    return ee.ImageCollection([yearly_composite(y) for y in range(start.year, end.year + 1)])
217
 
218
def aggregate_data_custom(collection, start_date, end_date):
    """Build a single mean composite over the whole [start_date, end_date] range.

    The composite carries 'start_date' and 'end_date' properties for labelling.

    Bug fix: ee filterDate's upper bound is exclusive, so the original
    silently dropped imagery from end_date itself — inconsistent with the
    daily aggregator, which is end-inclusive. The end bound is now advanced
    by one day.
    """
    window = collection.filterDate(ee.Date(start_date),
                                   ee.Date(end_date).advance(1, 'day'))
    return window.mean().set('start_date', start_date).set('end_date', end_date)
222
+
223
  # Worker function for processing a single geometry
224
+ def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selected_bands, reducer_choice, shape_type, aggregation_period, custom_formula, original_lat_col, original_lon_col, kernel_size=None, include_boundary=None):
225
  if shape_type.lower() == "point":
226
+ latitude = row.get('latitude')
227
+ longitude = row.get('longitude')
228
  if pd.isna(latitude) or pd.isna(longitude):
229
+ return None
230
  location_name = row.get('name', f"Location_{row.name}")
231
  if kernel_size == "3x3 Kernel":
232
+ buffer_size = 45
233
  roi = ee.Geometry.Point([longitude, latitude]).buffer(buffer_size).bounds()
234
  elif kernel_size == "5x5 Kernel":
235
+ buffer_size = 75
236
  roi = ee.Geometry.Point([longitude, latitude]).buffer(buffer_size).bounds()
237
+ else:
238
  roi = ee.Geometry.Point([longitude, latitude])
239
  elif shape_type.lower() == "polygon":
240
  polygon_geometry = row.get('geometry')
 
244
  if not include_boundary:
245
  roi = roi.buffer(-30).bounds()
246
  except ValueError:
247
+ return None
248
+
249
+ # Filter collection by location
250
+ collection = ee.ImageCollection(dataset_id).filterBounds(roi)
251
+
252
+ # Apply temporal aggregation based on selected period
253
+ if aggregation_period.lower() == 'daily':
254
+ collection = aggregate_data_daily(collection, start_date_str, end_date_str)
255
+ period_label = 'Date'
256
  elif aggregation_period.lower() == 'weekly':
257
+ collection = aggregate_data_weekly(collection, start_date_str, end_date_str)
258
+ period_label = 'Week'
259
  elif aggregation_period.lower() == 'monthly':
260
  collection = aggregate_data_monthly(collection, start_date_str, end_date_str)
261
+ period_label = 'Month'
262
  elif aggregation_period.lower() == 'yearly':
263
+ collection = aggregate_data_yearly(collection, start_date_str, end_date_str)
264
+ period_label = 'Year'
265
+ else: # Custom
266
+ collection = ee.ImageCollection([aggregate_data_custom(collection, start_date_str, end_date_str)])
267
+ period_label = 'Date Range'
268
+
269
  # Process each image in the collection
270
  image_list = collection.toList(collection.size())
 
271
  aggregated_results = []
272
+
273
  for i in range(image_list.size().getInfo()):
274
  image = ee.Image(image_list.get(i))
275
+
276
+ # Get the appropriate date label based on aggregation period
277
+ if aggregation_period.lower() == 'daily':
278
+ date = image.get('date').getInfo()
279
  elif aggregation_period.lower() == 'weekly':
280
+ date = f"{image.get('week_start').getInfo()} to {image.get('week_end').getInfo()}"
 
 
 
 
 
 
 
281
  elif aggregation_period.lower() == 'monthly':
282
+ date = image.get('month').getInfo()
 
 
283
  elif aggregation_period.lower() == 'yearly':
284
+ date = image.get('year').getInfo()
285
+ else: # Custom
286
+ date = f"{image.get('start_date').getInfo()} to {image.get('end_date').getInfo()}"
287
+
288
+ # Calculate the custom formula
289
  index_image = calculate_custom_formula(image, roi, selected_bands, custom_formula, reducer_choice, scale=30)
290
+
291
  try:
292
  index_value = index_image.reduceRegion(
293
  reducer=get_reducer(reducer_choice),
 
295
  scale=30
296
  ).get('custom_result')
297
  calculated_value = index_value.getInfo()
298
+
299
  if isinstance(calculated_value, (int, float)):
300
  result = {
301
  'Location Name': location_name,
302
  period_label: date,
 
 
303
  'Calculated Value': calculated_value
304
  }
305
  if shape_type.lower() == 'point':
306
+ result[original_lat_col] = latitude
307
+ result[original_lon_col] = longitude
308
  aggregated_results.append(result)
309
  except Exception as e:
310
  st.error(f"Error retrieving value for {location_name}: {e}")
311
+
312
  return aggregated_results
313
 
314
  # Main processing function
315
+ def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id, selected_bands, reducer_choice, shape_type, aggregation_period, original_lat_col, original_lon_col, custom_formula="", kernel_size=None, include_boundary=None):
316
  aggregated_results = []
317
  total_steps = len(locations_df)
318
  progress_bar = st.progress(0)
319
  progress_text = st.empty()
320
+
321
+ start_time = time.time()
322
  with ThreadPoolExecutor(max_workers=10) as executor:
323
  futures = []
324
  for idx, row in locations_df.iterrows():
 
333
  shape_type,
334
  aggregation_period,
335
  custom_formula,
336
+ original_lat_col,
337
+ original_lon_col,
338
  kernel_size,
339
+ include_boundary
 
 
340
  )
341
  futures.append(future)
342
+
343
  completed = 0
344
  for future in as_completed(futures):
345
  result = future.result()
 
349
  progress_percentage = completed / total_steps
350
  progress_bar.progress(progress_percentage)
351
  progress_text.markdown(f"Processing: {int(progress_percentage * 100)}%")
352
+
353
  end_time = time.time()
354
+ processing_time = end_time - start_time
355
+
356
  if aggregated_results:
357
  result_df = pd.DataFrame(aggregated_results)
358
+ return result_df.to_dict(orient='records'), processing_time
359
+ return [], processing_time
 
 
 
 
 
 
 
 
 
 
 
 
 
360
 
361
  # Streamlit App Logic
362
  st.markdown("<h5>Image Collection</h5>", unsafe_allow_html=True)
 
364
 
365
  # Initialize data as an empty dictionary
366
  data = {}
367
+
368
  if imagery_base == "Sentinel":
369
  dataset_file = "sentinel_datasets.json"
370
  try:
 
416
  st.stop()
417
 
418
  st.markdown("<hr><h5><b>{}</b></h5>".format(imagery_base), unsafe_allow_html=True)
419
+
420
  main_selection = st.selectbox(f"Select {imagery_base} Dataset Category", list(data.keys()))
421
 
422
  sub_selection = None
423
  dataset_id = None
424
+
425
  if main_selection:
426
  sub_options = data[main_selection]["sub_options"]
427
  sub_selection = st.selectbox(f"Select Specific {imagery_base} Dataset ID", list(sub_options.keys()))
 
431
  dataset_id = sub_selection
432
 
433
  st.markdown("<hr><h5><b>Earth Engine Index Calculator</b></h5>", unsafe_allow_html=True)
434
+
435
  if main_selection and sub_selection:
436
  dataset_bands = data[main_selection]["bands"].get(sub_selection, [])
437
  st.write(f"Available Bands for {sub_options[sub_selection]}: {', '.join(dataset_bands)}")
 
441
  default=[dataset_bands[0]] if dataset_bands else [],
442
  help=f"Select 1 or 2 bands from: {', '.join(dataset_bands)}"
443
  )
444
+
445
  if len(selected_bands) < 1:
446
  st.warning("Please select at least one band.")
447
  st.stop()
448
+
449
  if selected_bands:
450
  if len(selected_bands) == 1:
451
  default_formula = f"{selected_bands[0]}"
 
458
  value=default_formula,
459
  help=f"Use only these bands: {', '.join(selected_bands)}. Examples: {example}"
460
  )
461
+
462
  def validate_formula(formula, selected_bands):
463
  allowed_chars = set(" +-*/()0123456789.")
464
  terms = re.findall(r'[a-zA-Z][a-zA-Z0-9_]*', formula)
 
468
  if not all(char in allowed_chars or char in ''.join(selected_bands) for char in formula):
469
  return False, "Formula contains invalid characters. Use only bands, numbers, and operators (+, -, *, /, ())"
470
  return True, ""
471
+
472
  is_valid, error_message = validate_formula(custom_formula, selected_bands)
473
  if not is_valid:
474
  st.error(error_message)
 
476
  elif not custom_formula:
477
  st.warning("Please enter a custom formula to proceed.")
478
  st.stop()
479
+
480
  st.write(f"Custom Formula: {custom_formula}")
481
 
482
  reducer_choice = st.selectbox(
 
485
  index=0
486
  )
487
 
488
+ start_date = st.date_input("Start Date", value=pd.to_datetime('2024-01-01'))
489
+ end_date = st.date_input("End Date", value=pd.to_datetime('2024-01-31'))
490
  start_date_str = start_date.strftime('%Y-%m-%d')
491
  end_date_str = end_date.strftime('%Y-%m-%d')
492
 
493
  aggregation_period = st.selectbox(
494
+ "Select Aggregation Period",
495
+ ["Daily", "Weekly", "Monthly", "Yearly", "Custom (Complete Date Range)"],
496
  index=0
497
  )
498
 
 
503
 
504
  if shape_type.lower() == "point":
505
  kernel_size = st.selectbox(
506
+ "Select Calculation Area",
507
  ["Point", "3x3 Kernel", "5x5 Kernel"],
508
  index=0,
509
  help="Choose 'Point' for exact point calculation, or a kernel size for area averaging."
 
517
 
518
  file_upload = st.file_uploader(f"Upload your {shape_type} data (CSV, GeoJSON, KML)", type=["csv", "geojson", "kml"])
519
  locations_df = pd.DataFrame()
520
+ original_lat_col = None
521
+ original_lon_col = None
522
 
523
  if file_upload is not None:
524
  if shape_type.lower() == "point":
525
  if file_upload.name.endswith('.csv'):
 
526
  locations_df = pd.read_csv(file_upload)
527
+
528
  st.write("Preview of your uploaded data (first 5 rows):")
529
  st.dataframe(locations_df.head())
530
+
531
+ all_columns = locations_df.columns.tolist()
532
+
533
  col1, col2 = st.columns(2)
534
  with col1:
535
+ original_lat_col = st.selectbox(
536
+ "Select Latitude Column",
537
+ options=all_columns,
538
+ index=all_columns.index('latitude') if 'latitude' in all_columns else 0,
539
+ help="Select the column containing latitude values"
540
  )
541
  with col2:
542
+ original_lon_col = st.selectbox(
543
+ "Select Longitude Column",
544
+ options=all_columns,
545
+ index=all_columns.index('longitude') if 'longitude' in all_columns else 0,
546
+ help="Select the column containing longitude values"
547
  )
548
+
549
+ if not pd.api.types.is_numeric_dtype(locations_df[original_lat_col]) or not pd.api.types.is_numeric_dtype(locations_df[original_lon_col]):
550
+ st.error("Error: Selected Latitude and Longitude columns must contain numeric values")
 
551
  st.stop()
552
+
553
+ locations_df = locations_df.rename(columns={
554
+ original_lat_col: 'latitude',
555
+ original_lon_col: 'longitude'
556
+ })
557
+
558
  elif file_upload.name.endswith('.geojson'):
559
  locations_df = gpd.read_file(file_upload)
560
  if 'geometry' in locations_df.columns:
561
+ locations_df['latitude'] = locations_df['geometry'].y
562
+ locations_df['longitude'] = locations_df['geometry'].x
563
+ original_lat_col = 'latitude'
564
+ original_lon_col = 'longitude'
 
 
 
565
  else:
566
  st.error("GeoJSON file doesn't contain geometry column")
567
  st.stop()
 
584
  st.error("No valid Point data found in the KML file.")
585
  else:
586
  locations_df = gpd.GeoDataFrame(points, geometry=gpd.GeoSeries.from_wkt([p['geometry'] for p in points]), crs="EPSG:4326")
587
+ locations_df['latitude'] = locations_df['geometry'].y
588
+ locations_df['longitude'] = locations_df['geometry'].x
589
+ original_lat_col = 'latitude'
590
+ original_lon_col = 'longitude'
 
 
591
  except Exception as e:
592
  st.error(f"Error parsing KML file: {str(e)}")
593
+
594
+ if not locations_df.empty and 'latitude' in locations_df.columns and 'longitude' in locations_df.columns:
595
+ m = leafmap.Map(center=[locations_df['latitude'].mean(), locations_df['longitude'].mean()], zoom=10)
596
  for _, row in locations_df.iterrows():
597
+ latitude = row['latitude']
598
+ longitude = row['longitude']
599
  if pd.isna(latitude) or pd.isna(longitude):
600
  continue
601
  m.add_marker(location=[latitude, longitude], popup=row.get('name', 'No Name'))
602
  st.write("Map of Uploaded Points:")
603
  m.to_streamlit()
604
+
605
  elif shape_type.lower() == "polygon":
606
  if file_upload.name.endswith('.csv'):
607
  st.error("CSV upload not supported for polygons. Please upload a GeoJSON or KML file.")
 
631
  locations_df = gpd.GeoDataFrame(polygons, geometry=gpd.GeoSeries.from_wkt([p['geometry'] for p in polygons]), crs="EPSG:4326")
632
  except Exception as e:
633
  st.error(f"Error parsing KML file: {str(e)}")
634
+
635
  if not locations_df.empty and 'geometry' in locations_df.columns:
636
  centroid_lat = locations_df.geometry.centroid.y.mean()
637
  centroid_lon = locations_df.geometry.centroid.x.mean()
 
657
  reducer_choice,
658
  shape_type,
659
  aggregation_period,
660
+ original_lat_col,
661
+ original_lon_col,
662
  custom_formula,
663
  kernel_size,
664
+ include_boundary
 
 
665
  )
666
+
667
  if results:
668
  result_df = pd.DataFrame(results)
669
  st.write(f"Processed Results Table ({aggregation_period}) for Formula: {custom_formula}")
670
  st.dataframe(result_df)
671
+
672
  filename = f"{main_selection}_{dataset_id}_{start_date.strftime('%Y%m%d')}_{end_date.strftime('%Y%m%d')}_{aggregation_period.lower()}.csv"
673
  st.download_button(
674
  label="Download results as CSV",
 
676
  file_name=filename,
677
  mime='text/csv'
678
  )
679
+
680
  st.success(f"Processing complete! Total processing time: {processing_time:.2f} seconds.")
681
  else:
682
  st.warning("No results were generated. Check your inputs or formula.")
683
  st.info(f"Total processing time: {processing_time:.2f} seconds.")
684
+
685
  except Exception as e:
686
  st.error(f"An error occurred during processing: {str(e)}")
687
  else: