Spaces:

YashMK89
/

GEE_Calculator

Sleeping

File size: 47,807 Bytes

import streamlit as st
import json
import ee
import os
import pandas as pd
import geopandas as gpd
from datetime import datetime
import leafmap.foliumap as leafmap
import re
from shapely.geometry import base
# from lxml import etree
# from xml.etree import ElementTree as ET
from xml.etree import ElementTree as XET  


# Set up the page layout
st.set_page_config(layout="wide")

# Custom button styling
m = st.markdown(
    """
    <style>
    div.stButton > button:first-child {
        background-color: #006400;
        color:#ffffff;
    }
    </style>""",
    unsafe_allow_html=True,
)

# Logo
st.write(
    f"""
    <div style="display: flex; justify-content: space-between; align-items: center;">
        <img src="https://huggingface.co/spaces/YashMK89/GEE_Calculator/resolve/main/ISRO_Logo.png" style="width: 20%; margin-right: auto;">
        <img src="https://huggingface.co/spaces/YashMK89/GEE_Calculator/resolve/main/SAC_Logo.png" style="width: 20%; margin-left: auto;">
    </div>
    """,
    unsafe_allow_html=True,
)

# Title
st.markdown(
    f"""
    <div style="display: flex; flex-direction: column; align-items: center;">
        <img src="https://huggingface.co/spaces/YashMK89/GEE_Calculator/resolve/main/BHOOMI_1.png" style="width: 20%;">
        <h3 style="text-align: center; margin: 0;">(Bandwise Harmonization & Optimized Output for multispectral integration)</h3>
    </div>
    <hr>
    """,
    unsafe_allow_html=True,
)
# st.markdown(
#     f"""
#     <div style="text-align: center; background-image: url('https://huggingface.co/spaces/YashMK89/GEE_Calculator/resolve/main/B1.jpg'); background-size: cover; padding: 20px;">
#         <h1 style="display: inline-block; margin: 0;">
#             <img src="https://huggingface.co/spaces/YashMK89/GEE_Calculator/resolve/main/B1.png" style="width: 20%; vertical-align: middle; margin-right: 10px;">
#             BHOOMI
#         </h1>
#         <h3 style="margin: 0;">(Bandwise Harmonization & Optimized Output for multispectral integration)</h3>
#     </div>
#     <hr>
#     """,
#     unsafe_allow_html=True,
# )
# st.write("<h4><div style='text-align: center;'>User Inputs</div></h4>", unsafe_allow_html=True)

# st.markdown(
#     f"""
#     <div style="position: relative; text-align: center; padding: 20px;">
#         <div style="background-image: url('https://huggingface.co/spaces/YashMK89/GEE_Calculator/resolve/main/B1.jpg'); background-size: cover; position: absolute; top: 0; left: 0; right: 0; bottom: 0; z-index: 1;"></div>
#         <div style="background-color: rgba(255, 255, 255, 0.2); position: absolute; top: 0; left: 0; right: 0; bottom: 0; z-index: 2;"></div>
#         <div style="position: relative; z-index: 3;">
#             <div style="display: flex; justify-content: space-between; align-items: center;">
#                 <img src="https://huggingface.co/spaces/YashMK89/GEE_Calculator/resolve/main/ISRO_Logo.png" style="width: 20%; margin-right: auto;">
#                 <img src="https://huggingface.co/spaces/YashMK89/GEE_Calculator/resolve/main/SAC_Logo.png" style="width: 20%; margin-left: auto;">
#             </div>
#             <h1 style="display: inline-block; margin: 0;">
#                 BHOOMI
#             </h1>
#             <h3 style="margin: 0;">(Bandwise Harmonization & Optimized Output for multispectral integration)</h3>
#         </div>
#     </div>
#     <hr>
#     """,
#     unsafe_allow_html=True,
# )

st.markdown(
    f"""
    <h4 style="text-align: center;">User Inputs</h4>
    """,
    unsafe_allow_html=True,
)
# Authenticate and initialize Earth Engine
earthengine_credentials = os.environ.get("EE_Authentication")

# Initialize Earth Engine with secret credentials
os.makedirs(os.path.expanduser("~/.config/earthengine/"), exist_ok=True)
with open(os.path.expanduser("~/.config/earthengine/credentials"), "w") as f:
    f.write(earthengine_credentials)

ee.Initialize(project='ee-yashsacisro24')

st.write("<h5>Image Collection</h5>", unsafe_allow_html=True)

# Imagery base selection
imagery_base = st.selectbox("Select Imagery Base", ["Sentinel", "Landsat", "MODIS", "Custom Input"], index=0)

# Load the appropriate dataset based on imagery base
if imagery_base == "Sentinel":
    dataset_file = "sentinel_datasets.json"
    with open(dataset_file) as f:
        data = json.load(f)
elif imagery_base == "Landsat":
    dataset_file = "landsat_datasets.json"
    with open(dataset_file) as f:
        data = json.load(f)
elif imagery_base == "MODIS":
    dataset_file = "modis_datasets.json"
    with open(dataset_file) as f:
        data = json.load(f)
elif imagery_base == "Custom Input":
    custom_dataset_id = st.text_input("Enter Custom Earth Engine Dataset ID (e.g., ee.ImageCollection('AHN/AHN4'))", value="")
    if custom_dataset_id:
        try:
            # Remove potential "ee.ImageCollection()" wrapper for simplicity
            if custom_dataset_id.startswith("ee.ImageCollection("):
                custom_dataset_id = custom_dataset_id.replace("ee.ImageCollection('", "").replace("')", "")
            # Fetch dataset info from GEE
            collection = ee.ImageCollection(custom_dataset_id)
            band_names = collection.first().bandNames().getInfo()
            data = {
                f"Custom Dataset: {custom_dataset_id}": {
                    "sub_options": {custom_dataset_id: f"Custom Dataset ({custom_dataset_id})"},
                    "bands": {custom_dataset_id: band_names}
                }
            }
            st.write(f"Fetched bands for {custom_dataset_id}: {', '.join(band_names)}")
        except Exception as e:
            st.error(f"Error fetching dataset: {str(e)}. Please check the dataset ID and ensure it's valid in Google Earth Engine.")
            data = {}
    else:
        st.warning("Please enter a custom dataset ID to proceed.")
        data = {}

# Display the title for the Streamlit app
# st.title(f"{imagery_base} Dataset")
st.markdown(
    f"""
    <hr>
    <h5><b>{imagery_base} Dataset</b></h5>
    """,
    unsafe_allow_html=True,
)
# Select dataset category (main selection)
if data:
    main_selection = st.selectbox(f"Select {imagery_base} Dataset Category", list(data.keys()))
else:
    main_selection = None

# Initialize sub_selection and dataset_id as None
sub_selection = None
dataset_id = None

# If a category is selected, display the sub-options (specific datasets)
if main_selection:
    sub_options = data[main_selection]["sub_options"]
    sub_selection = st.selectbox(f"Select Specific {imagery_base} Dataset ID", list(sub_options.keys()))

    # Display the selected dataset ID based on user input
    if sub_selection:
        st.write(f"You selected: {main_selection} -> {sub_options[sub_selection]}")
        st.write(f"Dataset ID: {sub_selection}")
        dataset_id = sub_selection  # Use the key directly as the dataset ID

# Earth Engine Index Calculator Section
# st.header("Earth Engine Index Calculator")

st.markdown(
    f"""
    <hr>
    <h5><b>Earth Engine Index Calculator</b></h5>
    """,
    unsafe_allow_html=True,
)
# Load band information based on selected dataset
if main_selection and sub_selection:
    dataset_bands = data[main_selection]["bands"].get(sub_selection, [])
    st.write(f"Available Bands for {sub_options[sub_selection]}: {', '.join(dataset_bands)}")

    # Allow user to select 1 or 2 bands
    selected_bands = st.multiselect(
        "Select 1 or 2 Bands for Calculation",
        options=dataset_bands,
        default=[dataset_bands[0]] if dataset_bands else [],
        help=f"Select 1 or 2 bands from: {', '.join(dataset_bands)}"
    )

    # Ensure minimum 1 and maximum 2 bands are selected
    if len(selected_bands) < 1:
        st.warning("Please select at least one band.")
        st.stop()

    # Show custom formula input if bands are selected
    if selected_bands:
        # Provide a default formula based on the number of selected bands
        if len(selected_bands) == 1:
            default_formula = f"{selected_bands[0]}"
            example = f"'{selected_bands[0]} * 2' or '{selected_bands[0]} + 1'"
        else:  # len(selected_bands) == 2
            default_formula = f"({selected_bands[0]} - {selected_bands[1]}) / ({selected_bands[0]} + {selected_bands[1]})"
            example = f"'{selected_bands[0]} * {selected_bands[1]} / 2' or '({selected_bands[0]} - {selected_bands[1]}) / ({selected_bands[0]} + {selected_bands[1]})'"

        custom_formula = st.text_input(
            "Enter Custom Formula (e.g (B8 - B4) / (B8 + B4) , B4*B3/2)",
            value=default_formula,
            help=f"Use only these bands: {', '.join(selected_bands)}. Examples: {example}"
        )

        # Validate the formula
        def validate_formula(formula, selected_bands):
            allowed_chars = set(" +-*/()0123456789.")
            terms = re.findall(r'[a-zA-Z][a-zA-Z0-9_]*', formula)
            invalid_terms = [term for term in terms if term not in selected_bands]
            if invalid_terms:
                return False, f"Invalid terms in formula: {', '.join(invalid_terms)}. Use only {', '.join(selected_bands)}."
            if not all(char in allowed_chars or char in ''.join(selected_bands) for char in formula):
                return False, "Formula contains invalid characters. Use only bands, numbers, and operators (+, -, *, /, ())"
            return True, ""

        is_valid, error_message = validate_formula(custom_formula, selected_bands)
        if not is_valid:
            st.error(error_message)
            st.stop()
        elif not custom_formula:
            st.warning("Please enter a custom formula to proceed.")
            st.stop()

        # Display the validated formula
        st.write(f"Custom Formula: {custom_formula}")
        
# The rest of your code (reducer, geometry conversion, date input, aggregation, etc.) remains unchanged...

# Function to get the corresponding reducer based on user input
def get_reducer(reducer_name):
    reducers = {
        'mean': ee.Reducer.mean(),
        'sum': ee.Reducer.sum(),
        'median': ee.Reducer.median(),
        'min': ee.Reducer.min(),
        'max': ee.Reducer.max(),
        'count': ee.Reducer.count(),
    }
    return reducers.get(reducer_name.lower(), ee.Reducer.mean())

# Streamlit selectbox for reducer choice
reducer_choice = st.selectbox(
    "Select Reducer (e.g, mean , sum , median , min , max , count)",
    ['mean', 'sum', 'median', 'min', 'max', 'count'],
    index=0  # Default to 'mean'
)

# # Function to convert geometry to Earth Engine format
# def convert_to_ee_geometry(geometry):
#     if isinstance(geometry, base.BaseGeometry):
#         if geometry.is_valid:
#             geojson = geometry.__geo_interface__
#             return ee.Geometry(geojson)
#         else:
#             raise ValueError("Invalid geometry: The polygon geometry is not valid.")
#     elif isinstance(geometry, dict) or isinstance(geometry, str):
#         try:
#             if isinstance(geometry, str):
#                 geometry = json.loads(geometry)
#             if 'type' in geometry and 'coordinates' in geometry:
#                 return ee.Geometry(geometry)
#             else:
#                 raise ValueError("GeoJSON format is invalid.")
#         except Exception as e:
#             raise ValueError(f"Error parsing GeoJSON: {e}")
#     elif isinstance(geometry, str) and geometry.lower().endswith(".kml"):
#         try:
#             tree = ET.parse(geometry)
#             kml_root = tree.getroot()
#             kml_namespace = {'kml': 'http://www.opengis.net/kml/2.2'}
#             coordinates = kml_root.findall(".//kml:coordinates", kml_namespace)
#             if coordinates:
#                 coords_text = coordinates[0].text.strip()
#                 coords = coords_text.split()
#                 coords = [tuple(map(float, coord.split(','))) for coord in coords]
#                 geojson = {"type": "Polygon", "coordinates": [coords]}
#                 return ee.Geometry(geojson)
#             else:
#                 raise ValueError("KML does not contain valid coordinates.")
#         except Exception as e:
#             raise ValueError(f"Error parsing KML: {e}")
#     else:
#         raise ValueError("Unsupported geometry input type. Supported types are Shapely, GeoJSON, and KML.")

# Function to convert geometry to Earth Engine format
def convert_to_ee_geometry(geometry):
    st.write(f"Debug: convert_to_ee_geometry called with type - {type(geometry)}")  # Debug input type
    if isinstance(geometry, base.BaseGeometry):
        if geometry.is_valid:
            geojson = geometry.__geo_interface__
            st.write(f"Debug: Converting Shapely geometry to GeoJSON - {geojson}")  # Debug GeoJSON
            return ee.Geometry(geojson)
        else:
            raise ValueError("Invalid geometry: The polygon geometry is not valid.")
    elif isinstance(geometry, dict):
        if 'type' in geometry and 'coordinates' in geometry:
            return ee.Geometry(geometry)
        else:
            raise ValueError("GeoJSON format is invalid.")
    elif isinstance(geometry, str):
        try:
            # If it’s a JSON string, parse it
            parsed = json.loads(geometry)
            if 'type' in parsed and 'coordinates' in parsed:
                return ee.Geometry(parsed)
            else:
                raise ValueError("GeoJSON string format is invalid.")
        except json.JSONDecodeError:
            # If it’s a KML string (not a file path)
            try:
                root = XET.fromstring(geometry)
                ns = {'kml': 'http://www.opengis.net/kml/2.2'}
                coords_elem = root.find('.//kml:Polygon//kml:coordinates', ns)
                if coords_elem is not None:
                    coords_text = ' '.join(coords_elem.text.split())
                    st.write(f"Debug: KML string coordinates - {coords_text}")  # Debug KML parsing
                    coords = [tuple(map(float, coord.split(','))) for coord in coords_text.split()]
                    geojson = {"type": "Polygon", "coordinates": [coords]}
                    return ee.Geometry(geojson)
                else:
                    raise ValueError("KML string does not contain valid coordinates.")
            except Exception as e:
                raise ValueError(f"Error parsing KML string: {e}")
    else:
        raise ValueError(f"Unsupported geometry input type: {type(geometry)}. Supported types are Shapely, GeoJSON, and KML string.")
        
# Date Input for Start and End Dates
start_date = st.date_input("Start Date", value=pd.to_datetime('2024-11-01'))
end_date = st.date_input("End Date", value=pd.to_datetime('2024-12-01'))

# Convert start_date and end_date to string format for Earth Engine
start_date_str = start_date.strftime('%Y-%m-%d')
end_date_str = end_date.strftime('%Y-%m-%d')

# Aggregation period selection
aggregation_period = st.selectbox(
    "Select Aggregation Period (e.g, Custom(Start Date to End Date) , Weekly , Monthly , Yearly)",
    ["Custom (Start Date to End Date)", "Weekly", "Monthly", "Yearly"],
    index=0
)

# Ask user whether they want to process 'Point' or 'Polygon' data
shape_type = st.selectbox("Do you want to process 'Point' or 'Polygon' data?", ["Point", "Polygon"])

# Additional options based on shape type
kernel_size = None
include_boundary = None
if shape_type.lower() == "point":
    kernel_size = st.selectbox(
        "Select Calculation Area(e.g, Point , 3x3 Kernel , 5x5 Kernel)",
        ["Point", "3x3 Kernel", "5x5 Kernel"],
        index=0,
        help="Choose 'Point' for exact point calculation, or a kernel size for area averaging."
    )
elif shape_type.lower() == "polygon":
    include_boundary = st.checkbox(
        "Include Boundary Pixels",
        value=True,
        help="Check to include pixels on the polygon boundary; uncheck to exclude them."
    )

# # Ask user to upload a file based on shape type
# file_upload = st.file_uploader(f"Upload your {shape_type} data (CSV, GeoJSON, KML)", type=["csv", "geojson", "kml"])

# if file_upload is not None:
#     # Read the user-uploaded file
#     if shape_type.lower() == "point":
#         if file_upload.name.endswith('.csv'):
#             locations_df = pd.read_csv(file_upload)
#         elif file_upload.name.endswith('.geojson'):
#             locations_df = gpd.read_file(file_upload)
#         elif file_upload.name.endswith('.kml'):
#             locations_df = gpd.read_file(file_upload)
#         else:
#             st.error("Unsupported file format. Please upload CSV, GeoJSON, or KML.")
#             locations_df = pd.DataFrame()

#         if 'geometry' in locations_df.columns:
#             if locations_df.geometry.geom_type.isin(['Polygon', 'MultiPolygon']).any():
#                 st.warning("The uploaded file contains polygon data. Please select 'Polygon' for processing.")
#                 st.stop()

#         with st.spinner('Processing Map...'):
#             if locations_df is not None and not locations_df.empty:
#                 if 'geometry' in locations_df.columns:
#                     locations_df['latitude'] = locations_df['geometry'].y
#                     locations_df['longitude'] = locations_df['geometry'].x

#                 if 'latitude' not in locations_df.columns or 'longitude' not in locations_df.columns:
#                     st.error("Uploaded file is missing required 'latitude' or 'longitude' columns.")
#                 else:
#                     st.write("Preview of the uploaded points data:")
#                     st.dataframe(locations_df.head())
#                     m = leafmap.Map(center=[locations_df['latitude'].mean(), locations_df['longitude'].mean()], zoom=10)
#                     for _, row in locations_df.iterrows():
#                         latitude = row['latitude']
#                         longitude = row['longitude']
#                         if pd.isna(latitude) or pd.isna(longitude):
#                             continue
#                         m.add_marker(location=[latitude, longitude], popup=row.get('name', 'No Name'))
#                     st.write("Map of Uploaded Points:")
#                     m.to_streamlit()
#                     st.session_state.map_data = m

#     elif shape_type.lower() == "polygon":
#         if file_upload.name.endswith('.csv'):
#             locations_df = pd.read_csv(file_upload)
#         elif file_upload.name.endswith('.geojson'):
#             locations_df = gpd.read_file(file_upload)
#         elif file_upload.name.endswith('.kml'):
#             locations_df = gpd.read_file(file_upload)
#         else:
#             st.error("Unsupported file format. Please upload CSV, GeoJSON, or KML.")
#             locations_df = pd.DataFrame()

#         if 'geometry' in locations_df.columns:
#             if locations_df.geometry.geom_type.isin(['Point', 'MultiPoint']).any():
#                 st.warning("The uploaded file contains point data. Please select 'Point' for processing.")
#                 st.stop()

#         with st.spinner('Processing Map...'):
#             if locations_df is not None and not locations_df.empty:
#                 if 'geometry' not in locations_df.columns:
#                     st.error("Uploaded file is missing required 'geometry' column.")
#                 else:
#                     st.write("Preview of the uploaded polygons data:")
#                     st.dataframe(locations_df.head())
#                     centroid_lat = locations_df.geometry.centroid.y.mean()
#                     centroid_lon = locations_df.geometry.centroid.x.mean()
#                     m = leafmap.Map(center=[centroid_lat, centroid_lon], zoom=10)
#                     for _, row in locations_df.iterrows():
#                         polygon = row['geometry']
#                         if polygon.is_valid:
#                             gdf = gpd.GeoDataFrame([row], geometry=[polygon], crs=locations_df.crs)
#                             m.add_gdf(gdf=gdf, layer_name=row.get('name', 'Unnamed Polygon'))
#                     st.write("Map of Uploaded Polygons:")
#                     m.to_streamlit()
#                     st.session_state.map_data = m


# Ask user to upload a file based on shape type
file_upload = st.file_uploader(f"Upload your {shape_type} data (CSV, GeoJSON, KML)", type=["csv", "geojson", "kml"])

if file_upload is not None:
    # Read the user-uploaded file
    if shape_type.lower() == "point":
        if file_upload.name.endswith('.csv'):
            locations_df = pd.read_csv(file_upload)
        elif file_upload.name.endswith('.geojson'):
            locations_df = gpd.read_file(file_upload)
        elif file_upload.name.endswith('.kml'):
            # Parse KML file for point data
            kml_string = file_upload.read().decode('utf-8')
            try:
                # Use xml.etree.ElementTree with unique alias
                root = XET.fromstring(kml_string)
                ns = {'kml': 'http://www.opengis.net/kml/2.2'}
                points = []
                for placemark in root.findall('.//kml:Placemark', ns):
                    name = placemark.findtext('kml:name', default=f"Point_{len(points)}", namespaces=ns)
                    coords_elem = placemark.find('.//kml:Point/kml:coordinates', ns)
                    if coords_elem is not None:
                        coords_text = coords_elem.text.strip()
                        st.write(f"Debug: Point coordinates found - {coords_text}")  # Debug output
                        coords = [c.strip() for c in coords_text.split(',')]
                        if len(coords) >= 2:  # Ensure at least lon, lat
                            lon, lat = float(coords[0]), float(coords[1])
                            points.append({'name': name, 'geometry': f"POINT ({lon} {lat})"})
                if not points:
                    st.error("No valid Point data found in the KML file.")
                    locations_df = pd.DataFrame()
                else:
                    locations_df = gpd.GeoDataFrame(points, geometry=gpd.GeoSeries.from_wkt([p['geometry'] for p in points]), crs="EPSG:4326")
            except Exception as e:
                st.error(f"Error parsing KML file: {str(e)}")
                locations_df = pd.DataFrame()
        else:
            st.error("Unsupported file format. Please upload CSV, GeoJSON, or KML.")
            locations_df = pd.DataFrame()

        if 'geometry' in locations_df.columns:
            if locations_df.geometry.geom_type.isin(['Polygon', 'MultiPolygon']).any():
                st.warning("The uploaded file contains polygon data. Please select 'Polygon' for processing.")
                st.stop()

        with st.spinner('Processing Map...'):
            if locations_df is not None and not locations_df.empty:
                if 'geometry' in locations_df.columns:
                    locations_df['latitude'] = locations_df['geometry'].y
                    locations_df['longitude'] = locations_df['geometry'].x

                if 'latitude' not in locations_df.columns or 'longitude' not in locations_df.columns:
                    st.error("Uploaded file is missing required 'latitude' or 'longitude' columns.")
                else:
                    st.write("Preview of the uploaded points data:")
                    st.dataframe(locations_df.head())
                    m = leafmap.Map(center=[locations_df['latitude'].mean(), locations_df['longitude'].mean()], zoom=10)
                    for _, row in locations_df.iterrows():
                        latitude = row['latitude']
                        longitude = row['longitude']
                        if pd.isna(latitude) or pd.isna(longitude):
                            continue
                        m.add_marker(location=[latitude, longitude], popup=row.get('name', 'No Name'))
                    st.write("Map of Uploaded Points:")
                    m.to_streamlit()
                    st.session_state.map_data = m

    elif shape_type.lower() == "polygon":
        if file_upload.name.endswith('.csv'):
            locations_df = pd.read_csv(file_upload)
        elif file_upload.name.endswith('.geojson'):
            locations_df = gpd.read_file(file_upload)
        elif file_upload.name.endswith('.kml'):
            # Parse KML file for polygon data
            kml_string = file_upload.read().decode('utf-8')
            try:
                root = XET.fromstring(kml_string)
                ns = {'kml': 'http://www.opengis.net/kml/2.2'}
                polygons = []
                for placemark in root.findall('.//kml:Placemark', ns):
                    name = placemark.findtext('kml:name', default=f"Polygon_{len(polygons)}", namespaces=ns)
                    coords_elem = placemark.find('.//kml:Polygon//kml:coordinates', ns)
                    if coords_elem is not None:
                        coords_text = ' '.join(coords_elem.text.split())  # Normalize whitespace
                        st.write(f"Debug: Polygon coordinates found - {coords_text}")  # Debug output
                        coord_pairs = [pair.split(',')[:2] for pair in coords_text.split() if pair]
                        if len(coord_pairs) >= 4:  # Minimum 4 points for a closed polygon
                            coords_str = " ".join([f"{float(lon)} {float(lat)}" for lon, lat in coord_pairs])
                            polygons.append({'name': name, 'geometry': f"POLYGON (({coords_str}))"})
                if not polygons:
                    st.error("No valid Polygon data found in the KML file.")
                    locations_df = pd.DataFrame()
                else:
                    locations_df = gpd.GeoDataFrame(polygons, geometry=gpd.GeoSeries.from_wkt([p['geometry'] for p in polygons]), crs="EPSG:4326")
            except Exception as e:
                st.error(f"Error parsing KML file: {str(e)}")
                locations_df = pd.DataFrame()
        else:
            st.error("Unsupported file format. Please upload CSV, GeoJSON, or KML.")
            locations_df = pd.DataFrame()

        if 'geometry' in locations_df.columns:
            if locations_df.geometry.geom_type.isin(['Point', 'MultiPoint']).any():
                st.warning("The uploaded file contains point data. Please select 'Point' for processing.")
                st.stop()

        with st.spinner('Processing Map...'):
            if locations_df is not None and not locations_df.empty:
                if 'geometry' not in locations_df.columns:
                    st.error("Uploaded file is missing required 'geometry' column.")
                else:
                    st.write("Preview of the uploaded polygons data:")
                    st.dataframe(locations_df.head())
                    centroid_lat = locations_df.geometry.centroid.y.mean()
                    centroid_lon = locations_df.geometry.centroid.x.mean()
                    m = leafmap.Map(center=[centroid_lat, centroid_lon], zoom=10)
                    for _, row in locations_df.iterrows():
                        polygon = row['geometry']
                        if polygon.is_valid:
                            gdf = gpd.GeoDataFrame([row], geometry=[polygon], crs=locations_df.crs)
                            m.add_gdf(gdf=gdf, layer_name=row.get('name', 'Unnamed Polygon'))
                    st.write("Map of Uploaded Polygons:")
                    m.to_streamlit()
                    st.session_state.map_data = m

# ... (Rest of the code until convert_to_ee_geometry) ...
# Initialize session state for storing results
if 'results' not in st.session_state:
    st.session_state.results = []
if 'last_params' not in st.session_state:
    st.session_state.last_params = {}
if 'map_data' not in st.session_state:
    st.session_state.map_data = None
if 'show_example' not in st.session_state:
    st.session_state.show_example = True

# Function to check if parameters have changed
def parameters_changed():
    return (
        st.session_state.last_params.get('main_selection') != main_selection or
        st.session_state.last_params.get('dataset_id') != dataset_id or
        st.session_state.last_params.get('selected_bands') != selected_bands or
        st.session_state.last_params.get('custom_formula') != custom_formula or
        st.session_state.last_params.get('start_date_str') != start_date_str or
        st.session_state.last_params.get('end_date_str') != end_date_str or
        st.session_state.last_params.get('shape_type') != shape_type or
        st.session_state.last_params.get('file_upload') != file_upload or
        st.session_state.last_params.get('kernel_size') != kernel_size or
        st.session_state.last_params.get('include_boundary') != include_boundary
    )

# If parameters have changed, reset the results
if parameters_changed():
    st.session_state.results = []
    st.session_state.last_params = {
        'main_selection': main_selection,
        'dataset_id': dataset_id,
        'selected_bands': selected_bands,
        'custom_formula': custom_formula,
        'start_date_str': start_date_str,
        'end_date_str': end_date_str,
        'shape_type': shape_type,
        'file_upload': file_upload,
        'kernel_size': kernel_size,
        'include_boundary': include_boundary
    }

# Function to calculate custom formula
def calculate_custom_formula(image, geometry, selected_bands, custom_formula, reducer_choice, scale=30):
    try:
        band_values = {}
        band_names = image.bandNames().getInfo()
        
        for band in selected_bands:
            if band not in band_names:
                raise ValueError(f"Band '{band}' not found in the dataset.")
            band_values[band] = image.select(band)

        reducer = get_reducer(reducer_choice)
        reduced_values = {}
        for band in selected_bands:
            value = band_values[band].reduceRegion(
                reducer=reducer,
                geometry=geometry,
                scale=scale
            ).get(band).getInfo()
            reduced_values[band] = float(value if value is not None else 0)

        formula = custom_formula
        for band in selected_bands:
            formula = formula.replace(band, str(reduced_values[band]))

        result = eval(formula, {"__builtins__": {}}, reduced_values)
        if not isinstance(result, (int, float)):
            raise ValueError("Formula did not result in a numeric value.")
        
        return ee.Image.constant(result).rename('custom_result')
    
    except ZeroDivisionError:
        st.error("Error: Division by zero in the formula.")
        return ee.Image(0).rename('custom_result').set('error', 'Division by zero')
    except SyntaxError:
        st.error(f"Error: Invalid syntax in formula '{custom_formula}'.")
        return ee.Image(0).rename('custom_result').set('error', 'Invalid syntax')
    except ValueError as e:
        st.error(f"Error: {str(e)}")
        return ee.Image(0).rename('custom_result').set('error', str(e))
    except Exception as e:
        st.error(f"Unexpected error: {e}")
        return ee.Image(0).rename('custom_result').set('error', str(e))

# Function to calculate index for a period
def calculate_index_for_period(image, roi, selected_bands, custom_formula, reducer_choice):
    return calculate_custom_formula(image, roi, selected_bands, custom_formula, reducer_choice)

# Aggregation functions
def aggregate_data_custom(collection):
    collection = collection.map(lambda image: image.set('day', ee.Date(image.get('system:time_start')).format('YYYY-MM-dd')))
    grouped_by_day = collection.aggregate_array('day').distinct()
    def calculate_daily_mean(day):
        daily_collection = collection.filter(ee.Filter.eq('day', day))
        daily_mean = daily_collection.mean()
        return daily_mean.set('day', day)
    daily_images = ee.List(grouped_by_day.map(calculate_daily_mean))
    return ee.ImageCollection(daily_images)

def aggregate_data_weekly(collection):
    def set_week_start(image):
        date = ee.Date(image.get('system:time_start'))
        days_since_week_start = date.getRelative('day', 'week')
        offset = ee.Number(days_since_week_start).multiply(-1)
        week_start = date.advance(offset, 'day')
        return image.set('week_start', week_start.format('YYYY-MM-dd'))
    collection = collection.map(set_week_start)
    grouped_by_week = collection.aggregate_array('week_start').distinct()
    def calculate_weekly_mean(week_start):
        weekly_collection = collection.filter(ee.Filter.eq('week_start', week_start))
        weekly_mean = weekly_collection.mean()
        return weekly_mean.set('week_start', week_start)
    weekly_images = ee.List(grouped_by_week.map(calculate_weekly_mean))
    return ee.ImageCollection(weekly_images)

def aggregate_data_monthly(collection, start_date, end_date):
    collection = collection.filterDate(start_date, end_date)
    collection = collection.map(lambda image: image.set('month', ee.Date(image.get('system:time_start')).format('YYYY-MM')))
    grouped_by_month = collection.aggregate_array('month').distinct()
    def calculate_monthly_mean(month):
        monthly_collection = collection.filter(ee.Filter.eq('month', month))
        monthly_mean = monthly_collection.mean()
        return monthly_mean.set('month', month)
    monthly_images = ee.List(grouped_by_month.map(calculate_monthly_mean))
    return ee.ImageCollection(monthly_images)

def aggregate_data_yearly(collection):
    collection = collection.map(lambda image: image.set('year', ee.Date(image.get('system:time_start')).format('YYYY')))
    grouped_by_year = collection.aggregate_array('year').distinct()
    def calculate_yearly_mean(year):
        yearly_collection = collection.filter(ee.Filter.eq('year', year))
        yearly_mean = yearly_collection.mean()
        return yearly_mean.set('year', year)
    yearly_images = ee.List(grouped_by_year.map(calculate_yearly_mean))
    return ee.ImageCollection(yearly_images)

# Process aggregation function
def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id, selected_bands, reducer_choice, shape_type, aggregation_period, custom_formula="", kernel_size=None, include_boundary=None):
    aggregated_results = []
    
    if not custom_formula:
        st.error("Custom formula cannot be empty. Please provide a formula.")
        return aggregated_results
    
    total_steps = len(locations_df)
    progress_bar = st.progress(0)
    progress_text = st.empty()
    
    with st.spinner('Processing data...'):
        if shape_type.lower() == "point":
            for idx, row in locations_df.iterrows():
                latitude = row.get('latitude')
                longitude = row.get('longitude')
                if pd.isna(latitude) or pd.isna(longitude):
                    st.warning(f"Skipping location {idx} with missing latitude or longitude")
                    continue
                
                location_name = row.get('name', f"Location_{idx}")
                
                if kernel_size == "3x3 Kernel":
                    buffer_size = 45  # 90m x 90m
                    roi = ee.Geometry.Point([longitude, latitude]).buffer(buffer_size).bounds()
                elif kernel_size == "5x5 Kernel":
                    buffer_size = 75  # 150m x 150m
                    roi = ee.Geometry.Point([longitude, latitude]).buffer(buffer_size).bounds()
                else:  # Point
                    roi = ee.Geometry.Point([longitude, latitude])
                
                collection = ee.ImageCollection(dataset_id) \
                    .filterDate(ee.Date(start_date_str), ee.Date(end_date_str)) \
                    .filterBounds(roi)
                
                if aggregation_period.lower() == 'custom (start date to end date)':
                    collection = aggregate_data_custom(collection)
                elif aggregation_period.lower() == 'weekly':
                    collection = aggregate_data_weekly(collection)
                elif aggregation_period.lower() == 'monthly':
                    collection = aggregate_data_monthly(collection, start_date_str, end_date_str)
                elif aggregation_period.lower() == 'yearly':
                    collection = aggregate_data_yearly(collection)
                
                image_list = collection.toList(collection.size())
                processed_weeks = set()
                for i in range(image_list.size().getInfo()):
                    image = ee.Image(image_list.get(i))
                    
                    if aggregation_period.lower() == 'custom (start date to end date)':
                        timestamp = image.get('day')
                        period_label = 'Date'
                        date = ee.Date(timestamp).format('YYYY-MM-dd').getInfo()
                    elif aggregation_period.lower() == 'weekly':
                        timestamp = image.get('week_start')
                        period_label = 'Week'
                        date = ee.String(timestamp).getInfo()
                        if (pd.to_datetime(date) < pd.to_datetime(start_date_str) or 
                            pd.to_datetime(date) > pd.to_datetime(end_date_str) or 
                            date in processed_weeks):
                            continue
                        processed_weeks.add(date)
                    elif aggregation_period.lower() == 'monthly':
                        timestamp = image.get('month')
                        period_label = 'Month'
                        date = ee.Date(timestamp).format('YYYY-MM').getInfo()
                    elif aggregation_period.lower() == 'yearly':
                        timestamp = image.get('year')
                        period_label = 'Year'
                        date = ee.Date(timestamp).format('YYYY').getInfo()
                    
                    index_image = calculate_index_for_period(image, roi, selected_bands, custom_formula, reducer_choice)
                    
                    try:
                        index_value = index_image.reduceRegion(
                            reducer=get_reducer(reducer_choice),
                            geometry=roi,
                            scale=30
                        ).get('custom_result')
                        
                        calculated_value = index_value.getInfo()
                        
                        if isinstance(calculated_value, (int, float)):
                            aggregated_results.append({
                                'Location Name': location_name,
                                'Latitude': latitude,
                                'Longitude': longitude,
                                period_label: date,
                                'Start Date': start_date_str,
                                'End Date': end_date_str,
                                'Calculated Value': calculated_value
                            })
                        else:
                            st.warning(f"Skipping invalid value for {location_name} on {date}")
                    except Exception as e:
                        st.error(f"Error retrieving value for {location_name}: {e}")
                
                progress_percentage = (idx + 1) / total_steps
                progress_bar.progress(progress_percentage)
                progress_text.markdown(f"Processing: {int(progress_percentage * 100)}%")
        
        elif shape_type.lower() == "polygon":
            for idx, row in locations_df.iterrows():
                polygon_name = row.get('name', f"Polygon_{idx}")
                polygon_geometry = row.get('geometry')
                location_name = polygon_name
                
                try:
                    roi = convert_to_ee_geometry(polygon_geometry)
                    if not include_boundary:
                        roi = roi.buffer(-30).bounds()
                except ValueError as e:
                    st.warning(f"Skipping invalid polygon {polygon_name}: {e}")
                    continue
                
                collection = ee.ImageCollection(dataset_id) \
                    .filterDate(ee.Date(start_date_str), ee.Date(end_date_str)) \
                    .filterBounds(roi)
                
                if aggregation_period.lower() == 'custom (start date to end date)':
                    collection = aggregate_data_custom(collection)
                elif aggregation_period.lower() == 'weekly':
                    collection = aggregate_data_weekly(collection)
                elif aggregation_period.lower() == 'monthly':
                    collection = aggregate_data_monthly(collection, start_date_str, end_date_str)
                elif aggregation_period.lower() == 'yearly':
                    collection = aggregate_data_yearly(collection)
                
                image_list = collection.toList(collection.size())
                processed_weeks = set()
                for i in range(image_list.size().getInfo()):
                    image = ee.Image(image_list.get(i))
                    
                    if aggregation_period.lower() == 'custom (start date to end date)':
                        timestamp = image.get('day')
                        period_label = 'Date'
                        date = ee.Date(timestamp).format('YYYY-MM-dd').getInfo()
                    elif aggregation_period.lower() == 'weekly':
                        timestamp = image.get('week_start')
                        period_label = 'Week'
                        date = ee.String(timestamp).getInfo()
                        if (pd.to_datetime(date) < pd.to_datetime(start_date_str) or 
                            pd.to_datetime(date) > pd.to_datetime(end_date_str) or 
                            date in processed_weeks):
                            continue
                        processed_weeks.add(date)
                    elif aggregation_period.lower() == 'monthly':
                        timestamp = image.get('month')
                        period_label = 'Month'
                        date = ee.Date(timestamp).format('YYYY-MM').getInfo()
                    elif aggregation_period.lower() == 'yearly':
                        timestamp = image.get('year')
                        period_label = 'Year'
                        date = ee.Date(timestamp).format('YYYY').getInfo()
                    
                    index_image = calculate_index_for_period(image, roi, selected_bands, custom_formula, reducer_choice)
                    
                    try:
                        index_value = index_image.reduceRegion(
                            reducer=get_reducer(reducer_choice),
                            geometry=roi,
                            scale=30
                        ).get('custom_result')
                        
                        calculated_value = index_value.getInfo()
                        
                        if isinstance(calculated_value, (int, float)):
                            aggregated_results.append({
                                'Location Name': location_name,
                                period_label: date,
                                'Start Date': start_date_str,
                                'End Date': end_date_str,
                                'Calculated Value': calculated_value
                            })
                        else:
                            st.warning(f"Skipping invalid value for {location_name} on {date}")
                    except Exception as e:
                        st.error(f"Error retrieving value for {location_name}: {e}")
                
                progress_percentage = (idx + 1) / total_steps
                progress_bar.progress(progress_percentage)
                progress_text.markdown(f"Processing: {int(progress_percentage * 100)}%")
    
    if aggregated_results:
        result_df = pd.DataFrame(aggregated_results)
        if aggregation_period.lower() == 'custom (start date to end date)':
            agg_dict = {
                'Start Date': 'first',
                'End Date': 'first',
                'Calculated Value': 'mean'
            }
            if shape_type.lower() == 'point':
                agg_dict['Latitude'] = 'first'
                agg_dict['Longitude'] = 'first'
            aggregated_output = result_df.groupby('Location Name').agg(agg_dict).reset_index()
            aggregated_output.rename(columns={'Calculated Value': 'Aggregated Value'}, inplace=True)
            return aggregated_output.to_dict(orient='records')
        else:
            return result_df.to_dict(orient='records')
    return []

# Button to trigger calculation
if st.button(f"Calculate {custom_formula}"):
    if file_upload is not None:
        if shape_type.lower() in ["point", "polygon"]:
            results = process_aggregation(
                locations_df,
                start_date_str,
                end_date_str,
                dataset_id,
                selected_bands,
                reducer_choice,
                shape_type,
                aggregation_period,
                custom_formula,
                kernel_size=kernel_size,
                include_boundary=include_boundary
            )
            if results:
                result_df = pd.DataFrame(results)
                st.write(f"Processed Results Table ({aggregation_period}) for Formula: {custom_formula}")
                st.dataframe(result_df)
                filename = f"{main_selection}_{dataset_id}_{start_date.strftime('%Y%m%d')}_{end_date.strftime('%Y%m%d')}_{aggregation_period.lower()}.csv"
                st.download_button(
                    label="Download results as CSV",
                    data=result_df.to_csv(index=False).encode('utf-8'),
                    file_name=filename,
                    mime='text/csv'
                )
                # Show an example calculation
                if st.session_state.show_example and results:
                    example_result = results[0]
                    example_image = ee.ImageCollection(dataset_id).filterDate(start_date_str, end_date_str).first()
                    example_roi = (
                        ee.Geometry.Point([example_result['Longitude'], example_result['Latitude']]) 
                        if shape_type.lower() == 'point' 
                        else convert_to_ee_geometry(locations_df['geometry'].iloc[0])
                    )
                    example_values = {}
                    for band in selected_bands:
                        value = example_image.select(band).reduceRegion(
                            reducer=get_reducer(reducer_choice),
                            geometry=example_roi,
                            scale=30
                        ).get(band).getInfo()
                        example_values[band] = float(value if value is not None else 0)
                    example_formula = custom_formula
                    for band in selected_bands:
                        example_formula = example_formula.replace(band, str(example_values[band]))
                    # st.write(f"Example Calculation: {custom_formula} -> {example_formula} = {example_result.get('Calculated Value', example_result.get('Aggregated Value'))}")
                    st.session_state.show_example = False
                st.success('Processing complete!')
            else:
                st.warning("No results were generated. Check your inputs or formula.")
        else:
            st.warning("Please upload a file to process.")
    else:
        st.warning("Please upload a file to proceed.")