# SATRANG / app.py
# Source: YashMK89's Hugging Face Space (commit 8e4e9db, verified, "update app.py")
# Raw file ~68.9 kB; history/blame available on the Hub page.
# import streamlit as st
# import json
# import ee
# import os
# import pandas as pd
# import geopandas as gpd
# from datetime import datetime
# import leafmap.foliumap as leafmap
# import re
# from shapely.geometry import base
# from lxml import etree
# from xml.etree import ElementTree as ET
# # Set up the page layout
# st.set_page_config(layout="wide")
# # Custom button styling
# m = st.markdown(
# """
# <style>
# div.stButton > button:first-child {
# background-color: #006400;
# color:#ffffff;
# }
# </style>""",
# unsafe_allow_html=True,
# )
# # Logo
# st.write(
# f"""
# <div style="display: flex; justify-content: space-between; align-items: center;">
# <img src="https://huggingface.co/spaces/YashMK89/GEE_Calculator/resolve/main/ISRO_Logo.png" style="width: 20%; margin-right: auto;">
# <img src="https://huggingface.co/spaces/YashMK89/GEE_Calculator/resolve/main/SAC_Logo.png" style="width: 20%; margin-left: auto;">
# </div>
# """,
# unsafe_allow_html=True,
# )
# # Title
# st.markdown(
# f"""
# <h1 style="text-align: center;">Precision Analysis for Vegetation, Water, and Air Quality</h1>
# """,
# unsafe_allow_html=True,
# )
# st.write("<h2><div style='text-align: center;'>User Inputs</div></h2>", unsafe_allow_html=True)
# # Authenticate and initialize Earth Engine
# earthengine_credentials = os.environ.get("EE_Authentication")
# # Initialize Earth Engine with secret credentials
# os.makedirs(os.path.expanduser("~/.config/earthengine/"), exist_ok=True)
# with open(os.path.expanduser("~/.config/earthengine/credentials"), "w") as f:
# f.write(earthengine_credentials)
# ee.Initialize(project='ee-yashsacisro24')
# # Load the Sentinel dataset options from JSON file
# with open("sentinel_datasets.json") as f:
# data = json.load(f)
# # Display the title for the Streamlit app
# st.title("Sentinel Dataset")
# # Select dataset category (main selection)
# main_selection = st.selectbox("Select Sentinel Dataset Category", list(data.keys()))
# # If a category is selected, display the sub-options (specific datasets)
# if main_selection:
# sub_options = data[main_selection]["sub_options"]
# sub_selection = st.selectbox("Select Specific Dataset ID", list(sub_options.keys()))
# # Display the selected dataset ID based on user input
# if sub_selection:
# st.write(f"You selected: {main_selection} -> {sub_selection}")
# st.write(f"Dataset ID: {sub_options[sub_selection]}")
# # Fetch the correct dataset ID from the sub-selection
# dataset_id = sub_options[sub_selection]
# # Earth Engine Index Calculator Section
# st.header("Earth Engine Index Calculator")
# index_choice = st.selectbox("Select an Index or Enter Custom Formula", ['NDVI', 'NDWI', 'Average NO₂', 'Custom Formula'])
# # Initialize custom_formula variable
# custom_formula = ""
# # Display corresponding formula based on the index selected (case-insensitive)
# if index_choice.lower() == 'ndvi':
# st.write("Formula for NDVI: NDVI = (B8 - B4) / (B8 + B4)")
# elif index_choice.lower() == 'ndwi':
# st.write("Formula for NDWI: NDWI = (B3 - B8) / (B3 + B8)")
# elif index_choice.lower() == 'average no₂':
# st.write("Formula for Average NO₂: Average NO₂ = Mean(NO2 band)")
# elif index_choice.lower() == 'custom formula':
# custom_formula = st.text_input("Enter Custom Formula (e.g., B5,B4 for two bands or B3 for one band)")
# # Check if custom formula is empty and show warning
# if not custom_formula:
# st.warning("Please enter a custom formula before proceeding.")
# else:
# # Check if the input contains a comma (indicating two bands)
# if ',' in custom_formula:
# # Split the input into two bands and strip whitespace
# band1, band2 = [b.strip() for b in custom_formula.split(',', 1)]
# st.write(f"Custom Formula: ({band1} - {band2}) / ({band1} + {band2})")
# else:
# # Single band case
# band = custom_formula.strip()
# st.write(f"Custom Formula: {band}")
# # Function to get the corresponding reducer based on user input
# def get_reducer(reducer_name):
# """
# Map user-friendly reducer names to Earth Engine reducer objects.
# """
# reducers = {
# 'mean': ee.Reducer.mean(),
# 'sum': ee.Reducer.sum(),
# 'median': ee.Reducer.median(),
# 'min': ee.Reducer.min(),
# 'max': ee.Reducer.max(),
# 'count': ee.Reducer.count(),
# }
# # Default to 'mean' if the reducer_name is not recognized
# return reducers.get(reducer_name.lower(), ee.Reducer.mean())
# # Streamlit selectbox for reducer choice
# reducer_choice = st.selectbox(
# "Select Reducer",
# ['mean', 'sum', 'median', 'min', 'max', 'count'],
# index=0 # Default to 'mean'
# )
# def convert_to_ee_geometry(geometry):
# # Handle Shapely geometry
# if isinstance(geometry, base.BaseGeometry):
# if geometry.is_valid:
# geojson = geometry.__geo_interface__
# print("Shapely GeoJSON:", geojson) # Debugging: Inspect the GeoJSON structure
# return ee.Geometry(geojson)
# else:
# raise ValueError("Invalid geometry: The polygon geometry is not valid.")
# # Handle GeoJSON input (string or dictionary)
# elif isinstance(geometry, dict) or isinstance(geometry, str):
# try:
# if isinstance(geometry, str):
# geometry = json.loads(geometry)
# if 'type' in geometry and 'coordinates' in geometry:
# print("GeoJSON Geometry:", geometry) # Debugging: Inspect the GeoJSON structure
# return ee.Geometry(geometry)
# else:
# raise ValueError("GeoJSON format is invalid.")
# except Exception as e:
# raise ValueError(f"Error parsing GeoJSON: {e}")
# # Handle KML input (string or file path)
# elif isinstance(geometry, str) and geometry.lower().endswith(".kml"):
# try:
# # Parse the KML file
# tree = ET.parse(geometry)
# kml_root = tree.getroot()
# # Extract coordinates from KML geometry (assuming it's a Polygon or MultiPolygon)
# # KML coordinates are usually within the <coordinates> tag
# kml_namespace = {'kml': 'http://www.opengis.net/kml/2.2'}
# coordinates = kml_root.findall(".//kml:coordinates", kml_namespace)
# if coordinates:
# # Extract and format coordinates
# coords_text = coordinates[0].text.strip()
# coords = coords_text.split()
# # Convert KML coordinates (comma-separated) into a list of tuples
# coords = [tuple(map(float, coord.split(','))) for coord in coords]
# geojson = {
# "type": "Polygon", # Make sure the GeoJSON type is Polygon
# "coordinates": [coords] # Wrap the coordinates in a list (required by GeoJSON format)
# }
# # Debugging: Inspect the KML-to-GeoJSON structure
# print("KML GeoJSON:", geojson)
# return ee.Geometry(geojson)
# else:
# raise ValueError("KML does not contain valid coordinates.")
# except Exception as e:
# raise ValueError(f"Error parsing KML: {e}")
# else:
# raise ValueError("Unsupported geometry input type. Supported types are Shapely, GeoJSON, and KML.")
# # Function to read points from CSV
# def read_csv(file_path):
# df = pd.read_csv(file_path)
# return df
# # Function to read points from GeoJSON
# def read_geojson(file_path):
# gdf = gpd.read_file(file_path)
# return gdf
# # Function to read points from KML
# def read_kml(file_path):
# gdf = gpd.read_file(file_path, driver='KML')
# return gdf
# # Date Input for Start and End Dates
# start_date = st.date_input("Start Date", value=pd.to_datetime('2024-11-01'))
# end_date = st.date_input("End Date", value=pd.to_datetime('2024-12-01'))
# # Convert start_date and end_date to string format for Earth Engine
# start_date_str = start_date.strftime('%Y-%m-%d')
# end_date_str = end_date.strftime('%Y-%m-%d')
# # Aggregation period selection
# aggregation_period = st.selectbox("Select Aggregation Period", ["Daily", "Weekly", "Monthly", "Yearly"], index=0)
# # Ask user whether they want to process 'Point' or 'Polygon' data (case-insensitive)
# shape_type = st.selectbox("Do you want to process 'Point' or 'Polygon' data?", ["Point", "Polygon"])
# # Ask user to upload a file based on shape type (case-insensitive)
# file_upload = st.file_uploader(f"Upload your {shape_type} data (CSV, GeoJSON, KML)", type=["csv", "geojson", "kml"])
# if file_upload is not None:
# # Read the user-uploaded file
# if shape_type.lower() == "point":
# # Handle different file types for Point data
# if file_upload.name.endswith('.csv'):
# locations_df = pd.read_csv(file_upload)
# elif file_upload.name.endswith('.geojson'):
# locations_df = gpd.read_file(file_upload)
# elif file_upload.name.endswith('.kml'):
# locations_df = gpd.read_file(file_upload)
# else:
# st.error("Unsupported file format. Please upload CSV, GeoJSON, or KML.")
# locations_df = pd.DataFrame()
# # Check if the file contains polygons when the user selected "Point"
# if 'geometry' in locations_df.columns:
# # Check if the geometry type is Polygon or MultiPolygon
# if locations_df.geometry.geom_type.isin(['Polygon', 'MultiPolygon']).any():
# st.warning("The uploaded file contains polygon data. Please select 'Polygon' for processing.")
# st.stop() # Stop further processing if polygons are detected
# # Processing the point data
# with st.spinner('Processing Map...'):
# if locations_df is not None and not locations_df.empty:
# # For GeoJSON data, the coordinates are in the geometry column
# if 'geometry' in locations_df.columns:
# # Extract latitude and longitude from the geometry column
# locations_df['latitude'] = locations_df['geometry'].y
# locations_df['longitude'] = locations_df['geometry'].x
# # Ensure the necessary columns exist in the dataframe
# if 'latitude' not in locations_df.columns or 'longitude' not in locations_df.columns:
# st.error("Uploaded file is missing required 'latitude' or 'longitude' columns.")
# else:
# # Display a preview of the points data
# st.write("Preview of the uploaded points data:")
# st.dataframe(locations_df.head())
# # Create a LeafMap object to display the points
# m = leafmap.Map(center=[locations_df['latitude'].mean(), locations_df['longitude'].mean()], zoom=10)
# # Add points to the map using a loop
# for _, row in locations_df.iterrows():
# latitude = row['latitude']
# longitude = row['longitude']
# # Check if latitude or longitude are NaN and skip if they are
# if pd.isna(latitude) or pd.isna(longitude):
# continue # Skip this row and move to the next one
# m.add_marker(location=[latitude, longitude], popup=row.get('name', 'No Name'))
# # Display map
# st.write("Map of Uploaded Points:")
# m.to_streamlit()
# # Store the map in session_state
# st.session_state.map_data = m
# elif shape_type.lower() == "polygon":
# # Handle different file types for Polygon data:
# if file_upload.name.endswith('.csv'):
# locations_df = pd.read_csv(file_upload)
# elif file_upload.name.endswith('.geojson'):
# locations_df = gpd.read_file(file_upload)
# elif file_upload.name.endswith('.kml'):
# locations_df = gpd.read_file(file_upload)
# else:
# st.error("Unsupported file format. Please upload CSV, GeoJSON, or KML.")
# locations_df = pd.DataFrame()
# # Check if the file contains points when the user selected "Polygon"
# if 'geometry' in locations_df.columns:
# # Check if the geometry type is Point or MultiPoint
# if locations_df.geometry.geom_type.isin(['Point', 'MultiPoint']).any():
# st.warning("The uploaded file contains point data. Please select 'Point' for processing.")
# st.stop() # Stop further processing if point data is detected
# # Processing the polygon data
# with st.spinner('Processing Map...'):
# if locations_df is not None and not locations_df.empty:
# # Ensure the 'geometry' column exists in the dataframe
# if 'geometry' not in locations_df.columns:
# st.error("Uploaded file is missing required 'geometry' column.")
# else:
# # Display a preview of the polygons data
# st.write("Preview of the uploaded polygons data:")
# st.dataframe(locations_df.head())
# # Create a LeafMap object to display the polygons
# # Calculate the centroid of the polygons for the map center
# centroid_lat = locations_df.geometry.centroid.y.mean()
# centroid_lon = locations_df.geometry.centroid.x.mean()
# m = leafmap.Map(center=[centroid_lat, centroid_lon], zoom=10)
# # Add polygons to the map using a loop
# for _, row in locations_df.iterrows():
# polygon = row['geometry']
# if polygon.is_valid: # Check if polygon is valid
# # Create a GeoDataFrame for this polygon
# gdf = gpd.GeoDataFrame([row], geometry=[polygon], crs=locations_df.crs)
# m.add_gdf(gdf=gdf, layer_name=row.get('name', 'Unnamed Polygon'))
# # Display map
# st.write("Map of Uploaded Polygons:")
# m.to_streamlit()
# # Store the map in session_state
# st.session_state.map_data = m
# # Initialize session state for storing results if not already done
# if 'results' not in st.session_state:
# st.session_state.results = []
# if 'last_params' not in st.session_state:
# st.session_state.last_params = {}
# if 'map_data' not in st.session_state:
# st.session_state.map_data = None # Initialize map_data
# # Function to check if parameters have changed
# def parameters_changed():
# return (
# st.session_state.last_params.get('main_selection') != main_selection or
# st.session_state.last_params.get('dataset_id') != dataset_id or
# st.session_state.last_params.get('index_choice') != index_choice or
# st.session_state.last_params.get('start_date_str') != start_date_str or
# st.session_state.last_params.get('end_date_str') != end_date_str or
# st.session_state.last_params.get('shape_type') != shape_type or
# st.session_state.last_params.get('file_upload') != file_upload
# )
# # If parameters have changed, reset the results
# if parameters_changed():
# st.session_state.results = [] # Clear the previous results
# st.session_state.last_params = {
# 'main_selection': main_selection,
# 'dataset_id': dataset_id,
# 'index_choice': index_choice,
# 'start_date_str': start_date_str,
# 'end_date_str': end_date_str,
# 'shape_type': shape_type,
# 'file_upload': file_upload
# }
# # Function to calculate NDVI with the selected reducer
# def calculate_ndvi(image, geometry, reducer_choice):
# ndvi = image.normalizedDifference(['B8', 'B4']).rename('NDVI')
# return ndvi
# # Function to calculate NDWI
# def calculate_ndwi(image, geometry, reducer_choice):
# ndwi = image.normalizedDifference(['B3', 'B8']).rename('NDWI')
# return ndwi
# def calculate_custom_formula(image, geometry, custom_formula, reducer_choice, scale=30):
# try:
# if "," in custom_formula:
# band1, band2 = [b.strip() for b in custom_formula.split(",")]
# band_names = image.bandNames().getInfo()
# if band1 not in band_names or band2 not in band_names:
# raise ValueError(f"One or both bands ({band1}, {band2}) do not exist in the image.")
# result = image.normalizedDifference([band1, band2]).rename('custom_formula')
# else:
# band = custom_formula.strip()
# band_names = image.bandNames().getInfo()
# if band not in band_names:
# raise ValueError(f"The band '{band}' does not exist in the image.")
# result = image.select(band).rename('custom_formula')
# return result
# except Exception as e:
# return ee.Image(0).rename('custom_formula').set('error', str(e))
# # Modify aggregation functions to return the correct time period and aggregated results
# def aggregate_data_daily(collection):
# # Extract day from the image date (using the exact date)
# collection = collection.map(lambda image: image.set('day', ee.Date(image.get('system:time_start')).format('YYYY-MM-dd')))
# # Group images by day (distinct days)
# grouped_by_day = collection.aggregate_array('day').distinct()
# def calculate_daily_mean(day):
# # Filter the collection by the specific day
# daily_collection = collection.filter(ee.Filter.eq('day', day))
# daily_mean = daily_collection.mean() # Calculate mean for the day
# return daily_mean.set('day', day)
# # Calculate the daily mean for each day
# daily_images = ee.List(grouped_by_day.map(calculate_daily_mean))
# return ee.ImageCollection(daily_images)
# def aggregate_data_weekly(collection):
# # Extract the start date of the week from the image date
# collection = collection.map(lambda image: image.set(
# 'week_start', ee.Date(image.get('system:time_start'))
# .advance(-ee.Date(image.get('system:time_start')).getRelative('day', 'week'), 'day')
# ))
# # Group images by week start date
# grouped_by_week = collection.aggregate_array('week_start').distinct()
# def calculate_weekly_mean(week_start):
# # Filter the collection by the specific week start date
# weekly_collection = collection.filter(ee.Filter.eq('week_start', week_start))
# weekly_mean = weekly_collection.mean() # Calculate mean for the week
# return weekly_mean.set('week_start', week_start)
# # Calculate the weekly mean for each week
# weekly_images = ee.List(grouped_by_week.map(calculate_weekly_mean))
# return ee.ImageCollection(weekly_images)
# def aggregate_data_monthly(collection, start_date, end_date):
# # Filter the collection for the specific date range
# collection = collection.filterDate(start_date, end_date)
# # Extract month and year from the image date
# collection = collection.map(lambda image: image.set('month', ee.Date(image.get('system:time_start')).format('YYYY-MM')))
# # Group images by month
# grouped_by_month = collection.aggregate_array('month').distinct()
# def calculate_monthly_mean(month):
# monthly_collection = collection.filter(ee.Filter.eq('month', month))
# monthly_mean = monthly_collection.mean()
# return monthly_mean.set('month', month)
# # Calculate the monthly mean for each month
# monthly_images = ee.List(grouped_by_month.map(calculate_monthly_mean))
# return ee.ImageCollection(monthly_images)
# def aggregate_data_yearly(collection):
# # Extract year from the image date
# collection = collection.map(lambda image: image.set('year', ee.Date(image.get('system:time_start')).format('YYYY')))
# # Group images by year
# grouped_by_year = collection.aggregate_array('year').distinct()
# def calculate_yearly_mean(year):
# yearly_collection = collection.filter(ee.Filter.eq('year', year))
# yearly_mean = yearly_collection.mean()
# return yearly_mean.set('year', year)
# # Calculate the yearly mean for each year
# yearly_images = ee.List(grouped_by_year.map(calculate_yearly_mean))
# return ee.ImageCollection(yearly_images)
# # Function to calculate index based on the selected choice
# def calculate_index_for_period(image, roi, index_choice, reducer_choice, custom_formula):
# if index_choice.lower() == 'ndvi':
# return calculate_ndvi(image, roi, reducer_choice)
# elif index_choice.lower() == 'ndwi':
# return calculate_ndwi(image, roi, reducer_choice)
# elif index_choice.lower() == 'average no₂':
# mean_no2 = image.select('NO2').mean().rename('Average NO₂')
# return mean_no2
# elif index_choice.lower() == 'custom formula':
# # Pass the custom formula here, not the index_choice
# return calculate_custom_formula(image, roi, custom_formula, reducer_choice)
# else:
# st.write("Please Select any one option...."+ index_choice.lower())
# def aggregate_data_weekly(collection):
# def set_week_start(image):
# # Get the image timestamp
# date = ee.Date(image.get('system:time_start'))
# # Calculate days since the start of the week (0 = Monday, 6 = Sunday)
# days_since_week_start = date.getRelative('day', 'week')
# # Convert to ee.Number and negate it to get the offset to the week start
# offset = ee.Number(days_since_week_start).multiply(-1)
# # Advance the date by the negative offset to get the week start
# week_start = date.advance(offset, 'day')
# return image.set('week_start', week_start.format('YYYY-MM-dd')) # Ensure string format
# # Apply the week start calculation to each image
# collection = collection.map(set_week_start)
# # Group images by week start date
# grouped_by_week = collection.aggregate_array('week_start').distinct()
# def calculate_weekly_mean(week_start):
# # Filter the collection by the specific week start date
# weekly_collection = collection.filter(ee.Filter.eq('week_start', week_start))
# weekly_mean = weekly_collection.mean() # Calculate mean for the week
# return weekly_mean.set('week_start', week_start)
# # Calculate the weekly mean for each week
# weekly_images = ee.List(grouped_by_week.map(calculate_weekly_mean))
# return ee.ImageCollection(weekly_images)
# def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id, index_choice, reducer_choice, shape_type, aggregation_period, custom_formula=""):
# aggregated_results = []
# if index_choice.lower() == 'custom_formula' and not custom_formula:
# st.error("Custom formula cannot be empty. Please provide a formula.")
# return aggregated_results
# total_steps = len(locations_df)
# progress_bar = st.progress(0)
# progress_text = st.empty()
# with st.spinner('Processing data...'):
# if shape_type.lower() == "point":
# for idx, row in locations_df.iterrows():
# latitude = row.get('latitude')
# longitude = row.get('longitude')
# if pd.isna(latitude) or pd.isna(longitude):
# st.warning(f"Skipping location {idx} with missing latitude or longitude")
# continue
# location_name = row.get('name', f"Location_{idx}")
# roi = ee.Geometry.Point([longitude, latitude])
# collection = ee.ImageCollection(dataset_id) \
# .filterDate(ee.Date(start_date_str), ee.Date(end_date_str)) \
# .filterBounds(roi)
# # Aggregate data based on the selected period
# if aggregation_period.lower() == 'daily':
# collection = aggregate_data_daily(collection)
# elif aggregation_period.lower() == 'weekly':
# collection = aggregate_data_weekly(collection)
# elif aggregation_period.lower() == 'monthly':
# collection = aggregate_data_monthly(collection, start_date_str, end_date_str)
# elif aggregation_period.lower() == 'yearly':
# collection = aggregate_data_yearly(collection)
# # Process each image in the collection
# image_list = collection.toList(collection.size())
# processed_weeks = set() # Track processed weeks to avoid duplicates
# for i in range(image_list.size().getInfo()):
# image = ee.Image(image_list.get(i))
# if aggregation_period.lower() == 'daily':
# timestamp = image.get('day')
# period_label = 'Date'
# date = ee.Date(timestamp).format('YYYY-MM-dd').getInfo()
# elif aggregation_period.lower() == 'weekly':
# timestamp = image.get('week_start')
# period_label = 'Week'
# date = ee.String(timestamp).getInfo() # Already formatted as YYYY-MM-dd
# # Skip if week is outside the date range or already processed
# if (pd.to_datetime(date) < pd.to_datetime(start_date_str) or
# pd.to_datetime(date) > pd.to_datetime(end_date_str) or
# date in processed_weeks):
# continue
# processed_weeks.add(date)
# elif aggregation_period.lower() == 'monthly':
# timestamp = image.get('month')
# period_label = 'Month'
# date = ee.Date(timestamp).format('YYYY-MM').getInfo()
# elif aggregation_period.lower() == 'yearly':
# timestamp = image.get('year')
# period_label = 'Year'
# date = ee.Date(timestamp).format('YYYY').getInfo()
# index_image = calculate_index_for_period(image, roi, index_choice, reducer_choice, custom_formula)
# try:
# index_value = index_image.reduceRegion(
# reducer=get_reducer(reducer_choice),
# geometry=roi,
# scale=30
# ).get(index_image.bandNames().get(0))
# calculated_value = index_value.getInfo()
# if isinstance(calculated_value, (int, float)):
# aggregated_results.append({
# 'Location Name': location_name,
# 'Latitude': latitude,
# 'Longitude': longitude,
# period_label: date,
# 'Start Date': start_date_str,
# 'End Date': end_date_str,
# 'Calculated Value': calculated_value
# })
# else:
# st.warning(f"Skipping invalid value for {location_name} on {date}")
# except Exception as e:
# st.error(f"Error retrieving value for {location_name}: {e}")
# progress_percentage = (idx + 1) / total_steps
# progress_bar.progress(progress_percentage)
# progress_text.markdown(f"Processing: {int(progress_percentage * 100)}%")
# elif shape_type.lower() == "polygon":
# for idx, row in locations_df.iterrows():
# polygon_name = row.get('name', f"Polygon_{idx}")
# polygon_geometry = row.get('geometry')
# location_name = polygon_name
# try:
# roi = convert_to_ee_geometry(polygon_geometry)
# except ValueError as e:
# st.warning(f"Skipping invalid polygon {polygon_name}: {e}")
# continue
# collection = ee.ImageCollection(dataset_id) \
# .filterDate(ee.Date(start_date_str), ee.Date(end_date_str)) \
# .filterBounds(roi)
# # Aggregate data based on the selected period
# if aggregation_period.lower() == 'daily':
# collection = aggregate_data_daily(collection)
# elif aggregation_period.lower() == 'weekly':
# collection = aggregate_data_weekly(collection)
# elif aggregation_period.lower() == 'monthly':
# collection = aggregate_data_monthly(collection, start_date_str, end_date_str)
# elif aggregation_period.lower() == 'yearly':
# collection = aggregate_data_yearly(collection)
# # Process each image in the collection
# image_list = collection.toList(collection.size())
# processed_weeks = set() # Track processed weeks to avoid duplicates
# for i in range(image_list.size().getInfo()):
# image = ee.Image(image_list.get(i))
# if aggregation_period.lower() == 'daily':
# timestamp = image.get('day')
# period_label = 'Date'
# date = ee.Date(timestamp).format('YYYY-MM-dd').getInfo()
# elif aggregation_period.lower() == 'weekly':
# timestamp = image.get('week_start')
# period_label = 'Week'
# date = ee.String(timestamp).getInfo() # Already formatted as YYYY-MM-dd
# # Skip if week is outside the date range or already processed
# if (pd.to_datetime(date) < pd.to_datetime(start_date_str) or
# pd.to_datetime(date) > pd.to_datetime(end_date_str) or
# date in processed_weeks):
# continue
# processed_weeks.add(date)
# elif aggregation_period.lower() == 'monthly':
# timestamp = image.get('month')
# period_label = 'Month'
# date = ee.Date(timestamp).format('YYYY-MM').getInfo()
# elif aggregation_period.lower() == 'yearly':
# timestamp = image.get('year')
# period_label = 'Year'
# date = ee.Date(timestamp).format('YYYY').getInfo()
# index_image = calculate_index_for_period(image, roi, index_choice, reducer_choice, custom_formula)
# try:
# index_value = index_image.reduceRegion(
# reducer=get_reducer(reducer_choice),
# geometry=roi,
# scale=30
# ).get(index_image.bandNames().get(0))
# calculated_value = index_value.getInfo()
# if isinstance(calculated_value, (int, float)):
# aggregated_results.append({
# 'Location Name': location_name,
# period_label: date,
# 'Start Date': start_date_str,
# 'End Date': end_date_str,
# 'Calculated Value': calculated_value
# })
# else:
# st.warning(f"Skipping invalid value for {location_name} on {date}")
# except Exception as e:
# st.error(f"Error retrieving value for {location_name}: {e}")
# progress_percentage = (idx + 1) / total_steps
# progress_bar.progress(progress_percentage)
# progress_text.markdown(f"Processing: {int(progress_percentage * 100)}%")
# # if aggregated_results:
# # result_df = pd.DataFrame(aggregated_results)
# # if aggregation_period.lower() == 'daily':
# # aggregated_output = result_df.groupby('Location Name').agg({
# # 'Latitude': 'first' if shape_type.lower() == 'point' else None,
# # 'Longitude': 'first' if shape_type.lower() == 'point' else None,
# # 'Start Date': 'first',
# # 'End Date': 'first',
# # 'Calculated Value': 'mean'
# # }).reset_index()
# # # Remove None columns (for polygons)
# # aggregated_output = aggregated_output[[col for col in aggregated_output.columns if col is not None]]
# # aggregated_output.rename(columns={'Calculated Value': 'Aggregated Value'}, inplace=True)
# # return aggregated_output.to_dict(orient='records')
# # else:
# # return result_df.to_dict(orient='records')
# # return []
# if aggregated_results:
# result_df = pd.DataFrame(aggregated_results)
# if aggregation_period.lower() == 'daily':
# # Define aggregation dictionary based on shape_type
# agg_dict = {
# 'Start Date': 'first',
# 'End Date': 'first',
# 'Calculated Value': 'mean'
# }
# if shape_type.lower() == 'point':
# agg_dict['Latitude'] = 'first'
# agg_dict['Longitude'] = 'first'
# aggregated_output = result_df.groupby('Location Name').agg(agg_dict).reset_index()
# aggregated_output.rename(columns={'Calculated Value': 'Aggregated Value'}, inplace=True)
# return aggregated_output.to_dict(orient='records')
# else:
# return result_df.to_dict(orient='records')
# return []
# # When the user clicks the process button, start the calculation
# if st.button(f"Calculate ({index_choice})"):
# if file_upload is not None:
# if shape_type.lower() == "point":
# results = process_aggregation(
# locations_df,
# start_date_str,
# end_date_str,
# dataset_id,
# index_choice,
# reducer_choice,
# shape_type,
# aggregation_period,
# custom_formula
# )
# if results:
# result_df = pd.DataFrame(results)
# st.write(f"Processed Results Table ({aggregation_period}):")
# st.dataframe(result_df)
# filename = f"{main_selection}_{dataset_id}_{start_date.strftime('%Y/%m/%d')}_{end_date.strftime('%Y/%m/%d')}_{aggregation_period.lower()}.csv"
# st.download_button(
# label="Download results as CSV",
# data=result_df.to_csv(index=False).encode('utf-8'),
# file_name=filename,
# mime='text/csv'
# )
# st.spinner('')
# st.success('Processing complete!')
# else:
# st.warning("No results were generated.")
# elif shape_type.lower() == "polygon":
# results = process_aggregation(
# locations_df,
# start_date_str,
# end_date_str,
# dataset_id,
# index_choice,
# reducer_choice,
# shape_type,
# aggregation_period,
# custom_formula
# )
# if results:
# result_df = pd.DataFrame(results)
# st.write(f"Processed Results Table ({aggregation_period}):")
# st.dataframe(result_df)
# filename = f"{main_selection}_{dataset_id}_{start_date.strftime('%Y/%m/%d')}_{end_date.strftime('%Y/%m/%d')}_{aggregation_period.lower()}.csv"
# st.download_button(
# label="Download results as CSV",
# data=result_df.to_csv(index=False).encode('utf-8'),
# file_name=filename,
# mime='text/csv'
# )
# st.spinner('')
# st.success('Processing complete!')
# else:
# st.warning("No results were generated.")
# else:
# st.warning("Please upload a file.")
import streamlit as st
import json
import ee
import os
import pandas as pd
import geopandas as gpd
from datetime import datetime
import leafmap.foliumap as leafmap
import re
from shapely.geometry import base
from lxml import etree
from xml.etree import ElementTree as ET
# Set up the page layout (wide mode so the map/table use the full width).
st.set_page_config(layout="wide")
# Custom button styling for all Streamlit buttons (dark-green background, white text).
m = st.markdown(
"""
<style>
div.stButton > button:first-child {
background-color: #006400;
color:#ffffff;
}
</style>""",
unsafe_allow_html=True,
)
# Header logos (ISRO on the left, SAC on the right), served from the Space's own repo.
st.write(
f"""
<div style="display: flex; justify-content: space-between; align-items: center;">
<img src="https://huggingface.co/spaces/YashMK89/GEE_Calculator/resolve/main/ISRO_Logo.png" style="width: 20%; margin-right: auto;">
<img src="https://huggingface.co/spaces/YashMK89/GEE_Calculator/resolve/main/SAC_Logo.png" style="width: 20%; margin-left: auto;">
</div>
""",
unsafe_allow_html=True,
)
# Page title (rendered as centered HTML).
st.markdown(
f"""
<h1 style="text-align: center;">Precision Analysis for Vegetation, Water, and Air Quality</h1>
""",
unsafe_allow_html=True,
)
st.write("<h2><div style='text-align: center;'>User Inputs</div></h2>", unsafe_allow_html=True)
# Authenticate and initialize Earth Engine.
# The credentials JSON/token is supplied via the EE_Authentication secret and
# written to the location the earthengine-api library reads it from.
earthengine_credentials = os.environ.get("EE_Authentication")
if not earthengine_credentials:
    # Fail fast with a clear message instead of crashing later with an opaque
    # TypeError when f.write(None) is attempted below.
    st.error("The EE_Authentication secret is not set; cannot initialize Earth Engine.")
    st.stop()
os.makedirs(os.path.expanduser("~/.config/earthengine/"), exist_ok=True)
with open(os.path.expanduser("~/.config/earthengine/credentials"), "w") as f:
    f.write(earthengine_credentials)
ee.Initialize(project='ee-yashsacisro24')
# Load the Sentinel dataset options (category -> sub_options / bands) from the
# bundled JSON file.
with open("sentinel_datasets.json") as f:
    data = json.load(f)
# Display the title for the Streamlit app.
st.title("Sentinel Dataset")
# Select dataset category (main selection).
main_selection = st.selectbox("Select Sentinel Dataset Category", list(data.keys()))
# If a category is selected, display the sub-options (specific datasets).
if main_selection:
    sub_options = data[main_selection]["sub_options"]
    sub_selection = st.selectbox("Select Specific Dataset ID", list(sub_options.keys()))
    # Display the selected dataset ID based on user input.
    if sub_selection:
        st.write(f"You selected: {main_selection} -> {sub_options[sub_selection]}")
        st.write(f"Dataset ID: {sub_selection}")
        # NOTE(review): the JSON appears to map dataset-ID -> display-name, so
        # the key itself is used as the Earth Engine dataset ID — confirm
        # against sentinel_datasets.json.
        dataset_id = sub_selection  # Use the key directly as the dataset ID
# Earth Engine Index Calculator Section
st.header("Earth Engine Index Calculator")
# Load band information based on the selected dataset.
# NOTE(review): if main_selection were ever falsy, sub_selection would be
# undefined here; in practice st.selectbox always returns a value.
if main_selection and sub_selection:
    dataset_bands = data[main_selection]["bands"].get(sub_selection, [])
    st.write(f"Available Bands for {sub_options[sub_selection]}: {', '.join(dataset_bands)}")
    # Allow the user to select 1 or 2 bands.
    selected_bands = st.multiselect(
        "Select 1 or 2 Bands for Calculation",
        options=dataset_bands,
        default=[dataset_bands[0]] if dataset_bands else [],
        help="Select at least 1 band and up to 2 bands."
    )
    # Enforce a minimum of 1 and maximum of 2 selected bands; st.stop() halts
    # this script run until the user corrects the selection.
    if len(selected_bands) < 1:
        st.warning("Please select at least one band.")
        st.stop()
    elif len(selected_bands) > 2:
        st.warning("You can select a maximum of 2 bands.")
        st.stop()
    # Show the custom-formula input once bands are selected.
    if selected_bands:
        # Default formula: the band itself (1 band) or the normalized
        # difference of the two bands (2 bands).
        default_formula = (
            f"{selected_bands[0]}" if len(selected_bands) == 1
            else f"({selected_bands[0]} - {selected_bands[1]}) / ({selected_bands[0]} + {selected_bands[1]})"
        )
        custom_formula = st.text_input(
            "Enter Custom Formula (e.g., 'B3*B5/2' or '(B8 - B4) / (B8 + B4)')",
            value=default_formula,
            help=f"Use {', '.join(selected_bands)} in your formula. Example: 'B3*B5/2'"
        )
        if not custom_formula:
            st.warning("Please enter a custom formula to proceed.")
            st.stop()
        # Echo the formula back so the user can confirm it before processing.
        st.write(f"Custom Formula: {custom_formula}")
# Map a user-facing reducer name onto the Earth Engine reducer it denotes.
def get_reducer(reducer_name):
    """Return the ee.Reducer for *reducer_name* (case-insensitive).

    Unknown names fall back to the mean reducer.
    """
    factories = {
        'mean': ee.Reducer.mean,
        'sum': ee.Reducer.sum,
        'median': ee.Reducer.median,
        'min': ee.Reducer.min,
        'max': ee.Reducer.max,
        'count': ee.Reducer.count,
    }
    # Look up the constructor first so only the requested reducer is built.
    make_reducer = factories.get(reducer_name.lower(), ee.Reducer.mean)
    return make_reducer()
# Streamlit selectbox for reducer choice: how per-band pixel values are
# collapsed over each region of interest before the formula is evaluated.
reducer_choice = st.selectbox(
    "Select Reducer",
    ['mean', 'sum', 'median', 'min', 'max', 'count'],
    index=0  # Default to 'mean'
)
# Function to convert geometry to Earth Engine format
def convert_to_ee_geometry(geometry):
    """Convert a geometry in any supported format to an ee.Geometry.

    Parameters:
        geometry: one of
            - a Shapely geometry (must be valid),
            - a GeoJSON dict or GeoJSON string,
            - a path to a .kml file (string ending in '.kml').

    Returns:
        ee.Geometry

    Raises:
        ValueError: invalid geometry, unparsable GeoJSON/KML, or an
            unsupported input type.
    """
    # Shapely geometry: serialize through __geo_interface__.
    if isinstance(geometry, base.BaseGeometry):
        if geometry.is_valid:
            geojson = geometry.__geo_interface__
            return ee.Geometry(geojson)
        else:
            raise ValueError("Invalid geometry: The polygon geometry is not valid.")
    # KML file path. BUGFIX: this test must come BEFORE the generic string
    # branch — previously `isinstance(geometry, str)` below consumed every
    # string, so the KML branch was unreachable and .kml paths failed with
    # a GeoJSON parse error.
    elif isinstance(geometry, str) and geometry.lower().endswith(".kml"):
        try:
            tree = ET.parse(geometry)
            kml_root = tree.getroot()
            # KML 2.2 default namespace; coordinates are "lon,lat[,alt]"
            # triplets separated by whitespace.
            kml_namespace = {'kml': 'http://www.opengis.net/kml/2.2'}
            coordinates = kml_root.findall(".//kml:coordinates", kml_namespace)
            if coordinates:
                coords_text = coordinates[0].text.strip()
                coords = coords_text.split()
                coords = [tuple(map(float, coord.split(','))) for coord in coords]
                geojson = {"type": "Polygon", "coordinates": [coords]}
                return ee.Geometry(geojson)
            else:
                raise ValueError("KML does not contain valid coordinates.")
        except Exception as e:
            raise ValueError(f"Error parsing KML: {e}")
    # GeoJSON as a dict or a JSON-encoded string.
    elif isinstance(geometry, (dict, str)):
        try:
            if isinstance(geometry, str):
                geometry = json.loads(geometry)
            if 'type' in geometry and 'coordinates' in geometry:
                return ee.Geometry(geometry)
            else:
                raise ValueError("GeoJSON format is invalid.")
        except Exception as e:
            raise ValueError(f"Error parsing GeoJSON: {e}")
    else:
        raise ValueError("Unsupported geometry input type. Supported types are Shapely, GeoJSON, and KML.")
# Date Input for Start and End Dates
start_date = st.date_input("Start Date", value=pd.to_datetime('2024-11-01'))
end_date = st.date_input("End Date", value=pd.to_datetime('2024-12-01'))
# Convert start_date and end_date to string format for Earth Engine
start_date_str = start_date.strftime('%Y-%m-%d')
end_date_str = end_date.strftime('%Y-%m-%d')
# Aggregation period selection
aggregation_period = st.selectbox("Select Aggregation Period", ["Daily", "Weekly", "Monthly", "Yearly"], index=0)
# Ask user whether they want to process 'Point' or 'Polygon' data
shape_type = st.selectbox("Do you want to process 'Point' or 'Polygon' data?", ["Point", "Polygon"])
# Ask user to upload a file based on shape type
file_upload = st.file_uploader(f"Upload your {shape_type} data (CSV, GeoJSON, KML)", type=["csv", "geojson", "kml"])
# Additional options based on shape type. Only one of the two applies:
# kernel_size for points, include_boundary for polygons; the other stays None.
kernel_size = None
include_boundary = None
if shape_type.lower() == "point":
    kernel_size = st.selectbox(
        "Select Calculation Area",
        ["Point", "3x3 Kernel", "5x5 Kernel"],
        index=0,
        help="Choose 'Point' for exact point calculation, or a kernel size for area averaging."
    )
elif shape_type.lower() == "polygon":
    include_boundary = st.checkbox(
        "Include Boundary Pixels",
        value=True,
        help="Check to include pixels on the polygon boundary; uncheck to exclude them."
    )
if file_upload is not None:
    # Read the user-uploaded file
    if shape_type.lower() == "point":
        if file_upload.name.endswith('.csv'):
            locations_df = pd.read_csv(file_upload)
        elif file_upload.name.endswith('.geojson'):
            locations_df = gpd.read_file(file_upload)
        elif file_upload.name.endswith('.kml'):
            locations_df = gpd.read_file(file_upload)
        else:
            st.error("Unsupported file format. Please upload CSV, GeoJSON, or KML.")
            # Empty frame so the checks below fall through without crashing.
            locations_df = pd.DataFrame()
        # Guard: polygons uploaded while 'Point' mode is selected.
        if 'geometry' in locations_df.columns:
            if locations_df.geometry.geom_type.isin(['Polygon', 'MultiPolygon']).any():
                st.warning("The uploaded file contains polygon data. Please select 'Polygon' for processing.")
                st.stop()
        with st.spinner('Processing Map...'):
            if locations_df is not None and not locations_df.empty:
                # GeoJSON/KML input: derive lat/lon columns from the point
                # geometries so the rest of the pipeline is format-agnostic.
                if 'geometry' in locations_df.columns:
                    locations_df['latitude'] = locations_df['geometry'].y
                    locations_df['longitude'] = locations_df['geometry'].x
                if 'latitude' not in locations_df.columns or 'longitude' not in locations_df.columns:
                    st.error("Uploaded file is missing required 'latitude' or 'longitude' columns.")
                else:
                    st.write("Preview of the uploaded points data:")
                    st.dataframe(locations_df.head())
                    # Center the preview map on the mean coordinate of all points.
                    m = leafmap.Map(center=[locations_df['latitude'].mean(), locations_df['longitude'].mean()], zoom=10)
                    for _, row in locations_df.iterrows():
                        latitude = row['latitude']
                        longitude = row['longitude']
                        if pd.isna(latitude) or pd.isna(longitude):
                            continue  # skip rows with missing coordinates
                        m.add_marker(location=[latitude, longitude], popup=row.get('name', 'No Name'))
                    st.write("Map of Uploaded Points:")
                    m.to_streamlit()
                    st.session_state.map_data = m
    elif shape_type.lower() == "polygon":
        if file_upload.name.endswith('.csv'):
            locations_df = pd.read_csv(file_upload)
        elif file_upload.name.endswith('.geojson'):
            locations_df = gpd.read_file(file_upload)
        elif file_upload.name.endswith('.kml'):
            locations_df = gpd.read_file(file_upload)
        else:
            st.error("Unsupported file format. Please upload CSV, GeoJSON, or KML.")
            locations_df = pd.DataFrame()
        # Guard: points uploaded while 'Polygon' mode is selected.
        if 'geometry' in locations_df.columns:
            if locations_df.geometry.geom_type.isin(['Point', 'MultiPoint']).any():
                st.warning("The uploaded file contains point data. Please select 'Point' for processing.")
                st.stop()
        with st.spinner('Processing Map...'):
            if locations_df is not None and not locations_df.empty:
                if 'geometry' not in locations_df.columns:
                    st.error("Uploaded file is missing required 'geometry' column.")
                else:
                    st.write("Preview of the uploaded polygons data:")
                    st.dataframe(locations_df.head())
                    # NOTE(review): centroid of a geographic (lat/lon) CRS is
                    # approximate and triggers a GeoPandas warning; acceptable
                    # here since it is only used to center the preview map.
                    centroid_lat = locations_df.geometry.centroid.y.mean()
                    centroid_lon = locations_df.geometry.centroid.x.mean()
                    m = leafmap.Map(center=[centroid_lat, centroid_lon], zoom=10)
                    for _, row in locations_df.iterrows():
                        polygon = row['geometry']
                        if polygon.is_valid:
                            # One single-row GeoDataFrame per polygon so each
                            # gets its own named layer on the map.
                            gdf = gpd.GeoDataFrame([row], geometry=[polygon], crs=locations_df.crs)
                            m.add_gdf(gdf=gdf, layer_name=row.get('name', 'Unnamed Polygon'))
                    st.write("Map of Uploaded Polygons:")
                    m.to_streamlit()
                    st.session_state.map_data = m
# Initialize session state so values survive Streamlit reruns.
if 'results' not in st.session_state:
    st.session_state.results = []  # accumulated calculation results
if 'last_params' not in st.session_state:
    st.session_state.last_params = {}  # snapshot of inputs from the previous run
if 'map_data' not in st.session_state:
    st.session_state.map_data = None  # last rendered leafmap map
# Detect whether any user input differs from the snapshot taken last run.
def parameters_changed():
    """Return True when any current input differs from st.session_state.last_params."""
    current = {
        'main_selection': main_selection,
        'dataset_id': dataset_id,
        'selected_bands': selected_bands,
        'custom_formula': custom_formula,
        'start_date_str': start_date_str,
        'end_date_str': end_date_str,
        'shape_type': shape_type,
        'file_upload': file_upload,
        'kernel_size': kernel_size,
        'include_boundary': include_boundary,
    }
    previous = st.session_state.last_params
    return any(previous.get(key) != value for key, value in current.items())
# If parameters have changed, discard stale results and snapshot the new
# inputs so the next rerun compares against them.
if parameters_changed():
    st.session_state.results = []
    st.session_state.last_params = {
        'main_selection': main_selection,
        'dataset_id': dataset_id,
        'selected_bands': selected_bands,
        'custom_formula': custom_formula,
        'start_date_str': start_date_str,
        'end_date_str': end_date_str,
        'shape_type': shape_type,
        'file_upload': file_upload,
        'kernel_size': kernel_size,
        'include_boundary': include_boundary
    }
# Function to calculate custom formula using eval safely
def calculate_custom_formula(image, geometry, selected_bands, custom_formula, reducer_choice, scale=30):
    """Reduce each selected band over *geometry* and evaluate *custom_formula*.

    Parameters:
        image: ee.Image whose bands are reduced.
        geometry: ee.Geometry region of interest.
        selected_bands: band names referenced by the formula.
        custom_formula: arithmetic expression over the band names.
        reducer_choice: reducer name understood by get_reducer().
        scale: reduction scale in meters (default 30).

    Returns:
        ee.Image: constant image named 'custom_result' holding the scalar
        result; on failure, a zero image with an 'error' property set.
    """
    try:
        # Validate all requested bands up front. bandNames().getInfo() is a
        # network round-trip, so fetch it once instead of once per band
        # (the original called it inside the loop).
        band_names = image.bandNames().getInfo()
        band_values = {}
        for band in selected_bands:
            if band not in band_names:
                raise ValueError(f"The band '{band}' does not exist in the image.")
            band_values[band] = image.select(band)
        reducer = get_reducer(reducer_choice)
        reduced_values = {}
        for band in selected_bands:
            reduced_value = band_values[band].reduceRegion(
                reducer=reducer,
                geometry=geometry,
                scale=scale
            ).get(band).getInfo()
            # reduceRegion returns None when no pixels fall in the region.
            if reduced_value is None:
                reduced_value = 0
            reduced_values[band] = float(reduced_value)
        # Substitute longest band names first: replacing 'B1' before 'B11'
        # would corrupt the formula (the original iterated in selection
        # order and had exactly that prefix bug).
        formula = custom_formula
        for band in sorted(selected_bands, key=len, reverse=True):
            formula = formula.replace(band, str(reduced_values[band]))
        # SECURITY: eval of user-supplied text. __builtins__ is emptied, but
        # this still permits arbitrary expressions — acceptable only because
        # the app runs per-user; do not reuse server-side with shared state.
        result = eval(formula, {"__builtins__": {}}, reduced_values)
        if not isinstance(result, (int, float)):
            raise ValueError("Formula evaluation did not result in a numeric value.")
        return ee.Image.constant(result).rename('custom_result')
    except ZeroDivisionError:
        st.error("Error: Division by zero occurred in the formula.")
        return ee.Image(0).rename('custom_result').set('error', 'Division by zero')
    except SyntaxError:
        st.error(f"Error: Invalid formula syntax in '{custom_formula}'.")
        return ee.Image(0).rename('custom_result').set('error', 'Invalid syntax')
    except ValueError as e:
        st.error(f"Error: {str(e)}")
        return ee.Image(0).rename('custom_result').set('error', str(e))
    except Exception as e:
        st.error(f"Unexpected error evaluating formula: {e}")
        return ee.Image(0).rename('custom_result').set('error', str(e))
# Function to calculate index for a period
def calculate_index_for_period(image, roi, selected_bands, custom_formula, reducer_choice):
    """Evaluate the custom formula for one (already period-aggregated) image.

    Thin delegate to calculate_custom_formula, using its default scale.
    """
    return calculate_custom_formula(image, roi, selected_bands, custom_formula, reducer_choice)
# Aggregation functions
def aggregate_data_daily(collection):
    """Collapse *collection* into one mean image per calendar day.

    Each output image carries a 'day' property ('YYYY-MM-dd').
    """
    # Tag every image with the day it was acquired on.
    tagged = collection.map(
        lambda image: image.set('day', ee.Date(image.get('system:time_start')).format('YYYY-MM-dd'))
    )
    distinct_days = tagged.aggregate_array('day').distinct()

    def mean_for_day(day):
        # Average all images sharing this day, preserving the label.
        return tagged.filter(ee.Filter.eq('day', day)).mean().set('day', day)

    return ee.ImageCollection(ee.List(distinct_days.map(mean_for_day)))
def aggregate_data_weekly(collection):
    """Collapse *collection* into one mean image per week.

    Each output image carries a 'week_start' property ('YYYY-MM-dd').
    """
    def tag_week(image):
        date = ee.Date(image.get('system:time_start'))
        # getRelative('day', 'week') is the number of days elapsed since
        # the week began; stepping back that many days lands on the start.
        days_since_week_start = date.getRelative('day', 'week')
        week_start = date.advance(ee.Number(days_since_week_start).multiply(-1), 'day')
        return image.set('week_start', week_start.format('YYYY-MM-dd'))

    tagged = collection.map(tag_week)
    distinct_weeks = tagged.aggregate_array('week_start').distinct()
    weekly_means = distinct_weeks.map(
        lambda week_start: tagged.filter(ee.Filter.eq('week_start', week_start)).mean().set('week_start', week_start)
    )
    return ee.ImageCollection(ee.List(weekly_means))
def aggregate_data_monthly(collection, start_date, end_date):
    """Filter *collection* to [start_date, end_date) and collapse it into one
    mean image per calendar month.

    Each output image carries a 'month' property ('YYYY-MM').
    """
    filtered = collection.filterDate(start_date, end_date)
    tagged = filtered.map(
        lambda image: image.set('month', ee.Date(image.get('system:time_start')).format('YYYY-MM'))
    )
    distinct_months = tagged.aggregate_array('month').distinct()
    monthly_means = distinct_months.map(
        lambda month: tagged.filter(ee.Filter.eq('month', month)).mean().set('month', month)
    )
    return ee.ImageCollection(ee.List(monthly_means))
def aggregate_data_yearly(collection):
    """Collapse *collection* into one mean image per year.

    Each output image carries a 'year' property ('YYYY').
    """
    tagged = collection.map(
        lambda image: image.set('year', ee.Date(image.get('system:time_start')).format('YYYY'))
    )
    distinct_years = tagged.aggregate_array('year').distinct()
    yearly_means = distinct_years.map(
        lambda year: tagged.filter(ee.Filter.eq('year', year)).mean().set('year', year)
    )
    return ee.ImageCollection(ee.List(yearly_means))
# Process aggregation function with kernel and boundary options
def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id, selected_bands, reducer_choice, shape_type, aggregation_period, custom_formula="", kernel_size=None, include_boundary=None):
    """Evaluate the custom formula for every uploaded location and period.

    Parameters:
        locations_df: DataFrame of points (latitude/longitude columns) or
            GeoDataFrame of polygons (geometry column), per *shape_type*.
        start_date_str, end_date_str: 'YYYY-MM-DD' date-range bounds.
        dataset_id: Earth Engine ImageCollection id.
        selected_bands: band names referenced by *custom_formula*.
        reducer_choice: reducer name understood by get_reducer().
        shape_type: 'Point' or 'Polygon' (selects which branch runs).
        aggregation_period: 'Daily', 'Weekly', 'Monthly' or 'Yearly'.
        custom_formula: arithmetic expression over the selected bands.
        kernel_size: points only — 'Point', '3x3 Kernel' or '5x5 Kernel'.
        include_boundary: polygons only — False erodes the polygon inward.

    Returns:
        list of dicts, one per location/period; for 'Daily' the per-period
        rows are additionally averaged down to one row per location.
    """
    aggregated_results = []
    if not custom_formula:
        st.error("Custom formula cannot be empty. Please provide a formula.")
        return aggregated_results
    total_steps = len(locations_df)
    progress_bar = st.progress(0)
    progress_text = st.empty()
    with st.spinner('Processing data...'):
        if shape_type.lower() == "point":
            for idx, row in locations_df.iterrows():
                latitude = row.get('latitude')
                longitude = row.get('longitude')
                if pd.isna(latitude) or pd.isna(longitude):
                    st.warning(f"Skipping location {idx} with missing latitude or longitude")
                    continue
                location_name = row.get('name', f"Location_{idx}")
                # Define the region of interest based on kernel size
                if kernel_size == "3x3 Kernel":
                    # Assuming 30m resolution, 3x3 kernel = 90m x 90m
                    buffer_size = 45  # Half of 90m to center the square
                    roi = ee.Geometry.Point([longitude, latitude]).buffer(buffer_size).bounds()
                elif kernel_size == "5x5 Kernel":
                    # 5x5 kernel = 150m x 150m
                    buffer_size = 75  # Half of 150m
                    roi = ee.Geometry.Point([longitude, latitude]).buffer(buffer_size).bounds()
                else:  # Point
                    roi = ee.Geometry.Point([longitude, latitude])
                collection = ee.ImageCollection(dataset_id) \
                    .filterDate(ee.Date(start_date_str), ee.Date(end_date_str)) \
                    .filterBounds(roi)
                # Collapse the collection to one image per period; each
                # helper sets the label property read back below.
                if aggregation_period.lower() == 'daily':
                    collection = aggregate_data_daily(collection)
                elif aggregation_period.lower() == 'weekly':
                    collection = aggregate_data_weekly(collection)
                elif aggregation_period.lower() == 'monthly':
                    collection = aggregate_data_monthly(collection, start_date_str, end_date_str)
                elif aggregation_period.lower() == 'yearly':
                    collection = aggregate_data_yearly(collection)
                image_list = collection.toList(collection.size())
                processed_weeks = set()  # dedupe guard for week labels
                for i in range(image_list.size().getInfo()):
                    image = ee.Image(image_list.get(i))
                    # Read back the period label the aggregation helper set.
                    if aggregation_period.lower() == 'daily':
                        timestamp = image.get('day')
                        period_label = 'Date'
                        date = ee.Date(timestamp).format('YYYY-MM-dd').getInfo()
                    elif aggregation_period.lower() == 'weekly':
                        timestamp = image.get('week_start')
                        period_label = 'Week'
                        date = ee.String(timestamp).getInfo()
                        # Week starts can fall outside the requested range
                        # (a week begins before start_date); skip those and
                        # any duplicate labels.
                        if (pd.to_datetime(date) < pd.to_datetime(start_date_str) or
                            pd.to_datetime(date) > pd.to_datetime(end_date_str) or
                            date in processed_weeks):
                            continue
                        processed_weeks.add(date)
                    elif aggregation_period.lower() == 'monthly':
                        timestamp = image.get('month')
                        period_label = 'Month'
                        date = ee.Date(timestamp).format('YYYY-MM').getInfo()
                    elif aggregation_period.lower() == 'yearly':
                        timestamp = image.get('year')
                        period_label = 'Year'
                        date = ee.Date(timestamp).format('YYYY').getInfo()
                    index_image = calculate_index_for_period(image, roi, selected_bands, custom_formula, reducer_choice)
                    try:
                        # NOTE(review): scale=30 assumes ~30 m pixels — confirm
                        # it suits the selected dataset's native resolution.
                        index_value = index_image.reduceRegion(
                            reducer=get_reducer(reducer_choice),
                            geometry=roi,
                            scale=30
                        ).get('custom_result')
                        calculated_value = index_value.getInfo()
                        if isinstance(calculated_value, (int, float)):
                            aggregated_results.append({
                                'Location Name': location_name,
                                'Latitude': latitude,
                                'Longitude': longitude,
                                period_label: date,
                                'Start Date': start_date_str,
                                'End Date': end_date_str,
                                'Calculated Value': calculated_value
                            })
                        else:
                            st.warning(f"Skipping invalid value for {location_name} on {date}")
                    except Exception as e:
                        st.error(f"Error retrieving value for {location_name}: {e}")
                # NOTE(review): assumes a default RangeIndex so idx+1/total
                # tracks progress — verify for files with a custom index.
                progress_percentage = (idx + 1) / total_steps
                progress_bar.progress(progress_percentage)
                progress_text.markdown(f"Processing: {int(progress_percentage * 100)}%")
        elif shape_type.lower() == "polygon":
            for idx, row in locations_df.iterrows():
                polygon_name = row.get('name', f"Polygon_{idx}")
                polygon_geometry = row.get('geometry')
                location_name = polygon_name
                try:
                    roi = convert_to_ee_geometry(polygon_geometry)
                    if not include_boundary:
                        # Erode the polygon by a small buffer (e.g., 1 pixel = 30m) to exclude boundary
                        roi = roi.buffer(-30).bounds()
                except ValueError as e:
                    st.warning(f"Skipping invalid polygon {polygon_name}: {e}")
                    continue
                collection = ee.ImageCollection(dataset_id) \
                    .filterDate(ee.Date(start_date_str), ee.Date(end_date_str)) \
                    .filterBounds(roi)
                # Same per-period collapse as the point branch above.
                if aggregation_period.lower() == 'daily':
                    collection = aggregate_data_daily(collection)
                elif aggregation_period.lower() == 'weekly':
                    collection = aggregate_data_weekly(collection)
                elif aggregation_period.lower() == 'monthly':
                    collection = aggregate_data_monthly(collection, start_date_str, end_date_str)
                elif aggregation_period.lower() == 'yearly':
                    collection = aggregate_data_yearly(collection)
                image_list = collection.toList(collection.size())
                processed_weeks = set()  # dedupe guard for week labels
                for i in range(image_list.size().getInfo()):
                    image = ee.Image(image_list.get(i))
                    if aggregation_period.lower() == 'daily':
                        timestamp = image.get('day')
                        period_label = 'Date'
                        date = ee.Date(timestamp).format('YYYY-MM-dd').getInfo()
                    elif aggregation_period.lower() == 'weekly':
                        timestamp = image.get('week_start')
                        period_label = 'Week'
                        date = ee.String(timestamp).getInfo()
                        if (pd.to_datetime(date) < pd.to_datetime(start_date_str) or
                            pd.to_datetime(date) > pd.to_datetime(end_date_str) or
                            date in processed_weeks):
                            continue
                        processed_weeks.add(date)
                    elif aggregation_period.lower() == 'monthly':
                        timestamp = image.get('month')
                        period_label = 'Month'
                        date = ee.Date(timestamp).format('YYYY-MM').getInfo()
                    elif aggregation_period.lower() == 'yearly':
                        timestamp = image.get('year')
                        period_label = 'Year'
                        date = ee.Date(timestamp).format('YYYY').getInfo()
                    index_image = calculate_index_for_period(image, roi, selected_bands, custom_formula, reducer_choice)
                    try:
                        index_value = index_image.reduceRegion(
                            reducer=get_reducer(reducer_choice),
                            geometry=roi,
                            scale=30
                        ).get('custom_result')
                        calculated_value = index_value.getInfo()
                        if isinstance(calculated_value, (int, float)):
                            # Polygon rows carry no Latitude/Longitude columns.
                            aggregated_results.append({
                                'Location Name': location_name,
                                period_label: date,
                                'Start Date': start_date_str,
                                'End Date': end_date_str,
                                'Calculated Value': calculated_value
                            })
                        else:
                            st.warning(f"Skipping invalid value for {location_name} on {date}")
                    except Exception as e:
                        st.error(f"Error retrieving value for {location_name}: {e}")
                progress_percentage = (idx + 1) / total_steps
                progress_bar.progress(progress_percentage)
                progress_text.markdown(f"Processing: {int(progress_percentage * 100)}%")
    if aggregated_results:
        result_df = pd.DataFrame(aggregated_results)
        # Daily results are averaged down to one row per location; other
        # periods keep one row per location/period.
        if aggregation_period.lower() == 'daily':
            agg_dict = {
                'Start Date': 'first',
                'End Date': 'first',
                'Calculated Value': 'mean'
            }
            if shape_type.lower() == 'point':
                agg_dict['Latitude'] = 'first'
                agg_dict['Longitude'] = 'first'
            aggregated_output = result_df.groupby('Location Name').agg(agg_dict).reset_index()
            aggregated_output.rename(columns={'Calculated Value': 'Aggregated Value'}, inplace=True)
            return aggregated_output.to_dict(orient='records')
        else:
            return result_df.to_dict(orient='records')
    return []
# Button to trigger calculation
if st.button("Calculate"):
    if file_upload is not None:
        if shape_type.lower() in ["point", "polygon"]:
            results = process_aggregation(
                locations_df,
                start_date_str,
                end_date_str,
                dataset_id,
                selected_bands,
                reducer_choice,
                shape_type,
                aggregation_period,
                custom_formula,
                kernel_size=kernel_size,
                include_boundary=include_boundary
            )
            if results:
                result_df = pd.DataFrame(results)
                st.write(f"Processed Results Table ({aggregation_period}):")
                st.dataframe(result_df)
                # BUGFIX: the filename previously embedded '%Y/%m/%d' dates
                # and the raw dataset id, both of which contain '/' — a path
                # separator that breaks the download filename. Use '-' dates
                # and sanitize the dataset id (e.g. 'COPERNICUS/S2').
                safe_dataset_id = dataset_id.replace('/', '_')
                filename = f"{main_selection}_{safe_dataset_id}_{start_date.strftime('%Y-%m-%d')}_{end_date.strftime('%Y-%m-%d')}_{aggregation_period.lower()}.csv"
                st.download_button(
                    label="Download results as CSV",
                    data=result_df.to_csv(index=False).encode('utf-8'),
                    file_name=filename,
                    mime='text/csv'
                )
                # (removed no-op `st.spinner('')` — creating a spinner context
                # manager without entering it does nothing)
                st.success('Processing complete!')
            else:
                st.warning("No results were generated.")
    else:
        st.warning("Please upload a file.")