# SATRANG / app.py — Streamlit app for Earth Engine vegetation/water/air-quality analysis
# (Hugging Face Space: YashMK89/GEE_Calculator, commit c95b0fa)
import streamlit as st
import json
import ee
import os
import pandas as pd
import geopandas as gpd
from datetime import datetime
import leafmap.foliumap as leafmap
import re
from shapely.geometry import base
from lxml import etree
# ---------------------------------------------------------------------------
# Page setup, branding, Earth Engine authentication, and dataset/index inputs.
# ---------------------------------------------------------------------------
st.set_page_config(layout="wide")

# Custom green styling for every Streamlit button on the page.
m = st.markdown(
    """
    <style>
    div.stButton > button:first-child {
    background-color: #006400;
    color:#ffffff;
    }
    </style>""",
    unsafe_allow_html=True,
)

# Organisation logos (ISRO left, SAC right).
st.write(
    """
    <div style="display: flex; justify-content: space-between; align-items: center;">
    <img src="https://huggingface.co/spaces/YashMK89/GEE_Calculator/resolve/main/ISRO_Logo.png" style="width: 20%; margin-right: auto;">
    <img src="https://huggingface.co/spaces/YashMK89/GEE_Calculator/resolve/main/SAC_Logo.png" style="width: 20%; margin-left: auto;">
    </div>
    """,
    unsafe_allow_html=True,
)

# Page title.
st.markdown(
    """
    <h1 style="text-align: center;">Precision Analysis for Vegetation, Water, and Air Quality</h1>
    """,
    unsafe_allow_html=True,
)
st.write("<h2><div style='text-align: center;'>User Inputs</div></h2>", unsafe_allow_html=True)

# --- Earth Engine authentication -------------------------------------------
# Credentials are injected through the EE_Authentication secret and written to
# the location the earthengine client library expects.
earthengine_credentials = os.environ.get("EE_Authentication")
if not earthengine_credentials:
    # Fail fast with a clear message instead of crashing on f.write(None).
    st.error("Earth Engine credentials not found in the 'EE_Authentication' environment variable.")
    st.stop()
os.makedirs(os.path.expanduser("~/.config/earthengine/"), exist_ok=True)
with open(os.path.expanduser("~/.config/earthengine/credentials"), "w") as f:
    f.write(earthengine_credentials)
ee.Initialize(project='ee-yashsacisro24')

# --- Dataset selection ------------------------------------------------------
# Load the Sentinel dataset catalogue (category -> {label: dataset ID}).
with open("sentinel_datasets.json") as f:
    data = json.load(f)

st.title("Sentinel Dataset")

# Main category first, then the specific dataset ID within it.
main_selection = st.selectbox("Select Sentinel Dataset Category", list(data.keys()))
if main_selection:
    sub_options = data[main_selection]["sub_options"]
    sub_selection = st.selectbox("Select Specific Dataset ID", list(sub_options.keys()))
    if sub_selection:
        st.write(f"You selected: {main_selection} -> {sub_selection}")
        st.write(f"Dataset ID: {sub_options[sub_selection]}")
        # dataset_id is consumed later by process_aggregation().
        dataset_id = sub_options[sub_selection]

# --- Index selection --------------------------------------------------------
st.header("Earth Engine Index Calculator")
index_choice = st.selectbox("Select an Index or Enter Custom Formula", ['NDVI', 'NDWI', 'Average NO₂', 'Custom Formula'])

custom_formula = ""
# Show the formula corresponding to the chosen index (case-insensitive).
if index_choice.lower() == 'ndvi':
    st.write("Formula for NDVI: NDVI = (B8 - B4) / (B8 + B4)")
elif index_choice.lower() == 'ndwi':
    st.write("Formula for NDWI: NDWI = (B3 - B8) / (B3 + B8)")
elif index_choice.lower() == 'average no₂':
    st.write("Formula for Average NO₂: Average NO₂ = Mean(NO2 band)")
elif index_choice.lower() == 'custom formula':
    custom_formula = st.text_input("Enter Custom Formula (e.g., B5,B4)")
    if not custom_formula:
        st.warning("Please enter a custom formula before proceeding.")
    else:
        # Echo the normalized-difference template with the user's own bands.
        # (Fix: the original printed a hard-coded "(band1 - band2) / ..."
        # f-string with no placeholders, never showing the entered formula.)
        band1, _, band2 = custom_formula.partition(',')
        st.write(f"Custom Formula: ({band1.strip()} - {band2.strip()}) / ({band1.strip()} + {band2.strip()})")
def get_reducer(reducer_name):
    """
    Map a user-friendly reducer name to an Earth Engine reducer object.

    Unrecognized names fall back to the mean reducer.
    """
    lookup = {
        'mean': ee.Reducer.mean,
        'sum': ee.Reducer.sum,
        'median': ee.Reducer.median,
        'min': ee.Reducer.min,
        'max': ee.Reducer.max,
        'count': ee.Reducer.count,
    }
    factory = lookup.get(reducer_name.lower(), ee.Reducer.mean)
    return factory()
# Spatial reducer applied later in the reduceRegion calls (see get_reducer).
reducer_choice = st.selectbox(
    "Select Reducer",
    ['mean', 'sum', 'median', 'min', 'max', 'count'],
    index=0  # Default to 'mean'
)
def convert_to_ee_geometry(geometry):
    """
    Convert a geometry given as a Shapely object, GeoJSON (dict or JSON
    string), or a path to a KML file into an ee.Geometry.

    Raises
    ------
    ValueError
        If the geometry is invalid, malformed, or of an unsupported type.
    """
    # Shapely geometry.
    if isinstance(geometry, base.BaseGeometry):
        if not geometry.is_valid:
            raise ValueError("Invalid geometry: The polygon geometry is not valid.")
        return ee.Geometry(geometry.__geo_interface__)

    # KML file path. This must be checked BEFORE the generic string branch:
    # in the original ordering the GeoJSON branch consumed every string, so
    # the KML branch was unreachable (any ".kml" path was fed to json.loads).
    if isinstance(geometry, str) and geometry.lower().endswith(".kml"):
        try:
            tree = etree.parse(geometry)
            kml_root = tree.getroot()
            # Simplistic extraction: grab the first <coordinates> element.
            # Assumes a single Polygon; may need customization for complex KML.
            kml_namespace = {'kml': 'http://www.opengis.net/kml/2.2'}
            coordinates = kml_root.findall(".//kml:coordinates", kml_namespace)
            if not coordinates:
                raise ValueError("KML does not contain valid coordinates.")
            # KML coordinates are whitespace-separated "lon,lat[,alt]" tuples.
            coords = coordinates[0].text.strip().split()
            coords = [tuple(map(float, coord.split(','))) for coord in coords]
            geojson = {
                "type": "Polygon",
                "coordinates": [coords]
            }
            return ee.Geometry(geojson)
        except Exception as e:
            raise ValueError(f"Error parsing KML: {e}")

    # GeoJSON dictionary or JSON string.
    if isinstance(geometry, (dict, str)):
        try:
            if isinstance(geometry, str):
                geometry = json.loads(geometry)
            if 'type' in geometry and 'coordinates' in geometry:
                return ee.Geometry(geometry)
            raise ValueError("GeoJSON format is invalid.")
        except Exception as e:
            raise ValueError(f"Error parsing GeoJSON: {e}")

    raise ValueError("Unsupported geometry input type. Supported types are Shapely, GeoJSON, and KML.")
def read_csv(file_path):
    """Load point records from a CSV file (or file-like) into a DataFrame."""
    return pd.read_csv(file_path)
def read_geojson(file_path):
    """Load features from a GeoJSON file into a GeoDataFrame."""
    return gpd.read_file(file_path)
def read_kml(file_path):
    """Load features from a KML file into a GeoDataFrame (fiona KML driver)."""
    return gpd.read_file(file_path, driver='KML')
# ---------------------------------------------------------------------------
# Upload handling: read the user's Point/Polygon file, validate that the
# geometry type matches the selection, and preview the data on a leafmap map.
# ---------------------------------------------------------------------------
shape_type = st.selectbox("Do you want to process 'Point' or 'Polygon' data?", ["Point", "Polygon"])
# Accepted formats mirror the read_* helpers above.
file_upload = st.file_uploader(f"Upload your {shape_type} data (CSV, GeoJSON, KML)", type=["csv", "geojson", "kml"])
if file_upload is not None:
    if shape_type.lower() == "point":
        # Dispatch on the uploaded file's extension.
        if file_upload.name.endswith('.csv'):
            locations_df = pd.read_csv(file_upload)
        elif file_upload.name.endswith('.geojson'):
            locations_df = gpd.read_file(file_upload)
        elif file_upload.name.endswith('.kml'):
            # NOTE(review): reads without driver='KML' (unlike the read_kml
            # helper) — presumably relies on driver auto-detection; confirm
            # KML uploads actually parse here.
            locations_df = gpd.read_file(file_upload)
        else:
            st.error("Unsupported file format. Please upload CSV, GeoJSON, or KML.")
            locations_df = pd.DataFrame()
        # Guard: reject polygon geometries when "Point" was selected.
        if 'geometry' in locations_df.columns:
            if locations_df.geometry.geom_type.isin(['Polygon', 'MultiPolygon']).any():
                st.warning("The uploaded file contains polygon data. Please select 'Polygon' for processing.")
                st.stop()  # Stop further processing if polygons are detected
        # Preview the point data on a map.
        with st.spinner('Processing data...'):
            if locations_df is not None and not locations_df.empty:
                # GeoJSON/KML input: lat/lon live in the geometry column.
                if 'geometry' in locations_df.columns:
                    locations_df['latitude'] = locations_df['geometry'].y
                    locations_df['longitude'] = locations_df['geometry'].x
                # CSV input must already provide these columns.
                if 'latitude' not in locations_df.columns or 'longitude' not in locations_df.columns:
                    st.error("Uploaded file is missing required 'latitude' or 'longitude' columns.")
                else:
                    st.write("Preview of the uploaded points data:")
                    st.dataframe(locations_df.head())
                    # Center the map on the mean of all coordinates.
                    m = leafmap.Map(center=[locations_df['latitude'].mean(), locations_df['longitude'].mean()], zoom=10)
                    # One marker per row; rows with missing coordinates are skipped.
                    for _, row in locations_df.iterrows():
                        latitude = row['latitude']
                        longitude = row['longitude']
                        if pd.isna(latitude) or pd.isna(longitude):
                            continue  # Skip this row and move to the next one
                        m.add_marker(location=[latitude, longitude], popup=row.get('name', 'No Name'))
                    st.write("Map of Uploaded Points:")
                    m.to_streamlit()
                    # Keep the map in session_state so it survives reruns.
                    st.session_state.map_data = m
    elif shape_type.lower() == "polygon":
        # Dispatch on the uploaded file's extension.
        if file_upload.name.endswith('.csv'):
            # NOTE(review): a plain CSV has no parsed 'geometry' column, so
            # polygon CSVs will hit the missing-geometry error below — confirm
            # whether CSV polygons are actually expected here.
            locations_df = pd.read_csv(file_upload)
        elif file_upload.name.endswith('.geojson'):
            locations_df = gpd.read_file(file_upload)
        elif file_upload.name.endswith('.kml'):
            locations_df = gpd.read_file(file_upload)
        else:
            st.error("Unsupported file format. Please upload CSV, GeoJSON, or KML.")
            locations_df = pd.DataFrame()
        # Guard: reject point geometries when "Polygon" was selected.
        if 'geometry' in locations_df.columns:
            if locations_df.geometry.geom_type.isin(['Point', 'MultiPoint']).any():
                st.warning("The uploaded file contains point data. Please select 'Point' for processing.")
                st.stop()  # Stop further processing if point data is detected
        # Preview the polygon data on a map.
        with st.spinner('Processing data...'):
            if locations_df is not None and not locations_df.empty:
                if 'geometry' not in locations_df.columns:
                    st.error("Uploaded file is missing required 'geometry' column.")
                else:
                    st.write("Preview of the uploaded polygons data:")
                    st.dataframe(locations_df.head())
                    # Center the map on the mean centroid of all polygons.
                    centroid_lat = locations_df.geometry.centroid.y.mean()
                    centroid_lon = locations_df.geometry.centroid.x.mean()
                    m = leafmap.Map(center=[centroid_lat, centroid_lon], zoom=10)
                    # One layer per valid polygon row.
                    for _, row in locations_df.iterrows():
                        polygon = row['geometry']
                        if polygon.is_valid:  # Check if polygon is valid
                            # Wrap the single row in a GeoDataFrame so leafmap can render it.
                            gdf = gpd.GeoDataFrame([row], geometry=[polygon], crs=locations_df.crs)
                            m.add_gdf(gdf=gdf, layer_name=row.get('name', 'Unnamed Polygon'))
                    st.write("Map of Uploaded Polygons:")
                    m.to_streamlit()
                    # Keep the map in session_state so it survives reruns.
                    st.session_state.map_data = m
# Date range bounding the Earth Engine collection filter.
start_date = st.date_input("Start Date", value=pd.to_datetime('2020-01-01'))
end_date = st.date_input("End Date", value=pd.to_datetime('2020-12-31'))
# Earth Engine expects ISO-format date strings.
start_date_str = start_date.strftime('%Y-%m-%d')
end_date_str = end_date.strftime('%Y-%m-%d')
# Temporal aggregation granularity (see the aggregate_data_* helpers).
aggregation_period = st.selectbox("Select Aggregation Period", ["Daily", "Weekly", "Monthly", "Yearly"], index=0)
# Initialize session state for storing results if not already done.
if 'results' not in st.session_state:
    st.session_state.results = []
if 'last_params' not in st.session_state:
    st.session_state.last_params = {}
if 'map_data' not in st.session_state:
    st.session_state.map_data = None  # Initialize map_data
def parameters_changed():
    """Return True when any cached input differs from the current widget values."""
    previous = st.session_state.last_params
    current = {
        'main_selection': main_selection,
        'dataset_id': dataset_id,
        'index_choice': index_choice,
        'start_date_str': start_date_str,
        'end_date_str': end_date_str,
        'shape_type': shape_type,
        'file_upload': file_upload,
    }
    return any(previous.get(key) != value for key, value in current.items())
# If any input changed since the last run, discard stale results and cache
# the current parameter set for the next comparison.
if parameters_changed():
    st.session_state.results = []  # Clear the previous results
    st.session_state.last_params = {
        'main_selection': main_selection,
        'dataset_id': dataset_id,
        'index_choice': index_choice,
        'start_date_str': start_date_str,
        'end_date_str': end_date_str,
        'shape_type': shape_type,
        'file_upload': file_upload
    }
def calculate_ndvi(image, geometry, reducer_choice):
    """
    Return an NDVI image: (B8 - B4) / (B8 + B4), band renamed 'NDVI'.

    geometry and reducer_choice are accepted for a uniform call signature
    but are not used here; spatial reduction happens in the caller.
    """
    return image.normalizedDifference(['B8', 'B4']).rename('NDVI')
def calculate_ndwi(image, geometry, reducer_choice):
    """
    Return an NDWI image: (B3 - B8) / (B3 + B8), band renamed 'NDWI'.

    geometry and reducer_choice are accepted for a uniform call signature
    but are not used here; spatial reduction happens in the caller.
    """
    return image.normalizedDifference(['B3', 'B8']).rename('NDWI')
def calculate_custom_formula(image, geometry, custom_formula, reducer_choice, scale=30):
    """
    Compute a normalized difference from a user-supplied "band1,band2" string.

    The formula applied is (band1 - band2) / (band1 + band2); the result band
    is renamed 'custom formula'. Whitespace around the band names is tolerated
    (e.g. "B5, B4" — the original str.find slicing passed ' B4' verbatim to
    Earth Engine, which rejects it).

    geometry, reducer_choice and scale are accepted for a uniform call
    signature but are not used here; spatial reduction happens in the caller.
    """
    band1, _, band2 = custom_formula.partition(',')
    return image.normalizedDifference([band1.strip(), band2.strip()]).rename('custom formula')
def aggregate_data_daily(collection):
    """
    Collapse an ImageCollection to one mean image per calendar day.

    Each output image carries a 'day' property formatted 'YYYY-MM-dd'.
    """
    # Stamp every image with its calendar day.
    tagged = collection.map(
        lambda image: image.set(
            'day', ee.Date(image.get('system:time_start')).format('YYYY-MM-dd')
        )
    )
    distinct_days = tagged.aggregate_array('day').distinct()

    def mean_for_day(day):
        # Average every image falling on this day, preserving the 'day' tag.
        return tagged.filter(ee.Filter.eq('day', day)).mean().set('day', day)

    return ee.ImageCollection(ee.List(distinct_days.map(mean_for_day)))
def aggregate_data_weekly(collection):
    """
    Collapse an ImageCollection to one mean image per week.

    Each output image carries a 'week' property formatted 'YYYY-ww'.
    """
    # Stamp every image with its year-week label.
    tagged = collection.map(
        lambda image: image.set(
            'week', ee.Date(image.get('system:time_start')).format('YYYY-ww')
        )
    )
    distinct_weeks = tagged.aggregate_array('week').distinct()

    def mean_for_week(week):
        # Average every image falling in this week, preserving the 'week' tag.
        return tagged.filter(ee.Filter.eq('week', week)).mean().set('week', week)

    return ee.ImageCollection(ee.List(distinct_weeks.map(mean_for_week)))
def aggregate_data_monthly(collection):
    """
    Collapse an ImageCollection to one mean image per calendar month.

    Each output image carries a 'month' property formatted 'YYYY-MM'.
    """
    # Stamp every image with its year-month label.
    tagged = collection.map(
        lambda image: image.set(
            'month', ee.Date(image.get('system:time_start')).format('YYYY-MM')
        )
    )
    distinct_months = tagged.aggregate_array('month').distinct()

    def mean_for_month(month):
        # Average every image falling in this month, preserving the 'month' tag.
        return tagged.filter(ee.Filter.eq('month', month)).mean().set('month', month)

    return ee.ImageCollection(ee.List(distinct_months.map(mean_for_month)))
def aggregate_data_yearly(collection):
    """
    Collapse an ImageCollection to one mean image per calendar year.

    Each output image carries a 'year' property formatted 'YYYY'.
    """
    # Stamp every image with its year label.
    tagged = collection.map(
        lambda image: image.set(
            'year', ee.Date(image.get('system:time_start')).format('YYYY')
        )
    )
    distinct_years = tagged.aggregate_array('year').distinct()

    def mean_for_year(year):
        # Average every image falling in this year, preserving the 'year' tag.
        return tagged.filter(ee.Filter.eq('year', year)).mean().set('year', year)

    return ee.ImageCollection(ee.List(distinct_years.map(mean_for_year)))
def calculate_index_for_period(image, roi, index_choice, reducer_choice, custom_formula):
    """
    Dispatch to the index calculation matching *index_choice* (case-insensitive).

    Returns the computed single-band ee.Image, or None when the choice is
    unrecognized (callers must handle the None case).
    """
    choice = index_choice.lower()
    if choice == 'ndvi':
        return calculate_ndvi(image, roi, reducer_choice)
    elif choice == 'ndwi':
        return calculate_ndwi(image, roi, reducer_choice)
    elif choice == 'average no₂':
        # The image arriving here is already a temporal mean produced by the
        # aggregate_data_* helpers, so selecting the band suffices.
        # (Bug fix: the original called image.select('NO2').mean() — mean() is
        # an ImageCollection method, not an ee.Image method.)
        return image.select('NO2').rename('Average NO₂')
    elif choice == 'custom formula':
        # Pass the user's formula string, not the index_choice label.
        return calculate_custom_formula(image, roi, custom_formula, reducer_choice)
    else:
        st.write("Please Select any one option...." + index_choice.lower())
        return None
def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id, index_choice, reducer_choice, shape_type, aggregation_period, custom_formula=""):
    """
    Compute the selected index for every uploaded location over the date
    range, aggregated to the chosen temporal period.

    Parameters
    ----------
    locations_df : DataFrame/GeoDataFrame with 'latitude'/'longitude' columns
        (points) or a 'geometry' column (polygons).
    start_date_str, end_date_str : ISO date strings bounding the collection.
    dataset_id : Earth Engine ImageCollection ID.
    index_choice : 'NDVI', 'NDWI', 'Average NO₂' or 'Custom Formula'.
    reducer_choice : spatial reducer name (see get_reducer).
    shape_type : 'Point' or 'Polygon' (case-insensitive).
    aggregation_period : 'Daily', 'Weekly', 'Monthly' or 'Yearly'.
    custom_formula : "band1,band2" string, required for 'Custom Formula'.

    Returns
    -------
    list[dict]
        One record per location per period containing the calculated value.
    """
    aggregated_results = []

    # Guard: a custom formula must be provided when that index is chosen.
    # (Bug fix: the original compared against 'custom_formula' with an
    # underscore, which can never equal the lowercased selectbox value
    # 'custom formula', so this guard never fired.)
    if index_choice.lower() == 'custom formula' and not custom_formula:
        st.error("Custom formula cannot be empty. Please provide a formula.")
        return aggregated_results

    # Progress reporting over the input rows.
    total_steps = len(locations_df)
    progress_bar = st.progress(0)
    progress_text = st.empty()

    period = aggregation_period.lower()
    # Property name stamped on each aggregated image by the aggregate_* helpers.
    period_property = {'daily': 'day', 'weekly': 'week', 'monthly': 'month', 'yearly': 'year'}.get(period)

    def _aggregate(collection):
        # Apply the selected temporal aggregation to the collection.
        if period == 'daily':
            return aggregate_data_daily(collection)
        elif period == 'weekly':
            return aggregate_data_weekly(collection)
        elif period == 'monthly':
            return aggregate_data_monthly(collection)
        elif period == 'yearly':
            return aggregate_data_yearly(collection)
        return collection

    def _process_roi(roi, location_name, extra_fields):
        # Shared per-ROI pipeline (previously duplicated verbatim for points
        # and polygons): filter the collection, aggregate it, compute the
        # index per image, and reduce over the ROI.
        collection = ee.ImageCollection(dataset_id) \
            .filterDate(ee.Date(start_date_str), ee.Date(end_date_str)) \
            .filterBounds(roi)
        collection = _aggregate(collection)
        image_list = collection.toList(collection.size())
        for i in range(image_list.size().getInfo()):
            image = ee.Image(image_list.get(i))
            timestamp = image.get(period_property)
            date = ee.Date(timestamp).format('YYYY-MM-dd').getInfo()
            index_image = calculate_index_for_period(image, roi, index_choice, reducer_choice, custom_formula)
            if index_image is None:
                st.warning(f"Index calculation failed for {location_name} on {date}. Skipping this entry.")
                continue
            try:
                # Spatially reduce the index over the ROI at 30 m scale.
                index_value = index_image.reduceRegion(
                    reducer=get_reducer(reducer_choice),
                    geometry=roi,
                    scale=30
                ).get(index_image.bandNames().get(0))
                calculated_value = index_value.getInfo()
                # Keep only finite numeric results.
                if isinstance(calculated_value, (int, float)):
                    record = {'Location Name': location_name}
                    record.update(extra_fields)
                    record['Date'] = date
                    record['Calculated Value'] = calculated_value
                    aggregated_results.append(record)
                else:
                    st.warning(f"Skipping invalid value for {location_name} on {date}")
            except Exception as e:
                st.error(f"Error retrieving value for {location_name}: {e}")

    def _update_progress(idx):
        # Advance the progress bar after each successfully dispatched row.
        progress_percentage = (idx + 1) / total_steps
        progress_bar.progress(progress_percentage)
        progress_text.markdown(f"Processing: {int(progress_percentage * 100)}%")

    with st.spinner('Processing data...'):
        if shape_type.lower() == "point":
            for idx, row in locations_df.iterrows():
                latitude = row.get('latitude')
                longitude = row.get('longitude')
                if pd.isna(latitude) or pd.isna(longitude):
                    st.warning(f"Skipping location {idx} with missing latitude or longitude")
                    continue
                location_name = row.get('name', f"Location_{idx}")
                roi = ee.Geometry.Point([longitude, latitude])
                _process_roi(roi, location_name, {'Latitude': latitude, 'Longitude': longitude})
                _update_progress(idx)
        elif shape_type.lower() == "polygon":
            for idx, row in locations_df.iterrows():
                location_name = row.get('name', f"Polygon_{idx}")
                try:
                    roi = convert_to_ee_geometry(row.get('geometry'))
                except ValueError as e:
                    st.warning(f"Skipping invalid polygon {location_name}: {e}")
                    continue
                _process_roi(roi, location_name, {})
                _update_progress(idx)

    return aggregated_results
# ---------------------------------------------------------------------------
# Trigger: run the aggregation when the user clicks the calculate button.
# ---------------------------------------------------------------------------
if st.button(f"Calculate ({index_choice})"):
    if file_upload is not None:
        # A single call covers both shape types: process_aggregation
        # dispatches on shape_type internally, and the selectbox guarantees
        # it is either 'Point' or 'Polygon'. (The original duplicated this
        # entire branch verbatim for each shape type.)
        results = process_aggregation(
            locations_df,
            start_date_str,
            end_date_str,
            dataset_id,
            index_choice,
            reducer_choice,
            shape_type,
            aggregation_period,
            custom_formula
        )
        if results:
            # Show the per-location, per-period values as a table.
            result_df = pd.DataFrame(results)
            st.write(f"Processed Results Table ({aggregation_period}):")
            st.dataframe(result_df)
            # Offer the results as a CSV download. Slashes are stripped from
            # the name: the original used '%Y/%m/%d' date formatting and
            # dataset IDs contain '/', both of which break download filenames.
            raw_name = f"{main_selection}_{dataset_id}_{start_date.strftime('%Y-%m-%d')}_{end_date.strftime('%Y-%m-%d')}_{aggregation_period.lower()}.csv"
            filename = raw_name.replace('/', '_')
            st.download_button(
                label="Download results as CSV",
                data=result_df.to_csv(index=False).encode('utf-8'),
                file_name=filename,
                mime='text/csv'
            )
            # (Removed the original st.spinner('') call here — st.spinner is a
            # context manager and calling it bare was a no-op.)
            st.success('Processing complete!')
        else:
            st.warning("No results were generated.")
    else:
        st.warning("Please upload a file.")