Update app.py
app.py
CHANGED
@@ -143,31 +143,51 @@ def calculate_custom_formula(image, geometry, selected_bands, custom_formula, re
         return ee.Image(0).rename('custom_result').set('error', str(e))

 # Aggregation functions
-def
-
-
-
-
+def aggregate_data_daily(collection):
+    """
+    Aggregates data on a daily basis.
+    """
+    def set_day_start(image):
+        date = ee.Date(image.get('system:time_start'))
+        day_start = date.format('YYYY-MM-dd')
+        return image.set('day_start', day_start)
+
+    collection = collection.map(set_day_start)
+    grouped_by_day = collection.aggregate_array('day_start').distinct()
+
+    def calculate_daily_mean(day_start):
+        daily_collection = collection.filter(ee.Filter.eq('day_start', day_start))
         daily_mean = daily_collection.mean()
-        return daily_mean.set('
+        return daily_mean.set('day_start', day_start)
+
     daily_images = ee.List(grouped_by_day.map(calculate_daily_mean))
     return ee.ImageCollection(daily_images)

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+def aggregate_data_weekly(collection, start_date_str, end_date_str):
+    """
+    Aggregates data on a weekly basis, starting from the exact start date provided by the user.
+    """
+    start_date = ee.Date(start_date_str)
+    end_date = ee.Date(end_date_str)
+
+    # Calculate the number of weeks between the start and end dates
+    days_diff = end_date.difference(start_date, 'day')
+    num_weeks = days_diff.divide(7).ceil().getInfo()  # Total number of weeks
+
+    weekly_images = []
+    for week in range(num_weeks):
+        week_start = start_date.advance(week * 7, 'day')  # Start of the week
+        week_end = week_start.advance(7, 'day')  # End of the week
+
+        weekly_collection = collection.filterDate(week_start, week_end)
+        if weekly_collection.size().getInfo() > 0:
+            weekly_mean = weekly_collection.mean()
+            weekly_mean = weekly_mean.set('week_start', week_start.format('YYYY-MM-dd'))
+            weekly_images.append(weekly_mean)
+
+    return ee.ImageCollection.fromImages(weekly_images)
+

 def aggregate_data_monthly(collection, start_date, end_date):
     collection = collection.filterDate(start_date, end_date)
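Note on the new aggregate_data_weekly: the Python for loop calls .getInfo() once for num_weeks and once per week for weekly_collection.size(), so every week costs a synchronous Earth Engine round trip. A minimal server-side sketch of the same grouping, assuming only the public ee Python API (hypothetical, not part of this commit):

import ee

def aggregate_data_weekly_server_side(collection, start_date_str, end_date_str):
    # Hypothetical variant: week offsets are built with ee.List.sequence,
    # so no .getInfo() is needed while the collection is assembled.
    start = ee.Date(start_date_str)
    end = ee.Date(end_date_str)
    num_weeks = end.difference(start, 'day').divide(7).ceil()
    offsets = ee.List.sequence(0, num_weeks.subtract(1))

    def weekly_mean(offset):
        week_start = start.advance(ee.Number(offset).multiply(7), 'day')
        weekly = collection.filterDate(week_start, week_start.advance(7, 'day'))
        return (weekly.mean()
                .set('week_start', week_start.format('YYYY-MM-dd'))
                .set('count', weekly.size()))

    images = ee.ImageCollection.fromImages(offsets.map(weekly_mean))
    # Mirrors the size() > 0 guard in the committed version.
    return images.filter(ee.Filter.gt('count', 0))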
@@ -180,6 +200,7 @@ def aggregate_data_monthly(collection, start_date, end_date):
     monthly_images = ee.List(grouped_by_month.map(calculate_monthly_mean))
     return ee.ImageCollection(monthly_images)

+
 def aggregate_data_yearly(collection):
     collection = collection.map(lambda image: image.set('year', ee.Date(image.get('system:time_start')).format('YYYY')))
     grouped_by_year = collection.aggregate_array('year').distinct()
@@ -190,6 +211,7 @@ def aggregate_data_yearly(collection):
     yearly_images = ee.List(grouped_by_year.map(calculate_yearly_mean))
     return ee.ImageCollection(yearly_images)

+
 # Worker function for processing a single geometry
 def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selected_bands, reducer_choice, shape_type, aggregation_period, custom_formula, original_lat_col, original_lon_col, kernel_size=None, include_boundary=None):
     if shape_type.lower() == "point":
@@ -220,11 +242,13 @@ def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selected_bands, reducer_choice, shape_type, aggregation_period, custom_formula, original_lat_col, original_lon_col, kernel_size=None, include_boundary=None):
     collection = ee.ImageCollection(dataset_id) \
         .filterDate(ee.Date(start_date_str), ee.Date(end_date_str)) \
         .filterBounds(roi)
-
+
     if aggregation_period.lower() == 'custom (start date to end date)':
         collection = aggregate_data_custom(collection)
+    elif aggregation_period.lower() == 'daily':
+        collection = aggregate_data_daily(collection)
     elif aggregation_period.lower() == 'weekly':
-        collection = aggregate_data_weekly(collection)
+        collection = aggregate_data_weekly(collection, start_date_str, end_date_str)
     elif aggregation_period.lower() == 'monthly':
         collection = aggregate_data_monthly(collection, start_date_str, end_date_str)
     elif aggregation_period.lower() == 'yearly':
@@ -234,13 +258,16 @@ def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selected_bands, reducer_choice, shape_type, aggregation_period, custom_formula, original_lat_col, original_lon_col, kernel_size=None, include_boundary=None):
     image_list = collection.toList(collection.size())
     processed_weeks = set()
     aggregated_results = []
-
     for i in range(image_list.size().getInfo()):
         image = ee.Image(image_list.get(i))
         if aggregation_period.lower() == 'custom (start date to end date)':
             timestamp = image.get('day')
             period_label = 'Date'
             date = ee.Date(timestamp).format('YYYY-MM-dd').getInfo()
+        elif aggregation_period.lower() == 'daily':
+            timestamp = image.get('day_start')
+            period_label = 'Date'
+            date = ee.String(timestamp).getInfo()
         elif aggregation_period.lower() == 'weekly':
             timestamp = image.get('week_start')
             period_label = 'Week'
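Note on the retrieval loop above: image_list.size().getInfo() plus one getInfo() per image (ee.Date(...).format(...).getInfo() or ee.String(timestamp).getInfo()) means one round trip per aggregated period. A batched sketch that pulls every label in a single call (hypothetical; list_period_labels is not in this commit):

def list_period_labels(collection, prop):
    # One getInfo() round trip returns the period label of every image,
    # e.g. prop='day_start' for daily output or prop='week_start' for weekly.
    return collection.aggregate_array(prop).getInfo()

# Example: list_period_labels(aggregate_data_daily(collection), 'day_start')
# -> ['2024-01-01', '2024-01-02', ...]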
@@ -281,16 +308,15 @@ def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selected_bands, reducer_choice, shape_type, aggregation_period, custom_formula, original_lat_col, original_lon_col, kernel_size=None, include_boundary=None):
             aggregated_results.append(result)
         except Exception as e:
             st.error(f"Error retrieving value for {location_name}: {e}")
-
     return aggregated_results

+
 # Main processing function
 def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id, selected_bands, reducer_choice, shape_type, aggregation_period, original_lat_col, original_lon_col, custom_formula="", kernel_size=None, include_boundary=None):
     aggregated_results = []
     total_steps = len(locations_df)
     progress_bar = st.progress(0)
     progress_text = st.empty()
-
     start_time = time.time()  # Start timing the process
     with ThreadPoolExecutor(max_workers=10) as executor:
         futures = []
@@ -312,7 +338,6 @@ def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id, selected_bands, reducer_choice, shape_type, aggregation_period, original_lat_col, original_lon_col, custom_formula="", kernel_size=None, include_boundary=None):
                 include_boundary
             )
             futures.append(future)
-
         completed = 0
         for future in as_completed(futures):
             result = future.result()
@@ -322,11 +347,9 @@
             progress_percentage = completed / total_steps
             progress_bar.progress(progress_percentage)
             progress_text.markdown(f"Processing: {int(progress_percentage * 100)}%")
-
     # End timing the process
     end_time = time.time()
     processing_time = end_time - start_time  # Calculate total processing time
-
     if aggregated_results:
         result_df = pd.DataFrame(aggregated_results)
         if aggregation_period.lower() == 'custom (start date to end date)':
@@ -351,7 +374,6 @@ imagery_base = st.selectbox("Select Imagery Base", ["Sentinel", "Landsat", "MODI

 # Initialize data as an empty dictionary
 data = {}
-
 if imagery_base == "Sentinel":
     dataset_file = "sentinel_datasets.json"
     try:
@@ -397,18 +419,14 @@ elif imagery_base == "Custom Input":
     else:
         st.warning("Please enter a custom dataset ID to proceed.")
         data = {}
-
 if not data:
     st.error("No valid dataset available. Please check your inputs.")
     st.stop()

 st.markdown("<hr><h5><b>{}</b></h5>".format(imagery_base), unsafe_allow_html=True)
-
 main_selection = st.selectbox(f"Select {imagery_base} Dataset Category", list(data.keys()))
-
 sub_selection = None
 dataset_id = None
-
 if main_selection:
     sub_options = data[main_selection]["sub_options"]
     sub_selection = st.selectbox(f"Select Specific {imagery_base} Dataset ID", list(sub_options.keys()))
@@ -418,7 +436,6 @@ if main_selection:
         dataset_id = sub_selection

 st.markdown("<hr><h5><b>Earth Engine Index Calculator</b></h5>", unsafe_allow_html=True)
-
 if main_selection and sub_selection:
     dataset_bands = data[main_selection]["bands"].get(sub_selection, [])
     st.write(f"Available Bands for {sub_options[sub_selection]}: {', '.join(dataset_bands)}")
@@ -428,11 +445,9 @@ if main_selection and sub_selection:
         default=[dataset_bands[0]] if dataset_bands else [],
         help=f"Select 1 or 2 bands from: {', '.join(dataset_bands)}"
     )
-
     if len(selected_bands) < 1:
         st.warning("Please select at least one band.")
         st.stop()
-
     if selected_bands:
         if len(selected_bands) == 1:
             default_formula = f"{selected_bands[0]}"
@@ -445,7 +460,6 @@ if main_selection and sub_selection:
             value=default_formula,
             help=f"Use only these bands: {', '.join(selected_bands)}. Examples: {example}"
         )
-
         def validate_formula(formula, selected_bands):
             allowed_chars = set(" +-*/()0123456789.")
             terms = re.findall(r'[a-zA-Z][a-zA-Z0-9_]*', formula)
@@ -455,7 +469,6 @@ if main_selection and sub_selection:
             if not all(char in allowed_chars or char in ''.join(selected_bands) for char in formula):
                 return False, "Formula contains invalid characters. Use only bands, numbers, and operators (+, -, *, /, ())"
             return True, ""
-
         is_valid, error_message = validate_formula(custom_formula, selected_bands)
         if not is_valid:
             st.error(error_message)
@@ -463,7 +476,6 @@ if main_selection and sub_selection:
         elif not custom_formula:
             st.warning("Please enter a custom formula to proceed.")
             st.stop()
-
         st.write(f"Custom Formula: {custom_formula}")

 reducer_choice = st.selectbox(
@@ -478,8 +490,8 @@ start_date_str = start_date.strftime('%Y-%m-%d')
 end_date_str = end_date.strftime('%Y-%m-%d')

 aggregation_period = st.selectbox(
-    "Select Aggregation Period (e.g, Custom(Start Date to End Date) , Weekly , Monthly , Yearly)",
-    ["Custom (Start Date to End Date)", "Weekly", "Monthly", "Yearly"],
+    "Select Aggregation Period (e.g, Custom(Start Date to End Date) , Daily , Weekly , Monthly , Yearly)",
+    ["Custom (Start Date to End Date)", "Daily", "Weekly", "Monthly", "Yearly"],
     index=0
 )

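The option labels above have to stay in sync with the aggregation_period.lower() branches in process_single_geometry ('daily', 'weekly', 'monthly', 'yearly'). A table-driven dispatch is one hedged alternative (hypothetical, not in this commit); the custom period would still be handled separately as in the committed code:

AGGREGATORS = {
    'daily':   lambda coll, start, end: aggregate_data_daily(coll),
    'weekly':  aggregate_data_weekly,  # already takes (collection, start, end)
    'monthly': aggregate_data_monthly,
    'yearly':  lambda coll, start, end: aggregate_data_yearly(coll),
}

period = aggregation_period.lower()
if period in AGGREGATORS:
    collection = AGGREGATORS[period](collection, start_date_str, end_date_str)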
@@ -487,7 +499,6 @@ shape_type = st.selectbox("Do you want to process 'Point' or 'Polygon' data?", [

 kernel_size = None
 include_boundary = None
-
 if shape_type.lower() == "point":
     kernel_size = st.selectbox(
         "Select Calculation Area(e.g, Point , 3x3 Kernel , 5x5 Kernel)",
@@ -512,14 +523,11 @@ if file_upload is not None:
         if file_upload.name.endswith('.csv'):
             # Read the CSV file
             locations_df = pd.read_csv(file_upload)
-
             # Show the first few rows to help user identify columns
             st.write("Preview of your uploaded data (first 5 rows):")
             st.dataframe(locations_df.head())
-
             # Get all column names from the uploaded file
             all_columns = locations_df.columns.tolist()
-
             # Let user select latitude and longitude columns from dropdown
             col1, col2 = st.columns(2)
             with col1:
@@ -536,18 +544,15 @@ if file_upload is not None:
                     index=all_columns.index('longitude') if 'longitude' in all_columns else 0,
                     help="Select the column containing longitude values"
                 )
-
             # Validate the selected columns contain numeric data
             if not pd.api.types.is_numeric_dtype(locations_df[original_lat_col]) or not pd.api.types.is_numeric_dtype(locations_df[original_lon_col]):
                 st.error("Error: Selected Latitude and Longitude columns must contain numeric values")
                 st.stop()
-
             # Rename the selected columns to standard names for processing
             locations_df = locations_df.rename(columns={
                 original_lat_col: 'latitude',
                 original_lon_col: 'longitude'
             })
-
         elif file_upload.name.endswith('.geojson'):
             locations_df = gpd.read_file(file_upload)
             if 'geometry' in locations_df.columns:
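The numeric check above stops the app on the first non-numeric column. A softer, hypothetical variant coerces with pandas and drops only the offending rows (assuming the pd and st imports already present in app.py):

lat = pd.to_numeric(locations_df[original_lat_col], errors='coerce')
lon = pd.to_numeric(locations_df[original_lon_col], errors='coerce')
bad_rows = locations_df[lat.isna() | lon.isna()]
if not bad_rows.empty:
    st.warning(f"Dropping {len(bad_rows)} rows with non-numeric coordinates.")
locations_df = locations_df[lat.notna() & lon.notna()]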
@@ -583,7 +588,6 @@ if file_upload is not None:
                 original_lon_col = 'longitude'
             except Exception as e:
                 st.error(f"Error parsing KML file: {str(e)}")
-
         # Display map for points if we have valid data
         if not locations_df.empty and 'latitude' in locations_df.columns and 'longitude' in locations_df.columns:
             m = leafmap.Map(center=[locations_df['latitude'].mean(), locations_df['longitude'].mean()], zoom=10)
@@ -595,7 +599,6 @@ if file_upload is not None:
                 m.add_marker(location=[latitude, longitude], popup=row.get('name', 'No Name'))
             st.write("Map of Uploaded Points:")
             m.to_streamlit()
-
     elif shape_type.lower() == "polygon":
         if file_upload.name.endswith('.csv'):
             st.error("CSV upload not supported for polygons. Please upload a GeoJSON or KML file.")
@@ -625,7 +628,6 @@ if file_upload is not None:
                 locations_df = gpd.GeoDataFrame(polygons, geometry=gpd.GeoSeries.from_wkt([p['geometry'] for p in polygons]), crs="EPSG:4326")
             except Exception as e:
                 st.error(f"Error parsing KML file: {str(e)}")
-
         # Display map for polygons if we have valid data
         if not locations_df.empty and 'geometry' in locations_df.columns:
             centroid_lat = locations_df.geometry.centroid.y.mean()
@@ -658,12 +660,10 @@ if st.button(f"Calculate {custom_formula}"):
             kernel_size,
             include_boundary
         )
-
         if results:
             result_df = pd.DataFrame(results)
             st.write(f"Processed Results Table ({aggregation_period}) for Formula: {custom_formula}")
             st.dataframe(result_df)
-
             filename = f"{main_selection}_{dataset_id}_{start_date.strftime('%Y%m%d')}_{end_date.strftime('%Y%m%d')}_{aggregation_period.lower()}.csv"
             st.download_button(
                 label="Download results as CSV",
@@ -671,12 +671,10 @@ if st.button(f"Calculate {custom_formula}"):
                 file_name=filename,
                 mime='text/csv'
             )
-
             st.success(f"Processing complete! Total processing time: {processing_time:.2f} seconds.")
         else:
             st.warning("No results were generated. Check your inputs or formula.")
             st.info(f"Total processing time: {processing_time:.2f} seconds.")
-
     except Exception as e:
         st.error(f"An error occurred during processing: {str(e)}")
     else: