YashMK89 committed on
Commit fee251e · verified · 1 Parent(s): c688eb2

update app.py

Files changed (1)
  1. app.py +264 -86
app.py CHANGED
@@ -878,8 +878,7 @@ def calculate_index_for_period(image, roi, index_choice, reducer_choice, custom_
  # return aggregated_output.to_dict(orient='records')

  # return []
-
- ####################################################
  def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id, index_choice, reducer_choice, shape_type, aggregation_period, custom_formula=""):
  aggregated_results = []

@@ -922,14 +921,9 @@ def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id,

  # Process each image in the collection
  image_list = collection.toList(collection.size())
- all_values = [] # To store calculated values for mean calculation
- start_date = None
- end_date = None
-
  for i in range(image_list.size().getInfo()):
  image = ee.Image(image_list.get(i))

- # Extract the timestamp based on the aggregation period
  if aggregation_period.lower() == 'daily':
  timestamp = image.get('day')
  elif aggregation_period.lower() == 'weekly':
@@ -942,12 +936,6 @@ def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id,
  # Format the timestamp as a valid date string
  date = ee.Date(timestamp).format('YYYY-MM-dd').getInfo()

- # Update start_date and end_date
- if start_date is None or date < start_date:
- start_date = date
- if end_date is None or date > end_date:
- end_date = date
-
  # Calculate the index for each period
  index_image = calculate_index_for_period(image, roi, index_choice, reducer_choice, custom_formula)

@@ -960,23 +948,20 @@ def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id,

  calculated_value = index_value.getInfo()

- # Append valid values for mean calculation
  if isinstance(calculated_value, (int, float)):
- all_values.append(calculated_value)
  except Exception as e:
  st.error(f"Error retrieving value for {location_name}: {e}")

- # Calculate mean value for this location
- if all_values:
- mean_value = sum(all_values) / len(all_values)
- aggregated_results.append({
- 'Location Name': location_name,
- 'Start Date': start_date,
- 'End Date': end_date,
- 'Mean Value': mean_value
-
- })
-
  # Update progress bar
  progress_percentage = (idx + 1) / total_steps
  progress_bar.progress(progress_percentage)
@@ -1010,14 +995,9 @@ def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id,

  # Process each image in the collection
  image_list = collection.toList(collection.size())
- all_values = [] # To store calculated values for mean calculation
- start_date = None
- end_date = None
-
  for i in range(image_list.size().getInfo()):
  image = ee.Image(image_list.get(i))

- # Extract the timestamp based on the aggregation period
  if aggregation_period.lower() == 'daily':
  timestamp = image.get('day')
  elif aggregation_period.lower() == 'weekly':
@@ -1030,12 +1010,6 @@ def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id,
  # Format the timestamp as a valid date string
  date = ee.Date(timestamp).format('YYYY-MM-dd').getInfo()

- # Update start_date and end_date
- if start_date is None or date < start_date:
- start_date = date
- if end_date is None or date > end_date:
- end_date = date
-
  # Calculate the index for each period
  index_image = calculate_index_for_period(image, roi, index_choice, reducer_choice, custom_formula)

@@ -1048,70 +1022,274 @@ def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id,

  calculated_value = index_value.getInfo()

- # Append valid values for mean calculation
  if isinstance(calculated_value, (int, float)):
- all_values.append(calculated_value)
  except Exception as e:
  st.error(f"Error retrieving value for {location_name}: {e}")

- # Calculate mean value for this location
- if all_values:
- mean_value = sum(all_values) / len(all_values)
- aggregated_results.append({
- 'Location Name': location_name,
- 'Mean Value': mean_value,
- 'Start Date': start_date,
- 'End Date': end_date
- })
-
  # Update progress bar
  progress_percentage = (idx + 1) / total_steps
  progress_bar.progress(progress_percentage)
  progress_text.markdown(f"Processing: {int(progress_percentage * 100)}%")

- # Convert results to DataFrame and return as dictionary
  if aggregated_results:
  result_df = pd.DataFrame(aggregated_results)

- # For point data, include latitude and longitude
- if shape_type.lower() == "point":
- # Check if 'name' column exists; if not, generate names based on latitude and longitude
- if 'name' not in locations_df.columns:
- if 'latitude' in locations_df.columns and 'longitude' in locations_df.columns:
- locations_df['name'] = locations_df.apply(
- lambda row: f"Location_{row.name}_{row['latitude']}_{row['longitude']}", axis=1
- )
- else:
- st.error("Missing 'latitude' or 'longitude' columns in the input data.")
- return [] # Exit early if required columns are missing

- # Ensure 'latitude' and 'longitude' columns exist before merging
- if 'latitude' in locations_df.columns and 'longitude' in locations_df.columns:
- result_df = result_df.merge(
- locations_df[['name', 'latitude', 'longitude']],
- left_on='Location Name',
- right_on='name',
- how='left'
- )
- result_df.rename(columns={'latitude': 'Latitude', 'longitude': 'Longitude'}, inplace=True)
- result_df.drop(columns=['name'], inplace=True)
- else:
- st.error("Missing 'latitude' or 'longitude' columns in the input data.")
- return [] # Exit early if required columns are missing

- # For polygon data, add default names if 'name' column is missing
- elif shape_type.lower() == "polygon":
- if 'name' not in locations_df.columns:
- locations_df['name'] = locations_df.index.map(lambda idx: f"Polygon_{idx}")

- # Merge with 'name' column only for polygons
- result_df = result_df.merge(
- locations_df[['name']],
- left_on='Location Name',
- right_on='name',
- how='left'
- )
- result_df.drop(columns=['name'], inplace=True)
  ####################################################

  # def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id, index_choice, reducer_choice, shape_type, aggregation_period, custom_formula=""):
 
  # return aggregated_output.to_dict(orient='records')

  # return []
+ #####################################################
  def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id, index_choice, reducer_choice, shape_type, aggregation_period, custom_formula=""):
  aggregated_results = []

  # Process each image in the collection
  image_list = collection.toList(collection.size())
  for i in range(image_list.size().getInfo()):
  image = ee.Image(image_list.get(i))

  if aggregation_period.lower() == 'daily':
  timestamp = image.get('day')
  elif aggregation_period.lower() == 'weekly':

  # Format the timestamp as a valid date string
  date = ee.Date(timestamp).format('YYYY-MM-dd').getInfo()

  # Calculate the index for each period
  index_image = calculate_index_for_period(image, roi, index_choice, reducer_choice, custom_formula)

  calculated_value = index_value.getInfo()

+ # Append the results if valid
  if isinstance(calculated_value, (int, float)):
+ aggregated_results.append({
+ 'Location Name': location_name,
+ 'Latitude': latitude, # Include latitude
+ 'Longitude': longitude, # Include longitude
+ 'Date': date,
+ 'Calculated Value': calculated_value
+ })
+ else:
+ st.warning(f"Skipping invalid value for {location_name} on {date}")
  except Exception as e:
  st.error(f"Error retrieving value for {location_name}: {e}")

  # Update progress bar
  progress_percentage = (idx + 1) / total_steps
  progress_bar.progress(progress_percentage)

  # Process each image in the collection
  image_list = collection.toList(collection.size())
  for i in range(image_list.size().getInfo()):
  image = ee.Image(image_list.get(i))

  if aggregation_period.lower() == 'daily':
  timestamp = image.get('day')
  elif aggregation_period.lower() == 'weekly':

  # Format the timestamp as a valid date string
  date = ee.Date(timestamp).format('YYYY-MM-dd').getInfo()

  # Calculate the index for each period
  index_image = calculate_index_for_period(image, roi, index_choice, reducer_choice, custom_formula)

  calculated_value = index_value.getInfo()

+ # Append the results if valid
  if isinstance(calculated_value, (int, float)):
+ aggregated_results.append({
+ 'Location Name': location_name,
+ 'Date': date,
+ 'Calculated Value': calculated_value
+ })
+ else:
+ st.warning(f"Skipping invalid value for {location_name} on {date}")
  except Exception as e:
  st.error(f"Error retrieving value for {location_name}: {e}")

  # Update progress bar
  progress_percentage = (idx + 1) / total_steps
  progress_bar.progress(progress_percentage)
  progress_text.markdown(f"Processing: {int(progress_percentage * 100)}%")

+ # Perform final aggregation (e.g., mean) for each location
  if aggregated_results:
  result_df = pd.DataFrame(aggregated_results)
+ # Group by 'Location Name' and calculate the mean of 'Calculated Value'
+ aggregated_output = result_df.groupby('Location Name').agg({
+ 'Latitude': 'first', # Keep the first latitude value
+ 'Longitude': 'first', # Keep the first longitude value
+ 'Calculated Value': 'mean' # Calculate the mean of the calculated values
+ }).reset_index()

+ # Rename columns for clarity
+ aggregated_output.rename(columns={'Calculated Value': 'Aggregated Value'}, inplace=True)

+ # Convert the DataFrame to a dictionary for output
+ return aggregated_output.to_dict(orient='records')
+
+ return []
+ ####################################################
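
For reference, a minimal standalone sketch of the aggregation pattern used above, run on made-up records shaped like the ones the point branch appends (pure pandas, nothing app-specific):

import pandas as pd

# Toy per-image records; the real ones are built inside process_aggregation
records = [
    {'Location Name': 'Site_A', 'Latitude': 10.0, 'Longitude': 76.0, 'Date': '2023-01-01', 'Calculated Value': 0.41},
    {'Location Name': 'Site_A', 'Latitude': 10.0, 'Longitude': 76.0, 'Date': '2023-01-09', 'Calculated Value': 0.45},
    {'Location Name': 'Site_B', 'Latitude': 11.5, 'Longitude': 77.2, 'Date': '2023-01-01', 'Calculated Value': 0.30},
]

result_df = pd.DataFrame(records)
aggregated_output = result_df.groupby('Location Name').agg({
    'Latitude': 'first',        # one coordinate pair per location
    'Longitude': 'first',
    'Calculated Value': 'mean'  # mean across all images for that location
}).reset_index()
aggregated_output.rename(columns={'Calculated Value': 'Aggregated Value'}, inplace=True)
print(aggregated_output.to_dict(orient='records'))
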
+ ####################################################
+ # def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id, index_choice, reducer_choice, shape_type, aggregation_period, custom_formula=""):
+ # aggregated_results = []
+
+ # # Check if the index_choice is 'custom formula' and the custom formula is empty
+ # if index_choice.lower() == 'custom_formula' and not custom_formula:
+ # st.error("Custom formula cannot be empty. Please provide a formula.")
+ # return aggregated_results # Return early to avoid further processing
+
+ # # Initialize progress bar
+ # total_steps = len(locations_df)
+ # progress_bar = st.progress(0)
+ # progress_text = st.empty()
+
+ # with st.spinner('Processing data...'):
+ # if shape_type.lower() == "point":
+ # for idx, row in locations_df.iterrows():
+ # # Check if the latitude and longitude columns exist and have values
+ # latitude = row.get('latitude')
+ # longitude = row.get('longitude')
+ # if pd.isna(latitude) or pd.isna(longitude):
+ # st.warning(f"Skipping location {idx} with missing latitude or longitude")
+ # continue
+
+ # location_name = row.get('name', f"Location_{idx}")
+ # roi = ee.Geometry.Point([longitude, latitude])
+
+ # collection = ee.ImageCollection(dataset_id) \
+ # .filterDate(ee.Date(start_date_str), ee.Date(end_date_str)) \
+ # .filterBounds(roi)
+
+ # # Aggregate data based on the selected period
+ # if aggregation_period.lower() == 'daily':
+ # collection = aggregate_data_daily(collection)
+ # elif aggregation_period.lower() == 'weekly':
+ # collection = aggregate_data_weekly(collection)
+ # elif aggregation_period.lower() == 'monthly':
+ # collection = aggregate_data_monthly(collection, start_date_str, end_date_str)
+ # elif aggregation_period.lower() == 'yearly':
+ # collection = aggregate_data_yearly(collection)
+
+ # # Process each image in the collection
+ # image_list = collection.toList(collection.size())
+ # all_values = [] # To store calculated values for mean calculation
+ # start_date = None
+ # end_date = None
+
+ # for i in range(image_list.size().getInfo()):
+ # image = ee.Image(image_list.get(i))
+
+ # # Extract the timestamp based on the aggregation period
+ # if aggregation_period.lower() == 'daily':
+ # timestamp = image.get('day')
+ # elif aggregation_period.lower() == 'weekly':
+ # timestamp = image.get('week_start') # Use week start date
+ # elif aggregation_period.lower() == 'monthly':
+ # timestamp = image.get('month')
+ # elif aggregation_period.lower() == 'yearly':
+ # timestamp = image.get('year')
+
+ # # Format the timestamp as a valid date string
+ # date = ee.Date(timestamp).format('YYYY-MM-dd').getInfo()
+
+ # # Update start_date and end_date
+ # if start_date is None or date < start_date:
+ # start_date = date
+ # if end_date is None or date > end_date:
+ # end_date = date
+
+ # # Calculate the index for each period
+ # index_image = calculate_index_for_period(image, roi, index_choice, reducer_choice, custom_formula)
+
+ # try:
+ # index_value = index_image.reduceRegion(
+ # reducer=get_reducer(reducer_choice),
+ # geometry=roi,
+ # scale=30
+ # ).get(index_image.bandNames().get(0))
+
+ # calculated_value = index_value.getInfo()
+
+ # # Append valid values for mean calculation
+ # if isinstance(calculated_value, (int, float)):
+ # all_values.append(calculated_value)
+ # except Exception as e:
+ # st.error(f"Error retrieving value for {location_name}: {e}")
+
+ # # Calculate mean value for this location
+ # if all_values:
+ # mean_value = sum(all_values) / len(all_values)
+ # aggregated_results.append({
+ # 'Location Name': location_name,
+ # 'Start Date': start_date,
+ # 'End Date': end_date,
+ # 'Mean Value': mean_value
+
+ # })
+
+ # # Update progress bar
+ # progress_percentage = (idx + 1) / total_steps
+ # progress_bar.progress(progress_percentage)
+ # progress_text.markdown(f"Processing: {int(progress_percentage * 100)}%")

+ # elif shape_type.lower() == "polygon":
+ # for idx, row in locations_df.iterrows():
+ # polygon_name = row.get('name', f"Polygon_{idx}")
+ # polygon_geometry = row.get('geometry')
+ # location_name = polygon_name
+
+ # try:
+ # roi = convert_to_ee_geometry(polygon_geometry)
+ # except ValueError as e:
+ # st.warning(f"Skipping invalid polygon {polygon_name}: {e}")
+ # continue
+
+ # collection = ee.ImageCollection(dataset_id) \
+ # .filterDate(ee.Date(start_date_str), ee.Date(end_date_str)) \
+ # .filterBounds(roi)
+
+ # # Aggregate data based on the selected period
+ # if aggregation_period.lower() == 'daily':
+ # collection = aggregate_data_daily(collection)
+ # elif aggregation_period.lower() == 'weekly':
+ # collection = aggregate_data_weekly(collection)
+ # elif aggregation_period.lower() == 'monthly':
+ # collection = aggregate_data_monthly(collection, start_date_str, end_date_str)
+ # elif aggregation_period.lower() == 'yearly':
+ # collection = aggregate_data_yearly(collection)
+
+ # # Process each image in the collection
+ # image_list = collection.toList(collection.size())
+ # all_values = [] # To store calculated values for mean calculation
+ # start_date = None
+ # end_date = None
+
+ # for i in range(image_list.size().getInfo()):
+ # image = ee.Image(image_list.get(i))
+
+ # # Extract the timestamp based on the aggregation period
+ # if aggregation_period.lower() == 'daily':
+ # timestamp = image.get('day')
+ # elif aggregation_period.lower() == 'weekly':
+ # timestamp = image.get('week_start') # Use week start date
+ # elif aggregation_period.lower() == 'monthly':
+ # timestamp = image.get('month')
+ # elif aggregation_period.lower() == 'yearly':
+ # timestamp = image.get('year')
+
+ # # Format the timestamp as a valid date string
+ # date = ee.Date(timestamp).format('YYYY-MM-dd').getInfo()
+
+ # # Update start_date and end_date
+ # if start_date is None or date < start_date:
+ # start_date = date
+ # if end_date is None or date > end_date:
+ # end_date = date
+
+ # # Calculate the index for each period
+ # index_image = calculate_index_for_period(image, roi, index_choice, reducer_choice, custom_formula)
+
+ # try:
+ # index_value = index_image.reduceRegion(
+ # reducer=get_reducer(reducer_choice),
+ # geometry=roi,
+ # scale=30
+ # ).get(index_image.bandNames().get(0))
+
+ # calculated_value = index_value.getInfo()
+
+ # # Append valid values for mean calculation
+ # if isinstance(calculated_value, (int, float)):
+ # all_values.append(calculated_value)
+ # except Exception as e:
+ # st.error(f"Error retrieving value for {location_name}: {e}")
+
+ # # Calculate mean value for this location
+ # if all_values:
+ # mean_value = sum(all_values) / len(all_values)
+ # aggregated_results.append({
+ # 'Location Name': location_name,
+ # 'Mean Value': mean_value,
+ # 'Start Date': start_date,
+ # 'End Date': end_date
+ # })
+
+ # # Update progress bar
+ # progress_percentage = (idx + 1) / total_steps
+ # progress_bar.progress(progress_percentage)
+ # progress_text.markdown(f"Processing: {int(progress_percentage * 100)}%")
+
+ # # Convert results to DataFrame and return as dictionary
+ # if aggregated_results:
+ # result_df = pd.DataFrame(aggregated_results)

+ # # For point data, include latitude and longitude
+ # if shape_type.lower() == "point":
+ # # Check if 'name' column exists; if not, generate names based on latitude and longitude
+ # if 'name' not in locations_df.columns:
+ # if 'latitude' in locations_df.columns and 'longitude' in locations_df.columns:
+ # locations_df['name'] = locations_df.apply(
+ # lambda row: f"Location_{row.name}_{row['latitude']}_{row['longitude']}", axis=1
+ # )
+ # else:
+ # st.error("Missing 'latitude' or 'longitude' columns in the input data.")
+ # return [] # Exit early if required columns are missing
+
+ # # Ensure 'latitude' and 'longitude' columns exist before merging
+ # if 'latitude' in locations_df.columns and 'longitude' in locations_df.columns:
+ # result_df = result_df.merge(
+ # locations_df[['name', 'latitude', 'longitude']],
+ # left_on='Location Name',
+ # right_on='name',
+ # how='left'
+ # )
+ # result_df.rename(columns={'latitude': 'Latitude', 'longitude': 'Longitude'}, inplace=True)
+ # result_df.drop(columns=['name'], inplace=True)
+ # else:
+ # st.error("Missing 'latitude' or 'longitude' columns in the input data.")
+ # return [] # Exit early if required columns are missing
+
+ # # For polygon data, add default names if 'name' column is missing
+ # elif shape_type.lower() == "polygon":
+ # if 'name' not in locations_df.columns:
+ # locations_df['name'] = locations_df.index.map(lambda idx: f"Polygon_{idx}")
+
+ # # Merge with 'name' column only for polygons
+ # result_df = result_df.merge(
+ # locations_df[['name']],
+ # left_on='Location Name',
+ # right_on='name',
+ # how='left'
+ # )
+ # result_df.drop(columns=['name'], inplace=True)
  ####################################################

  # def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id, index_choice, reducer_choice, shape_type, aggregation_period, custom_formula=""):
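
For orientation, a minimal sketch of the reduceRegion pattern the per-image loop relies on. It assumes an already-authenticated Earth Engine session, substitutes a constant test image for the real index image, and uses ee.Reducer.mean() in place of the app's get_reducer(reducer_choice):

import ee

ee.Initialize()  # assumes Earth Engine credentials are already configured

# Stand-in for the per-period image produced by calculate_index_for_period
index_image = ee.Image.constant(0.5).rename('index')
roi = ee.Geometry.Point([76.95, 11.01])  # hypothetical point of interest

index_value = index_image.reduceRegion(
    reducer=ee.Reducer.mean(),  # the app resolves this via get_reducer(reducer_choice)
    geometry=roi,
    scale=30
).get(index_image.bandNames().get(0))

calculated_value = index_value.getInfo()  # a plain Python number, here 0.5
print(calculated_value)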