Spaces:

AgricultureLab2024
/

SATRANG

Running

App Files Files Community

YashMK89 committed 5 days ago

Commit

fdee337

verified ·

1 Parent(s): 9eabcdb

update app.py

Browse files

Files changed (1) hide show

app.py +42 -67

app.py CHANGED Viewed

@@ -191,10 +191,10 @@ def aggregate_data_yearly(collection):
     return ee.ImageCollection(yearly_images)
 # Worker function for processing a single geometry
-def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selected_bands, reducer_choice, shape_type, aggregation_period, custom_formula, kernel_size=None, include_boundary=None):
     if shape_type.lower() == "point":
-        latitude = row.get('latitude')
-        longitude = row.get('longitude')
         if pd.isna(latitude) or pd.isna(longitude):
             return None  # Skip invalid points
         location_name = row.get('name', f"Location_{row.name}")
@@ -215,12 +215,10 @@ def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selec
                 roi = roi.buffer(-30).bounds()
         except ValueError:
             return None  # Skip invalid polygons
     # Filter and aggregate the image collection
     collection = ee.ImageCollection(dataset_id) \
         .filterDate(ee.Date(start_date_str), ee.Date(end_date_str)) \
         .filterBounds(roi)
     if aggregation_period.lower() == 'custom (start date to end date)':
         collection = aggregate_data_custom(collection)
     elif aggregation_period.lower() == 'weekly':
@@ -229,12 +227,10 @@ def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selec
         collection = aggregate_data_monthly(collection, start_date_str, end_date_str)
     elif aggregation_period.lower() == 'yearly':
         collection = aggregate_data_yearly(collection)
     # Process each image in the collection
     image_list = collection.toList(collection.size())
     processed_weeks = set()
     aggregated_results = []
     for i in range(image_list.size().getInfo()):
         image = ee.Image(image_list.get(i))
         if aggregation_period.lower() == 'custom (start date to end date)':
@@ -258,7 +254,6 @@ def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selec
             timestamp = image.get('year')
             period_label = 'Year'
             date = ee.Date(timestamp).format('YYYY').getInfo()
         index_image = calculate_custom_formula(image, roi, selected_bands, custom_formula, reducer_choice, scale=30)
         try:
             index_value = index_image.reduceRegion(
@@ -276,21 +271,19 @@ def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selec
                     'Calculated Value': calculated_value
                 }
                 if shape_type.lower() == 'point':
-                    result['Latitude'] = latitude
-                    result['Longitude'] = longitude
                 aggregated_results.append(result)
         except Exception as e:
             st.error(f"Error retrieving value for {location_name}: {e}")
     return aggregated_results
 # Main processing function
-def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id, selected_bands, reducer_choice, shape_type, aggregation_period, custom_formula="", kernel_size=None, include_boundary=None):
     aggregated_results = []
     total_steps = len(locations_df)
     progress_bar = st.progress(0)
     progress_text = st.empty()
     start_time = time.time()  # Start timing the process
     with ThreadPoolExecutor(max_workers=10) as executor:
         futures = []
@@ -307,10 +300,11 @@ def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id,
                 aggregation_period,
                 custom_formula,
                 kernel_size,
-                include_boundary
             )
             futures.append(future)
         completed = 0
         for future in as_completed(futures):
             result = future.result()
@@ -320,11 +314,9 @@ def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id,
             progress_percentage = completed / total_steps
             progress_bar.progress(progress_percentage)
             progress_text.markdown(f"Processing: {int(progress_percentage * 100)}%")
     # End timing the process
     end_time = time.time()
     processing_time = end_time - start_time  # Calculate total processing time
     if aggregated_results:
         result_df = pd.DataFrame(aggregated_results)
         if aggregation_period.lower() == 'custom (start date to end date)':
@@ -334,8 +326,8 @@ def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id,
                 'Calculated Value': 'mean'
             }
             if shape_type.lower() == 'point':
-                agg_dict['Latitude'] = 'first'
-                agg_dict['Longitude'] = 'first'
             aggregated_output = result_df.groupby('Location Name').agg(agg_dict).reset_index()
             aggregated_output.rename(columns={'Calculated Value': 'Aggregated Value'}, inplace=True)
             return aggregated_output.to_dict(orient='records'), processing_time  # Return processing time
@@ -349,7 +341,6 @@ imagery_base = st.selectbox("Select Imagery Base", ["Sentinel", "Landsat", "MODI
 # Initialize data as an empty dictionary
 data = {}
 if imagery_base == "Sentinel":
     dataset_file = "sentinel_datasets.json"
     try:
@@ -401,12 +392,10 @@ if not data:
     st.stop()
 st.markdown("<hr><h5><b>{}</b></h5>".format(imagery_base), unsafe_allow_html=True)
 main_selection = st.selectbox(f"Select {imagery_base} Dataset Category", list(data.keys()))
 sub_selection = None
 dataset_id = None
 if main_selection:
     sub_options = data[main_selection]["sub_options"]
     sub_selection = st.selectbox(f"Select Specific {imagery_base} Dataset ID", list(sub_options.keys()))
@@ -416,7 +405,6 @@ if main_selection:
         dataset_id = sub_selection
 st.markdown("<hr><h5><b>Earth Engine Index Calculator</b></h5>", unsafe_allow_html=True)
 if main_selection and sub_selection:
     dataset_bands = data[main_selection]["bands"].get(sub_selection, [])
     st.write(f"Available Bands for {sub_options[sub_selection]}: {', '.join(dataset_bands)}")
@@ -426,11 +414,9 @@ if main_selection and sub_selection:
         default=[dataset_bands[0]] if dataset_bands else [],
         help=f"Select 1 or 2 bands from: {', '.join(dataset_bands)}"
     )
     if len(selected_bands) < 1:
         st.warning("Please select at least one band.")
         st.stop()
     if selected_bands:
         if len(selected_bands) == 1:
             default_formula = f"{selected_bands[0]}"
@@ -443,7 +429,6 @@ if main_selection and sub_selection:
             value=default_formula,
             help=f"Use only these bands: {', '.join(selected_bands)}. Examples: {example}"
         )
         def validate_formula(formula, selected_bands):
             allowed_chars = set(" +-*/()0123456789.")
             terms = re.findall(r'[a-zA-Z][a-zA-Z0-9_]*', formula)
@@ -453,7 +438,6 @@ if main_selection and sub_selection:
             if not all(char in allowed_chars or char in ''.join(selected_bands) for char in formula):
                 return False, "Formula contains invalid characters. Use only bands, numbers, and operators (+, -, *, /, ())"
             return True, ""
         is_valid, error_message = validate_formula(custom_formula, selected_bands)
         if not is_valid:
             st.error(error_message)
@@ -461,7 +445,6 @@ if main_selection and sub_selection:
         elif not custom_formula:
             st.warning("Please enter a custom formula to proceed.")
             st.stop()
         st.write(f"Custom Formula: {custom_formula}")
 reducer_choice = st.selectbox(
@@ -508,47 +491,40 @@ if file_upload is not None:
         if file_upload.name.endswith('.csv'):
             # Read the CSV file
             locations_df = pd.read_csv(file_upload)
             # Show the first few rows to help user identify columns
             st.write("Preview of your uploaded data (first 5 rows):")
             st.dataframe(locations_df.head())
-            # Get all column names from the uploaded file
-            all_columns = locations_df.columns.tolist()
-            # Let user select latitude and longitude columns from dropdown
             col1, col2 = st.columns(2)
             with col1:
                 lat_col = st.selectbox(
-                    "Select Latitude Column",
-                    options=all_columns,
-                    index=all_columns.index('latitude') if 'latitude' in all_columns else 0,
-                    help="Select the column containing latitude values"
                 )
             with col2:
                 lon_col = st.selectbox(
-                    "Select Longitude Column",
-                    options=all_columns,
-                    index=all_columns.index('longitude') if 'longitude' in all_columns else 0,
-                    help="Select the column containing longitude values"
                 )
             # Validate the selected columns contain numeric data
             if not pd.api.types.is_numeric_dtype(locations_df[lat_col]) or not pd.api.types.is_numeric_dtype(locations_df[lon_col]):
-                st.error("Error: Selected Latitude and Longitude columns must contain numeric values")
                 st.stop()
-            # Rename the selected columns to standard names for processing
-            locations_df = locations_df.rename(columns={
-                lat_col: 'latitude',
-                lon_col: 'longitude'
-            })
         elif file_upload.name.endswith('.geojson'):
             locations_df = gpd.read_file(file_upload)
             if 'geometry' in locations_df.columns:
-                locations_df['latitude'] = locations_df['geometry'].y
-                locations_df['longitude'] = locations_df['geometry'].x
             else:
                 st.error("GeoJSON file doesn't contain geometry column")
                 st.stop()
@@ -571,23 +547,25 @@ if file_upload is not None:
                     st.error("No valid Point data found in the KML file.")
                 else:
                     locations_df = gpd.GeoDataFrame(points, geometry=gpd.GeoSeries.from_wkt([p['geometry'] for p in points]), crs="EPSG:4326")
-                    locations_df['latitude'] = locations_df['geometry'].y
-                    locations_df['longitude'] = locations_df['geometry'].x
             except Exception as e:
                 st.error(f"Error parsing KML file: {str(e)}")
         # Display map for points if we have valid data
-        if not locations_df.empty and 'latitude' in locations_df.columns and 'longitude' in locations_df.columns:
-            m = leafmap.Map(center=[locations_df['latitude'].mean(), locations_df['longitude'].mean()], zoom=10)
             for _, row in locations_df.iterrows():
-                latitude = row['latitude']
-                longitude = row['longitude']
                 if pd.isna(latitude) or pd.isna(longitude):
                     continue
                 m.add_marker(location=[latitude, longitude], popup=row.get('name', 'No Name'))
             st.write("Map of Uploaded Points:")
             m.to_streamlit()
     elif shape_type.lower() == "polygon":
         if file_upload.name.endswith('.csv'):
             st.error("CSV upload not supported for polygons. Please upload a GeoJSON or KML file.")
@@ -617,7 +595,6 @@ if file_upload is not None:
                     locations_df = gpd.GeoDataFrame(polygons, geometry=gpd.GeoSeries.from_wkt([p['geometry'] for p in polygons]), crs="EPSG:4326")
             except Exception as e:
                 st.error(f"Error parsing KML file: {str(e)}")
         # Display map for polygons if we have valid data
         if not locations_df.empty and 'geometry' in locations_df.columns:
             centroid_lat = locations_df.geometry.centroid.y.mean()
@@ -646,14 +623,14 @@ if st.button(f"Calculate {custom_formula}"):
                     aggregation_period,
                     custom_formula,
                     kernel_size,
-                    include_boundary
                 )
                 if results:
                     result_df = pd.DataFrame(results)
                     st.write(f"Processed Results Table ({aggregation_period}) for Formula: {custom_formula}")
                     st.dataframe(result_df)
                     filename = f"{main_selection}_{dataset_id}_{start_date.strftime('%Y%m%d')}_{end_date.strftime('%Y%m%d')}_{aggregation_period.lower()}.csv"
                     st.download_button(
                         label="Download results as CSV",
@@ -661,12 +638,10 @@ if st.button(f"Calculate {custom_formula}"):
                         file_name=filename,
                         mime='text/csv'
                     )
                     st.success(f"Processing complete! Total processing time: {processing_time:.2f} seconds.")
                 else:
                     st.warning("No results were generated. Check your inputs or formula.")
                     st.info(f"Total processing time: {processing_time:.2f} seconds.")
             except Exception as e:
                 st.error(f"An error occurred during processing: {str(e)}")
     else:

     return ee.ImageCollection(yearly_images)
 # Worker function for processing a single geometry
+def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selected_bands, reducer_choice, shape_type, aggregation_period, custom_formula, kernel_size=None, include_boundary=None, lat_col=None, lon_col=None):
     if shape_type.lower() == "point":
+        latitude = row.get(lat_col)
+        longitude = row.get(lon_col)
         if pd.isna(latitude) or pd.isna(longitude):
             return None  # Skip invalid points
         location_name = row.get('name', f"Location_{row.name}")
                 roi = roi.buffer(-30).bounds()
         except ValueError:
             return None  # Skip invalid polygons
     # Filter and aggregate the image collection
     collection = ee.ImageCollection(dataset_id) \
         .filterDate(ee.Date(start_date_str), ee.Date(end_date_str)) \
         .filterBounds(roi)
     if aggregation_period.lower() == 'custom (start date to end date)':
         collection = aggregate_data_custom(collection)
     elif aggregation_period.lower() == 'weekly':
         collection = aggregate_data_monthly(collection, start_date_str, end_date_str)
     elif aggregation_period.lower() == 'yearly':
         collection = aggregate_data_yearly(collection)
     # Process each image in the collection
     image_list = collection.toList(collection.size())
     processed_weeks = set()
     aggregated_results = []
     for i in range(image_list.size().getInfo()):
         image = ee.Image(image_list.get(i))
         if aggregation_period.lower() == 'custom (start date to end date)':
             timestamp = image.get('year')
             period_label = 'Year'
             date = ee.Date(timestamp).format('YYYY').getInfo()
         index_image = calculate_custom_formula(image, roi, selected_bands, custom_formula, reducer_choice, scale=30)
         try:
             index_value = index_image.reduceRegion(
                     'Calculated Value': calculated_value
                 }
                 if shape_type.lower() == 'point':
+                    result[lat_col] = latitude
+                    result[lon_col] = longitude
                 aggregated_results.append(result)
         except Exception as e:
             st.error(f"Error retrieving value for {location_name}: {e}")
     return aggregated_results
 # Main processing function
+def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id, selected_bands, reducer_choice, shape_type, aggregation_period, custom_formula="", kernel_size=None, include_boundary=None, lat_col=None, lon_col=None):
     aggregated_results = []
     total_steps = len(locations_df)
     progress_bar = st.progress(0)
     progress_text = st.empty()
     start_time = time.time()  # Start timing the process
     with ThreadPoolExecutor(max_workers=10) as executor:
         futures = []
                 aggregation_period,
                 custom_formula,
                 kernel_size,
+                include_boundary,
+                lat_col,
+                lon_col
             )
             futures.append(future)
         completed = 0
         for future in as_completed(futures):
             result = future.result()
             progress_percentage = completed / total_steps
             progress_bar.progress(progress_percentage)
             progress_text.markdown(f"Processing: {int(progress_percentage * 100)}%")
     # End timing the process
     end_time = time.time()
     processing_time = end_time - start_time  # Calculate total processing time
     if aggregated_results:
         result_df = pd.DataFrame(aggregated_results)
         if aggregation_period.lower() == 'custom (start date to end date)':
                 'Calculated Value': 'mean'
             }
             if shape_type.lower() == 'point':
+                agg_dict[lat_col] = 'first'
+                agg_dict[lon_col] = 'first'
             aggregated_output = result_df.groupby('Location Name').agg(agg_dict).reset_index()
             aggregated_output.rename(columns={'Calculated Value': 'Aggregated Value'}, inplace=True)
             return aggregated_output.to_dict(orient='records'), processing_time  # Return processing time
 # Initialize data as an empty dictionary
 data = {}
 if imagery_base == "Sentinel":
     dataset_file = "sentinel_datasets.json"
     try:
     st.stop()
 st.markdown("<hr><h5><b>{}</b></h5>".format(imagery_base), unsafe_allow_html=True)
 main_selection = st.selectbox(f"Select {imagery_base} Dataset Category", list(data.keys()))
 sub_selection = None
 dataset_id = None
 if main_selection:
     sub_options = data[main_selection]["sub_options"]
     sub_selection = st.selectbox(f"Select Specific {imagery_base} Dataset ID", list(sub_options.keys()))
         dataset_id = sub_selection
 st.markdown("<hr><h5><b>Earth Engine Index Calculator</b></h5>", unsafe_allow_html=True)
 if main_selection and sub_selection:
     dataset_bands = data[main_selection]["bands"].get(sub_selection, [])
     st.write(f"Available Bands for {sub_options[sub_selection]}: {', '.join(dataset_bands)}")
         default=[dataset_bands[0]] if dataset_bands else [],
         help=f"Select 1 or 2 bands from: {', '.join(dataset_bands)}"
     )
     if len(selected_bands) < 1:
         st.warning("Please select at least one band.")
         st.stop()
     if selected_bands:
         if len(selected_bands) == 1:
             default_formula = f"{selected_bands[0]}"
             value=default_formula,
             help=f"Use only these bands: {', '.join(selected_bands)}. Examples: {example}"
         )
         def validate_formula(formula, selected_bands):
             allowed_chars = set(" +-*/()0123456789.")
             terms = re.findall(r'[a-zA-Z][a-zA-Z0-9_]*', formula)
             if not all(char in allowed_chars or char in ''.join(selected_bands) for char in formula):
                 return False, "Formula contains invalid characters. Use only bands, numbers, and operators (+, -, *, /, ())"
             return True, ""
         is_valid, error_message = validate_formula(custom_formula, selected_bands)
         if not is_valid:
             st.error(error_message)
         elif not custom_formula:
             st.warning("Please enter a custom formula to proceed.")
             st.stop()
         st.write(f"Custom Formula: {custom_formula}")
 reducer_choice = st.selectbox(
         if file_upload.name.endswith('.csv'):
             # Read the CSV file
             locations_df = pd.read_csv(file_upload)
             # Show the first few rows to help user identify columns
             st.write("Preview of your uploaded data (first 5 rows):")
             st.dataframe(locations_df.head())
+            # Dynamically populate dropdown menus for latitude and longitude
             col1, col2 = st.columns(2)
             with col1:
                 lat_col = st.selectbox(
+                    "Select the Latitude column",
+                    options=locations_df.columns,
+                    help="Choose the column containing latitude values."
                 )
             with col2:
                 lon_col = st.selectbox(
+                    "Select the Longitude column",
+                    options=locations_df.columns,
+                    help="Choose the column containing longitude values."
                 )
             # Validate the selected columns contain numeric data
             if not pd.api.types.is_numeric_dtype(locations_df[lat_col]) or not pd.api.types.is_numeric_dtype(locations_df[lon_col]):
+                st.error("Error: Selected Latitude and Longitude columns must contain numeric values.")
                 st.stop()
         elif file_upload.name.endswith('.geojson'):
             locations_df = gpd.read_file(file_upload)
             if 'geometry' in locations_df.columns:
+                # Extract latitude and longitude from geometry
+                locations_df['original_latitude'] = locations_df['geometry'].y
+                locations_df['original_longitude'] = locations_df['geometry'].x
+                # Preserve original column names
+                lat_col = 'original_latitude'
+                lon_col = 'original_longitude'
             else:
                 st.error("GeoJSON file doesn't contain geometry column")
                 st.stop()
                     st.error("No valid Point data found in the KML file.")
                 else:
                     locations_df = gpd.GeoDataFrame(points, geometry=gpd.GeoSeries.from_wkt([p['geometry'] for p in points]), crs="EPSG:4326")
+                    locations_df['original_latitude'] = locations_df['geometry'].y
+                    locations_df['original_longitude'] = locations_df['geometry'].x
+                    # Preserve original column names
+                    lat_col = 'original_latitude'
+                    lon_col = 'original_longitude'
             except Exception as e:
                 st.error(f"Error parsing KML file: {str(e)}")
         # Display map for points if we have valid data
+        if not locations_df.empty and lat_col in locations_df.columns and lon_col in locations_df.columns:
+            m = leafmap.Map(center=[locations_df[lat_col].mean(), locations_df[lon_col].mean()], zoom=10)
             for _, row in locations_df.iterrows():
+                latitude = row[lat_col]
+                longitude = row[lon_col]
                 if pd.isna(latitude) or pd.isna(longitude):
                     continue
                 m.add_marker(location=[latitude, longitude], popup=row.get('name', 'No Name'))
             st.write("Map of Uploaded Points:")
             m.to_streamlit()
     elif shape_type.lower() == "polygon":
         if file_upload.name.endswith('.csv'):
             st.error("CSV upload not supported for polygons. Please upload a GeoJSON or KML file.")
                     locations_df = gpd.GeoDataFrame(polygons, geometry=gpd.GeoSeries.from_wkt([p['geometry'] for p in polygons]), crs="EPSG:4326")
             except Exception as e:
                 st.error(f"Error parsing KML file: {str(e)}")
         # Display map for polygons if we have valid data
         if not locations_df.empty and 'geometry' in locations_df.columns:
             centroid_lat = locations_df.geometry.centroid.y.mean()
                     aggregation_period,
                     custom_formula,
                     kernel_size,
+                    include_boundary,
+                    lat_col=lat_col if shape_type.lower() == "point" else None,
+                    lon_col=lon_col if shape_type.lower() == "point" else None
                 )
                 if results:
                     result_df = pd.DataFrame(results)
                     st.write(f"Processed Results Table ({aggregation_period}) for Formula: {custom_formula}")
                     st.dataframe(result_df)
                     filename = f"{main_selection}_{dataset_id}_{start_date.strftime('%Y%m%d')}_{end_date.strftime('%Y%m%d')}_{aggregation_period.lower()}.csv"
                     st.download_button(
                         label="Download results as CSV",
                         file_name=filename,
                         mime='text/csv'
                     )
                     st.success(f"Processing complete! Total processing time: {processing_time:.2f} seconds.")
                 else:
                     st.warning("No results were generated. Check your inputs or formula.")
                     st.info(f"Total processing time: {processing_time:.2f} seconds.")
             except Exception as e:
                 st.error(f"An error occurred during processing: {str(e)}")
     else: