YashMK89 committed on
Commit
609ffa3
·
verified ·
1 Parent(s): c84291c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -98
app.py CHANGED
@@ -11,8 +11,6 @@ from shapely.geometry import base
11
  from xml.etree import ElementTree as XET
12
  from concurrent.futures import ThreadPoolExecutor, as_completed
13
  import time
14
- import matplotlib.pyplot as plt
15
- import matplotlib.dates as mdates
16
 
17
  # Set up the page layout
18
  st.set_page_config(layout="wide")
@@ -145,9 +143,6 @@ def calculate_custom_formula(image, geometry, selected_bands, custom_formula, re
145
  return ee.Image(0).rename('custom_result').set('error', str(e))
146
 
147
  # Aggregation functions
148
- def aggregate_data_daily(collection):
149
- return collection
150
-
151
  def aggregate_data_custom(collection):
152
  collection = collection.map(lambda image: image.set('day', ee.Date(image.get('system:time_start')).format('YYYY-MM-dd')))
153
  grouped_by_day = collection.aggregate_array('day').distinct()
@@ -158,27 +153,21 @@ def aggregate_data_custom(collection):
158
  daily_images = ee.List(grouped_by_day.map(calculate_daily_mean))
159
  return ee.ImageCollection(daily_images)
160
 
161
- def aggregate_data_weekly(collection, start_date_str, end_date_str):
162
- start_date = ee.Date(start_date_str)
163
- end_date = ee.Date(end_date_str)
164
-
165
- # Create weekly intervals from start date to end date
166
- n_weeks = end_date.difference(start_date, 'week').ceil()
167
- week_starts = ee.List.sequence(0, n_weeks).map(lambda week: start_date.advance(week, 'week'))
168
-
 
169
  def calculate_weekly_mean(week_start):
170
- week_end = ee.Date(week_start).advance(1, 'week')
171
- weekly_collection = collection.filterDate(week_start, week_end)
172
  weekly_mean = weekly_collection.mean()
173
-
174
- return weekly_mean.set({
175
- 'week_start': week_start,
176
- 'week_end': week_end,
177
- 'system:time_start': ee.Date(week_start).millis()
178
- })
179
-
180
- weekly_images = ee.ImageCollection(week_starts.map(calculate_weekly_mean))
181
- return weekly_images.filter(ee.Filter.lte('system:time_start', end_date.millis()))
182
 
183
  def aggregate_data_monthly(collection, start_date, end_date):
184
  collection = collection.filterDate(start_date, end_date)
@@ -207,15 +196,15 @@ def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selec
207
  latitude = row.get('latitude')
208
  longitude = row.get('longitude')
209
  if pd.isna(latitude) or pd.isna(longitude):
210
- return None
211
  location_name = row.get('name', f"Location_{row.name}")
212
  if kernel_size == "3x3 Kernel":
213
- buffer_size = 45
214
  roi = ee.Geometry.Point([longitude, latitude]).buffer(buffer_size).bounds()
215
  elif kernel_size == "5x5 Kernel":
216
- buffer_size = 75
217
  roi = ee.Geometry.Point([longitude, latitude]).buffer(buffer_size).bounds()
218
- else:
219
  roi = ee.Geometry.Point([longitude, latitude])
220
  elif shape_type.lower() == "polygon":
221
  polygon_geometry = row.get('geometry')
@@ -225,46 +214,42 @@ def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selec
225
  if not include_boundary:
226
  roi = roi.buffer(-30).bounds()
227
  except ValueError:
228
- return None
229
 
 
230
  collection = ee.ImageCollection(dataset_id) \
231
  .filterDate(ee.Date(start_date_str), ee.Date(end_date_str)) \
232
  .filterBounds(roi)
233
 
234
- if aggregation_period.lower() == 'daily':
235
- collection = aggregate_data_daily(collection)
236
- elif aggregation_period.lower() == 'custom (start date to end date)':
237
  collection = aggregate_data_custom(collection)
238
  elif aggregation_period.lower() == 'weekly':
239
- collection = aggregate_data_weekly(collection, start_date_str, end_date_str)
240
  elif aggregation_period.lower() == 'monthly':
241
  collection = aggregate_data_monthly(collection, start_date_str, end_date_str)
242
  elif aggregation_period.lower() == 'yearly':
243
  collection = aggregate_data_yearly(collection)
244
 
 
245
  image_list = collection.toList(collection.size())
246
- processed_periods = set()
247
  aggregated_results = []
248
 
249
  for i in range(image_list.size().getInfo()):
250
  image = ee.Image(image_list.get(i))
251
-
252
- if aggregation_period.lower() == 'daily':
253
- timestamp = image.get('system:time_start')
254
- period_label = 'Date'
255
- date = ee.Date(timestamp).format('YYYY-MM-dd').getInfo()
256
- elif aggregation_period.lower() == 'custom (start date to end date)':
257
  timestamp = image.get('day')
258
  period_label = 'Date'
259
  date = ee.Date(timestamp).format('YYYY-MM-dd').getInfo()
260
  elif aggregation_period.lower() == 'weekly':
261
- week_start = image.get('week_start').getInfo()
262
- week_end = image.get('week_end').getInfo()
263
  period_label = 'Week'
264
- date = f"{week_start} to {week_end}"
265
- if date in processed_periods:
 
 
266
  continue
267
- processed_periods.add(date)
268
  elif aggregation_period.lower() == 'monthly':
269
  timestamp = image.get('month')
270
  period_label = 'Month'
@@ -288,12 +273,11 @@ def process_single_geometry(row, start_date_str, end_date_str, dataset_id, selec
288
  period_label: date,
289
  'Start Date': start_date_str,
290
  'End Date': end_date_str,
291
- 'Calculated Value': calculated_value,
292
- 'Date': pd.to_datetime(date.split(' to ')[0]) if ' to ' in date else pd.to_datetime(date)
293
  }
294
  if shape_type.lower() == 'point':
295
- result[original_lat_col] = latitude
296
- result[original_lon_col] = longitude
297
  aggregated_results.append(result)
298
  except Exception as e:
299
  st.error(f"Error retrieving value for {location_name}: {e}")
@@ -307,7 +291,7 @@ def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id,
307
  progress_bar = st.progress(0)
308
  progress_text = st.empty()
309
 
310
- start_time = time.time()
311
  with ThreadPoolExecutor(max_workers=10) as executor:
312
  futures = []
313
  for idx, row in locations_df.iterrows():
@@ -339,8 +323,9 @@ def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id,
339
  progress_bar.progress(progress_percentage)
340
  progress_text.markdown(f"Processing: {int(progress_percentage * 100)}%")
341
 
 
342
  end_time = time.time()
343
- processing_time = end_time - start_time
344
 
345
  if aggregated_results:
346
  result_df = pd.DataFrame(aggregated_results)
@@ -355,47 +340,10 @@ def process_aggregation(locations_df, start_date_str, end_date_str, dataset_id,
355
  agg_dict[original_lon_col] = 'first'
356
  aggregated_output = result_df.groupby('Location Name').agg(agg_dict).reset_index()
357
  aggregated_output.rename(columns={'Calculated Value': 'Aggregated Value'}, inplace=True)
358
- return aggregated_output, processing_time
359
  else:
360
- return result_df, processing_time
361
- return pd.DataFrame(), processing_time
362
-
363
- # Visualization function
364
- def plot_results(result_df, aggregation_period):
365
- if not result_df.empty and 'Date' in result_df.columns:
366
- fig, ax = plt.subplots(figsize=(12, 6))
367
-
368
- # Group by location and plot each line
369
- for name, group in result_df.groupby('Location Name'):
370
- group = group.sort_values('Date')
371
- if aggregation_period.lower() == 'weekly':
372
- # For weekly data, use the midpoint of the week for plotting
373
- dates = group['Date'] + pd.Timedelta(days=3)
374
- ax.plot(dates, group['Calculated Value'], 'o-', label=name)
375
- else:
376
- ax.plot(group['Date'], group['Calculated Value'], 'o-', label=name)
377
-
378
- # Format x-axis based on time period
379
- if aggregation_period.lower() == 'daily':
380
- ax.xaxis.set_major_locator(mdates.DayLocator(interval=2))
381
- ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
382
- elif aggregation_period.lower() == 'weekly':
383
- ax.xaxis.set_major_locator(mdates.WeekdayLocator(byweekday=mdates.MO))
384
- ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
385
- elif aggregation_period.lower() == 'monthly':
386
- ax.xaxis.set_major_locator(mdates.MonthLocator())
387
- ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
388
- elif aggregation_period.lower() == 'yearly':
389
- ax.xaxis.set_major_locator(mdates.YearLocator())
390
- ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
391
-
392
- plt.xticks(rotation=45)
393
- plt.xlabel('Date')
394
- plt.ylabel('Calculated Value')
395
- plt.title(f'{custom_formula} Values Over Time ({aggregation_period})')
396
- plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
397
- plt.tight_layout()
398
- st.pyplot(fig)
399
 
400
  # Streamlit App Logic
401
  st.markdown("<h5>Image Collection</h5>", unsafe_allow_html=True)
@@ -530,8 +478,8 @@ start_date_str = start_date.strftime('%Y-%m-%d')
530
  end_date_str = end_date.strftime('%Y-%m-%d')
531
 
532
  aggregation_period = st.selectbox(
533
- "Select Aggregation Period",
534
- ["Daily", "Custom (Start Date to End Date)", "Weekly", "Monthly", "Yearly"],
535
  index=0
536
  )
537
 
@@ -562,12 +510,17 @@ original_lon_col = None
562
  if file_upload is not None:
563
  if shape_type.lower() == "point":
564
  if file_upload.name.endswith('.csv'):
 
565
  locations_df = pd.read_csv(file_upload)
 
 
566
  st.write("Preview of your uploaded data (first 5 rows):")
567
  st.dataframe(locations_df.head())
568
 
 
569
  all_columns = locations_df.columns.tolist()
570
 
 
571
  col1, col2 = st.columns(2)
572
  with col1:
573
  original_lat_col = st.selectbox(
@@ -584,10 +537,12 @@ if file_upload is not None:
584
  help="Select the column containing longitude values"
585
  )
586
 
 
587
  if not pd.api.types.is_numeric_dtype(locations_df[original_lat_col]) or not pd.api.types.is_numeric_dtype(locations_df[original_lon_col]):
588
  st.error("Error: Selected Latitude and Longitude columns must contain numeric values")
589
  st.stop()
590
 
 
591
  locations_df = locations_df.rename(columns={
592
  original_lat_col: 'latitude',
593
  original_lon_col: 'longitude'
@@ -629,6 +584,7 @@ if file_upload is not None:
629
  except Exception as e:
630
  st.error(f"Error parsing KML file: {str(e)}")
631
 
 
632
  if not locations_df.empty and 'latitude' in locations_df.columns and 'longitude' in locations_df.columns:
633
  m = leafmap.Map(center=[locations_df['latitude'].mean(), locations_df['longitude'].mean()], zoom=10)
634
  for _, row in locations_df.iterrows():
@@ -670,6 +626,7 @@ if file_upload is not None:
670
  except Exception as e:
671
  st.error(f"Error parsing KML file: {str(e)}")
672
 
 
673
  if not locations_df.empty and 'geometry' in locations_df.columns:
674
  centroid_lat = locations_df.geometry.centroid.y.mean()
675
  centroid_lon = locations_df.geometry.centroid.x.mean()
@@ -686,7 +643,7 @@ if st.button(f"Calculate {custom_formula}"):
686
  if not locations_df.empty:
687
  with st.spinner("Processing Data..."):
688
  try:
689
- result_df, processing_time = process_aggregation(
690
  locations_df,
691
  start_date_str,
692
  end_date_str,
@@ -702,13 +659,11 @@ if st.button(f"Calculate {custom_formula}"):
702
  include_boundary
703
  )
704
 
705
- if not result_df.empty:
 
706
  st.write(f"Processed Results Table ({aggregation_period}) for Formula: {custom_formula}")
707
  st.dataframe(result_df)
708
 
709
- # Plot the results
710
- plot_results(result_df, aggregation_period)
711
-
712
  filename = f"{main_selection}_{dataset_id}_{start_date.strftime('%Y%m%d')}_{end_date.strftime('%Y%m%d')}_{aggregation_period.lower()}.csv"
713
  st.download_button(
714
  label="Download results as CSV",
 
11
  from xml.etree import ElementTree as XET
12
  from concurrent.futures import ThreadPoolExecutor, as_completed
13
  import time
 
 
14
 
15
  # Set up the page layout
16
  st.set_page_config(layout="wide")
 
143
  return ee.Image(0).rename('custom_result').set('error', str(e))
144
 
145
  # Aggregation functions
 
 
 
146
  def aggregate_data_custom(collection):
147
  collection = collection.map(lambda image: image.set('day', ee.Date(image.get('system:time_start')).format('YYYY-MM-dd')))
148
  grouped_by_day = collection.aggregate_array('day').distinct()
 
153
  daily_images = ee.List(grouped_by_day.map(calculate_daily_mean))
154
  return ee.ImageCollection(daily_images)
155
 
156
def aggregate_data_weekly(collection):
    """Collapse an ee.ImageCollection into one mean composite per week.

    Every image is tagged with the date string of the first day of its week
    (computed server-side with ee.Date.getRelative('day', 'week')); images
    sharing a 'week_start' tag are then averaged into a single image that
    keeps the tag, so callers can label each weekly composite.
    """
    def tag_with_week_start(img):
        # Rewind the acquisition timestamp to the start of its week.
        acq_date = ee.Date(img.get('system:time_start'))
        day_offset = ee.Number(acq_date.getRelative('day', 'week')).multiply(-1)
        return img.set('week_start', acq_date.advance(day_offset, 'day').format('YYYY-MM-dd'))

    tagged = collection.map(tag_with_week_start)
    week_labels = tagged.aggregate_array('week_start').distinct()

    def mean_for_week(label):
        # Average all images carrying this week label; keep the label on the result.
        return tagged.filter(ee.Filter.eq('week_start', label)).mean().set('week_start', label)

    return ee.ImageCollection(ee.List(week_labels.map(mean_for_week)))
 
 
 
 
 
 
171
 
172
  def aggregate_data_monthly(collection, start_date, end_date):
173
  collection = collection.filterDate(start_date, end_date)
 
196
  latitude = row.get('latitude')
197
  longitude = row.get('longitude')
198
  if pd.isna(latitude) or pd.isna(longitude):
199
+ return None # Skip invalid points
200
  location_name = row.get('name', f"Location_{row.name}")
201
  if kernel_size == "3x3 Kernel":
202
+ buffer_size = 45 # 90m x 90m
203
  roi = ee.Geometry.Point([longitude, latitude]).buffer(buffer_size).bounds()
204
  elif kernel_size == "5x5 Kernel":
205
+ buffer_size = 75 # 150m x 150m
206
  roi = ee.Geometry.Point([longitude, latitude]).buffer(buffer_size).bounds()
207
+ else: # Point
208
  roi = ee.Geometry.Point([longitude, latitude])
209
  elif shape_type.lower() == "polygon":
210
  polygon_geometry = row.get('geometry')
 
214
  if not include_boundary:
215
  roi = roi.buffer(-30).bounds()
216
  except ValueError:
217
+ return None # Skip invalid polygons
218
 
219
+ # Filter and aggregate the image collection
220
  collection = ee.ImageCollection(dataset_id) \
221
  .filterDate(ee.Date(start_date_str), ee.Date(end_date_str)) \
222
  .filterBounds(roi)
223
 
224
+ if aggregation_period.lower() == 'custom (start date to end date)':
 
 
225
  collection = aggregate_data_custom(collection)
226
  elif aggregation_period.lower() == 'weekly':
227
+ collection = aggregate_data_weekly(collection)
228
  elif aggregation_period.lower() == 'monthly':
229
  collection = aggregate_data_monthly(collection, start_date_str, end_date_str)
230
  elif aggregation_period.lower() == 'yearly':
231
  collection = aggregate_data_yearly(collection)
232
 
233
+ # Process each image in the collection
234
  image_list = collection.toList(collection.size())
235
+ processed_weeks = set()
236
  aggregated_results = []
237
 
238
  for i in range(image_list.size().getInfo()):
239
  image = ee.Image(image_list.get(i))
240
+ if aggregation_period.lower() == 'custom (start date to end date)':
 
 
 
 
 
241
  timestamp = image.get('day')
242
  period_label = 'Date'
243
  date = ee.Date(timestamp).format('YYYY-MM-dd').getInfo()
244
  elif aggregation_period.lower() == 'weekly':
245
+ timestamp = image.get('week_start')
 
246
  period_label = 'Week'
247
+ date = ee.String(timestamp).getInfo()
248
+ if (pd.to_datetime(date) < pd.to_datetime(start_date_str) or
249
+ pd.to_datetime(date) > pd.to_datetime(end_date_str) or
250
+ date in processed_weeks):
251
  continue
252
+ processed_weeks.add(date)
253
  elif aggregation_period.lower() == 'monthly':
254
  timestamp = image.get('month')
255
  period_label = 'Month'
 
273
  period_label: date,
274
  'Start Date': start_date_str,
275
  'End Date': end_date_str,
276
+ 'Calculated Value': calculated_value
 
277
  }
278
  if shape_type.lower() == 'point':
279
+ result[original_lat_col] = latitude # Use original column name
280
+ result[original_lon_col] = longitude # Use original column name
281
  aggregated_results.append(result)
282
  except Exception as e:
283
  st.error(f"Error retrieving value for {location_name}: {e}")
 
291
  progress_bar = st.progress(0)
292
  progress_text = st.empty()
293
 
294
+ start_time = time.time() # Start timing the process
295
  with ThreadPoolExecutor(max_workers=10) as executor:
296
  futures = []
297
  for idx, row in locations_df.iterrows():
 
323
  progress_bar.progress(progress_percentage)
324
  progress_text.markdown(f"Processing: {int(progress_percentage * 100)}%")
325
 
326
+ # End timing the process
327
  end_time = time.time()
328
+ processing_time = end_time - start_time # Calculate total processing time
329
 
330
  if aggregated_results:
331
  result_df = pd.DataFrame(aggregated_results)
 
340
  agg_dict[original_lon_col] = 'first'
341
  aggregated_output = result_df.groupby('Location Name').agg(agg_dict).reset_index()
342
  aggregated_output.rename(columns={'Calculated Value': 'Aggregated Value'}, inplace=True)
343
+ return aggregated_output.to_dict(orient='records'), processing_time
344
  else:
345
+ return result_df.to_dict(orient='records'), processing_time
346
+ return [], processing_time
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
347
 
348
  # Streamlit App Logic
349
  st.markdown("<h5>Image Collection</h5>", unsafe_allow_html=True)
 
478
  end_date_str = end_date.strftime('%Y-%m-%d')
479
 
480
  aggregation_period = st.selectbox(
481
+ "Select Aggregation Period (e.g, Custom(Start Date to End Date) , Weekly , Monthly , Yearly)",
482
+ ["Custom (Start Date to End Date)", "Weekly", "Monthly", "Yearly"],
483
  index=0
484
  )
485
 
 
510
  if file_upload is not None:
511
  if shape_type.lower() == "point":
512
  if file_upload.name.endswith('.csv'):
513
+ # Read the CSV file
514
  locations_df = pd.read_csv(file_upload)
515
+
516
+ # Show the first few rows to help user identify columns
517
  st.write("Preview of your uploaded data (first 5 rows):")
518
  st.dataframe(locations_df.head())
519
 
520
+ # Get all column names from the uploaded file
521
  all_columns = locations_df.columns.tolist()
522
 
523
+ # Let user select latitude and longitude columns from dropdown
524
  col1, col2 = st.columns(2)
525
  with col1:
526
  original_lat_col = st.selectbox(
 
537
  help="Select the column containing longitude values"
538
  )
539
 
540
+ # Validate the selected columns contain numeric data
541
  if not pd.api.types.is_numeric_dtype(locations_df[original_lat_col]) or not pd.api.types.is_numeric_dtype(locations_df[original_lon_col]):
542
  st.error("Error: Selected Latitude and Longitude columns must contain numeric values")
543
  st.stop()
544
 
545
+ # Rename the selected columns to standard names for processing
546
  locations_df = locations_df.rename(columns={
547
  original_lat_col: 'latitude',
548
  original_lon_col: 'longitude'
 
584
  except Exception as e:
585
  st.error(f"Error parsing KML file: {str(e)}")
586
 
587
+ # Display map for points if we have valid data
588
  if not locations_df.empty and 'latitude' in locations_df.columns and 'longitude' in locations_df.columns:
589
  m = leafmap.Map(center=[locations_df['latitude'].mean(), locations_df['longitude'].mean()], zoom=10)
590
  for _, row in locations_df.iterrows():
 
626
  except Exception as e:
627
  st.error(f"Error parsing KML file: {str(e)}")
628
 
629
+ # Display map for polygons if we have valid data
630
  if not locations_df.empty and 'geometry' in locations_df.columns:
631
  centroid_lat = locations_df.geometry.centroid.y.mean()
632
  centroid_lon = locations_df.geometry.centroid.x.mean()
 
643
  if not locations_df.empty:
644
  with st.spinner("Processing Data..."):
645
  try:
646
+ results, processing_time = process_aggregation(
647
  locations_df,
648
  start_date_str,
649
  end_date_str,
 
659
  include_boundary
660
  )
661
 
662
+ if results:
663
+ result_df = pd.DataFrame(results)
664
  st.write(f"Processed Results Table ({aggregation_period}) for Formula: {custom_formula}")
665
  st.dataframe(result_df)
666
 
 
 
 
667
  filename = f"{main_selection}_{dataset_id}_{start_date.strftime('%Y%m%d')}_{end_date.strftime('%Y%m%d')}_{aggregation_period.lower()}.csv"
668
  st.download_button(
669
  label="Download results as CSV",