# calculator / app.py
import datetime
import json
import logging
import os
import duckdb
import ee
import gradio as gr
import pandas as pd
import plotly.graph_objects as go
import yaml
import numpy as np
from google.oauth2 import service_account
from utils.js import get_window_url_params
# Logging
logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
# Define constants
DATE = "2020-01-01"
YEAR = 2020
LOCATION = [-74.653370, 5.845328]  # [longitude, latitude]
ROI_RADIUS = 20000
GEE_SERVICE_ACCOUNT = (
"climatebase-july-2023@ee-geospatialml-aquarry.iam.gserviceaccount.com"
)
INDICES_FILE = "indices.yaml"
START_YEAR = 2015
END_YEAR = 2022
class IndexGenerator:
"""
A class to generate indices and compute zonal means.
Args:
        centroid (tuple): The centroid coordinates (longitude, latitude) of the region of interest.
        roi_radius (int): The radius (in meters) of the buffer around the centroid used as the region of interest.
        year (int): The year for which indices are generated.
        indices_file (str): Path to the YAML file containing the index configurations.
        project_name (str, optional): The name of the project. Defaults to "".
        map (geemap.Map, optional): Map object on which generated index layers are displayed. Defaults to None (i.e. no map is used).
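
    Example (illustrative; assumes Earth Engine is already authenticated and an
    indices.yaml with index configurations exists; the project name is a placeholder):
        ig = IndexGenerator(
            centroid=[-74.653370, 5.845328],
            roi_radius=20000,
            year=2020,
            indices_file="indices.yaml",
            project_name="example-project",
        )
        df = ig.generate_composite_index_df(list(ig.indices.keys()))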
"""
def __init__(
self,
centroid,
roi_radius,
year,
indices_file,
project_name="",
map=None,
):
self.indices = self._load_indices(indices_file)
self.centroid = centroid
self.roi = ee.Geometry.Point(*centroid).buffer(roi_radius)
self.year = year
self.start_date = str(datetime.date(self.year, 1, 1))
self.end_date = str(datetime.date(self.year, 12, 31))
self.daterange = [self.start_date, self.end_date]
self.project_name = project_name
self.map = map
if self.map is not None:
self.show = True
else:
self.show = False
def _cloudfree(self, gee_path):
"""
Internal method to generate a cloud-free composite.
Args:
gee_path (str): The path to the Google Earth Engine (GEE) image or image collection.
Returns:
ee.Image: The cloud-free composite clipped to the region of interest.
"""
# Load a raw Landsat ImageCollection for a single year.
collection = (
ee.ImageCollection(gee_path)
.filterDate(*self.daterange)
.filterBounds(self.roi)
)
# Create a cloud-free composite with custom parameters for cloud score threshold and percentile.
composite_cloudfree = ee.Algorithms.Landsat.simpleComposite(
**{"collection": collection, "percentile": 75, "cloudScoreRange": 5}
)
return composite_cloudfree.clip(self.roi)
def _load_indices(self, indices_file):
# Read index configurations
with open(indices_file, "r") as stream:
try:
return yaml.safe_load(stream)
except yaml.YAMLError as e:
logging.error(e)
return None
def show_map(self, map=None):
if map is not None:
self.map = map
self.show = True
def disable_map(self):
self.show = False
def generate_index(self, index_config):
"""
Generates an index based on the provided index configuration.
Args:
index_config (dict): Configuration for generating the index.
Returns:
ee.Image: The generated index clipped to the region of interest.
"""
match index_config["gee_type"]:
case "image":
dataset = ee.Image(index_config["gee_path"]).clip(self.roi)
if index_config.get("select"):
dataset = dataset.select(index_config["select"])
case "image_collection":
dataset = (
ee.ImageCollection(index_config["gee_path"])
.filterBounds(self.roi)
.map(lambda image: image.clip(self.roi))
.mean()
)
if index_config.get("select"):
dataset = dataset.select(index_config["select"])
case "feature_collection":
dataset = (
ee.Image()
.float()
.paint(
ee.FeatureCollection(index_config["gee_path"]),
index_config["select"],
)
.clip(self.roi)
)
case "algebraic":
image = self._cloudfree(index_config["gee_path"])
dataset = image.normalizedDifference(["B4", "B3"])
case _:
dataset = None
        if dataset is None:
            raise Exception("Failed to generate dataset.")
if self.show and index_config.get("show"):
            self.map.addLayer(dataset, index_config["viz"], index_config["name"])
logging.info(f"Generated index: {index_config['name']}")
return dataset
def zonal_mean_index(self, index_key):
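        """
        Computes the mean of a generated index over the region of interest via reduceRegion.
        Returns the scalar for the configured `bandname` if present, otherwise the full
        reduceRegion result dict.
        """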
index_config = self.indices[index_key]
dataset = self.generate_index(index_config)
# zm = self._zonal_mean(single, index_config.get('bandname') or 'constant')
out = dataset.reduceRegion(
**{
"reducer": ee.Reducer.mean(),
"geometry": self.roi,
"scale": 200, # map scale
}
).getInfo()
if index_config.get("bandname"):
return out[index_config.get("bandname")]
return out
def generate_composite_index_df(self, indices=[]):
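        """
        Builds a DataFrame with one row per index containing the zonal mean ("value"),
        plus the year, centroid, project name, ROI area (m^2), and ROI GeoJSON.
        """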
data = {
"metric": indices,
"year": self.year,
"centroid": str(self.centroid),
"project_name": self.project_name,
"value": list(map(self.zonal_mean_index, indices)),
"area": self.roi.area().getInfo(), # m^2
"geojson": str(self.roi.getInfo()),
# to-do: coefficient
}
logging.info("data", data)
df = pd.DataFrame(data)
return df
def set_up_duckdb():
logging.info("set up duckdb")
# use `climatebase` db
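    # The MotherDuck extension reads the `motherduck_token` environment variable when
    # connecting through an "md:" URI, so the token must be set before the app starts.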
if not os.getenv("motherduck_token"):
raise Exception(
"No motherduck token found. Please set the `motherduck_token` environment variable."
)
else:
con = duckdb.connect("md:climatebase")
con.sql("USE climatebase;")
# load extensions
con.sql("""INSTALL spatial; LOAD spatial;""")
return con
def authenticate_ee(ee_service_account):
"""
    Hugging Face Spaces does not support secret files, so authentication uses an environment
    variable containing the service-account JSON key.
"""
logging.info("authenticate_ee")
credentials = ee.ServiceAccountCredentials(
ee_service_account, key_data=os.environ["ee_service_account"]
)
ee.Initialize(credentials)
def load_indices(indices_file):
# Read index configurations
with open(indices_file, "r") as stream:
try:
return yaml.safe_load(stream)
except yaml.YAMLError as e:
logging.error(e)
return None
def create_dataframe(years, project_name):
dfs = []
logging.info(years)
indices = load_indices(INDICES_FILE)
for year in years:
logging.info(year)
ig = IndexGenerator(
centroid=LOCATION,
roi_radius=ROI_RADIUS,
year=year,
indices_file=INDICES_FILE,
project_name=project_name,
)
df = ig.generate_composite_index_df(list(indices.keys()))
dfs.append(df)
return pd.concat(dfs)
# h/t: https://community.plotly.com/t/dynamic-zoom-for-mapbox/32658/12
def get_plotting_zoom_level_and_center_coordinates_from_lonlat_tuples(
longitudes=None, latitudes=None
):
"""Function documentation:\n
Basic framework adopted from Krichardson under the following thread:
https://community.plotly.com/t/dynamic-zoom-for-mapbox/32658/7
# NOTE:
# THIS IS A TEMPORARY SOLUTION UNTIL THE DASH TEAM IMPLEMENTS DYNAMIC ZOOM
# in their plotly-functions associated with mapbox, such as go.Densitymapbox() etc.
Returns the appropriate zoom-level for these plotly-mapbox-graphics along with
the center coordinate tuple of all provided coordinate tuples.
"""
    # Check whether both latitudes and longitudes have been passed,
    # or if the list lengths don't match
    if (latitudes is None or longitudes is None) or (len(latitudes) != len(longitudes)):
        # If not, return the default values of 0 zoom and the coordinate origin as center point
        return 0, (0, 0)
# Get the boundary-box
b_box = {}
b_box["height"] = latitudes.max() - latitudes.min()
b_box["width"] = longitudes.max() - longitudes.min()
b_box["center"] = (np.mean(longitudes), np.mean(latitudes))
# get the area of the bounding box in order to calculate a zoom-level
area = b_box["height"] * b_box["width"]
    # * 1D linear interpolation with numpy:
    # - Pass the area as the only x-value (not as a list) so that a scalar is returned as well
    # - The x-points "xp" should be of a comparable order of magnitude to the given area
    # - The zoom levels are matched to the areas: the smallest possible area of 0 maps to the
    #   highest zoom value of 20, and zoom decreases as area increases, since the two are
    #   inversely proportional
zoom = np.interp(
x=area,
xp=[0, 5**-10, 4**-10, 3**-10, 2**-10, 1**-10, 1**-5],
fp=[20, 15, 14, 13, 12, 7, 5],
)
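    # For example, an area of 0 (all coordinates identical) interpolates to the maximum zoom
    # of 20, while any area larger than the last xp value falls back to the minimum zoom of 5.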
# Finally, return the zoom level and the associated boundary-box center coordinates
return zoom, b_box["center"]
def show_project_map(project_name):
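    """
    Loads the project's GeoJSON geometry from MotherDuck and renders it as a filled layer
    on a Plotly Mapbox figure centered on the geometry's bounding box.
    """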
prepared_statement = con.execute(
"SELECT geometry FROM project WHERE name = ? LIMIT 1", [project_name]
).fetchall()
features = json.loads(prepared_statement[0][0].replace("'", '"'))["features"]
geometry = features[0]["geometry"]
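    # Assumes a single-polygon geometry: coordinates[0] is the outer ring and each vertex
    # is a [lon, lat] pair.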
longitudes = np.array(geometry["coordinates"])[0, :, 0]
latitudes = np.array(geometry["coordinates"])[0, :, 1]
(
zoom,
bbox_center,
) = get_plotting_zoom_level_and_center_coordinates_from_lonlat_tuples(
longitudes, latitudes
)
fig = go.Figure(
go.Scattermapbox(
mode="markers",
lon=[bbox_center[0]],
lat=[bbox_center[1]],
marker={"size": 20, "color": ["cyan"]},
)
)
fig.update_layout(
mapbox={
"style": "stamen-terrain",
"center": {"lon": bbox_center[0], "lat": bbox_center[1]},
"zoom": zoom,
"layers": [
{
"source": {
"type": "FeatureCollection",
"features": [{"type": "Feature", "geometry": geometry}],
},
"type": "fill",
"below": "traces",
"color": "royalblue",
}
],
},
margin={"l": 0, "r": 0, "b": 0, "t": 0},
)
return fig
# minMax.getInfo()
def calculate_biodiversity_score(start_year, end_year, project_name):
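    """
    Computes per-year biodiversity scores for a project (skipping years already stored in
    `bioindicator`) and returns the rows for the requested year range.
    """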
years = []
    # end_year is treated as inclusive, matching the score query below
    for year in range(start_year, end_year + 1):
row_exists = con.execute(
"SELECT COUNT(1) FROM bioindicator WHERE (year = ? AND project_name = ?)",
[year, project_name],
).fetchall()[0][0]
if not row_exists:
years.append(year)
if len(years) > 0:
df = create_dataframe(years, project_name)
# Write score table to `_temptable`
        con.sql(
            """
            CREATE OR REPLACE TABLE _temptable AS
            SELECT *, (value * area) AS score FROM (
                SELECT year, project_name, AVG(value) AS value, area
                FROM df GROUP BY year, project_name, area ORDER BY project_name
            )
            """
        )
# Create `bioindicator` table IF NOT EXISTS.
con.sql(
"""
USE climatebase;
CREATE TABLE IF NOT EXISTS bioindicator (year BIGINT, project_name VARCHAR(255), value DOUBLE, area DOUBLE, score DOUBLE, CONSTRAINT unique_year_project_name UNIQUE (year, project_name));
"""
)
# UPSERT project record
con.sql(
"""
INSERT INTO bioindicator FROM _temptable
ON CONFLICT (year, project_name) DO UPDATE SET value = excluded.value;
"""
)
logging.info("upsert records into motherduck")
scores = con.execute(
"SELECT * FROM bioindicator WHERE (year >= ? AND year <= ? AND project_name = ?)",
[start_year, end_year, project_name],
).df()
return scores
def motherduck_list_projects(author_id):
return con.execute(
"SELECT DISTINCT name FROM project WHERE authorId = ? AND geometry != 'null'",
[author_id],
).df()
with gr.Blocks() as demo:
# Environment setup
authenticate_ee(GEE_SERVICE_ACCOUNT)
con = set_up_duckdb()
with gr.Column():
m1 = gr.Plot()
with gr.Row():
project_name = gr.Dropdown([], label="Project", value="Select project")
start_year = gr.Number(value=2017, label="Start Year", precision=0)
end_year = gr.Number(value=2022, label="End Year", precision=0)
with gr.Row():
view_btn = gr.Button(value="Show project map")
calc_btn = gr.Button(value="Calculate!")
# save_btn = gr.Button(value="Save")
results_df = gr.Dataframe(
headers=["Year", "Project Name", "Score"],
datatype=["number", "str", "number"],
label="Biodiversity scores by year",
)
calc_btn.click(
calculate_biodiversity_score,
inputs=[start_year, end_year, project_name],
outputs=results_df,
)
view_btn.click(
fn=show_project_map,
inputs=[project_name],
outputs=[m1],
)
def update_project_dropdown_list(url_params):
username = url_params.get("username", "default")
projects = motherduck_list_projects(author_id=username)
# to-do: filter projects based on user
return gr.Dropdown.update(choices=projects["name"].tolist())
# Get url params
url_params = gr.JSON({"username": "default"}, visible=False, label="URL Params")
    # Workaround for a Gradio bug:
    # for the dropdown to be updated by demo.load, the dropdown value must be used downstream
b1 = gr.Button("Hidden button that fixes bug.", visible=False)
b1.click(lambda x: x, inputs=project_name, outputs=[])
# Update project dropdown list on page load
demo.load(
fn=update_project_dropdown_list,
inputs=[url_params],
outputs=[project_name],
_js=get_window_url_params,
queue=False,
)
demo.launch()