Johannes
init
80019c9
raw
history blame
7.62 kB
import gradio as gr
from time import time
import numpy as np
import matplotlib.pyplot as plt
from sklearn.utils import Bunch
from sklearn.datasets import fetch_species_distributions
from sklearn import svm, metrics
from typing import Union
try:
from mpl_toolkits.basemap import Basemap
basemap = True
except ImportError:
basemap = False
def construct_grids(batch):
"""Construct the map grid from the batch object
Parameters
----------
batch : Batch object
The object returned by :func:`fetch_species_distributions`
Returns
-------
(xgrid, ygrid) : 1-D arrays
The grid corresponding to the values in batch.coverages
"""
# x,y coordinates for corner cells
xmin = batch.x_left_lower_corner + batch.grid_size
xmax = xmin + (batch.Nx * batch.grid_size)
ymin = batch.y_left_lower_corner + batch.grid_size
ymax = ymin + (batch.Ny * batch.grid_size)
# x coordinates of the grid cells
xgrid = np.arange(xmin, xmax, batch.grid_size)
# y coordinates of the grid cells
ygrid = np.arange(ymin, ymax, batch.grid_size)
return (xgrid, ygrid)
def create_species_bunch(species_name, train, test, coverages, xgrid, ygrid):
"""Create a bunch with information about a particular organism
This will use the test/train record arrays to extract the
data specific to the given species name.
"""
bunch = Bunch(name=" ".join(species_name.split("_")[:2]))
species_name = species_name.encode("ascii")
points = dict(test=test, train=train)
for label, pts in points.items():
# choose points associated with the desired species
pts = pts[pts["species"] == species_name]
bunch["pts_%s" % label] = pts
# determine coverage values for each of the training & testing points
ix = np.searchsorted(xgrid, pts["dd long"])
iy = np.searchsorted(ygrid, pts["dd lat"])
bunch["cov_%s" % label] = coverages[:, -iy, ix].T
return bunch
def translate_choice(choice: str) -> Union[str, tuple[str, str]]:
if choice == "Bradypus variegatus":
return "bradypus_variegatus_0"
elif choice == "Microryzomys minutus":
return "microryzomys_minutus_0"
else:
return ("bradypus_variegatus_0", "microryzomys_minutus_0")
def plot_species_distribution(
choice: Union[str, tuple[str, str]]
):
"""
Plot the species distribution.
"""
species = translate_choice(choice)
t0 = time()
# Load the compressed data
data = fetch_species_distributions()
# Set up the data grid
xgrid, ygrid = construct_grids(data)
# The grid in x,y coordinates
X, Y = np.meshgrid(xgrid, ygrid[::-1])
species_bunches = []
if isinstance(species, tuple):
# create a bunch for each species
BV_bunch = create_species_bunch(
species[0], data.train, data.test, data.coverages, xgrid, ygrid
)
MM_bunch = create_species_bunch(
species[1], data.train, data.test, data.coverages, xgrid, ygrid
)
species_bunches.extend([BV_bunch, MM_bunch])
else:
# create a bunch for the given species
species_bunch = create_species_bunch(
species, data.train, data.test, data.coverages, xgrid, ygrid
)
species_bunches.append(species_bunch)
# background points (grid coordinates) for evaluation
np.random.seed(13)
background_points = np.c_[
np.random.randint(low=0, high=data.Ny, size=10000),
np.random.randint(low=0, high=data.Nx, size=10000),
].T
# We'll make use of the fact that coverages[6] has measurements at all
# land points. This will help us decide between land and water.
land_reference = data.coverages[6]
# Fit, predict, and plot for each species.
for i, species in enumerate(species_bunches):
print("_" * 80)
print("Modeling distribution of species '%s'" % species.name)
# Standardize features
mean = species.cov_train.mean(axis=0)
std = species.cov_train.std(axis=0)
train_cover_std = (species.cov_train - mean) / std
# Fit OneClassSVM
print(" - fit OneClassSVM ... ", end="")
clf = svm.OneClassSVM(nu=0.1, kernel="rbf", gamma=0.5)
clf.fit(train_cover_std)
print("done.")
# Plot map of South America
plt.subplot(1, len(species_bunches), i + 1)
if basemap:
print(" - plot coastlines using basemap")
m = Basemap(
projection="cyl",
llcrnrlat=Y.min(),
urcrnrlat=Y.max(),
llcrnrlon=X.min(),
urcrnrlon=X.max(),
resolution="c",
)
m.drawcoastlines()
m.drawcountries()
else:
print(" - plot coastlines from coverage")
plt.contour(
X, Y, land_reference, levels=[-9998], colors="k", linestyles="solid"
)
plt.xticks([])
plt.yticks([])
print(" - predict species distribution")
# Predict species distribution using the training data
Z = np.ones((data.Ny, data.Nx), dtype=np.float64)
# We'll predict only for the land points.
idx = np.where(land_reference > -9999)
coverages_land = data.coverages[:, idx[0], idx[1]].T
pred = clf.decision_function((coverages_land - mean) / std)
Z *= pred.min()
Z[idx[0], idx[1]] = pred
levels = np.linspace(Z.min(), Z.max(), 25)
Z[land_reference == -9999] = -9999
# plot contours of the prediction
plt.contourf(X, Y, Z, levels=levels, cmap="Reds")
plt.colorbar(format="%.2f")
# scatter training/testing points
plt.scatter(
species.pts_train["dd long"],
species.pts_train["dd lat"],
s=2**2,
c="black",
marker="^",
label="train",
)
plt.scatter(
species.pts_test["dd long"],
species.pts_test["dd lat"],
s=2**2,
c="black",
marker="x",
label="test",
)
plt.legend()
plt.title(species.name)
plt.axis("equal")
# Compute AUC with regards to background points
pred_background = Z[background_points[0], background_points[1]]
pred_test = clf.decision_function((species.cov_test - mean) / std)
scores = np.r_[pred_test, pred_background]
y = np.r_[np.ones(pred_test.shape), np.zeros(pred_background.shape)]
fpr, tpr, thresholds = metrics.roc_curve(y, scores)
roc_auc = metrics.auc(fpr, tpr)
plt.text(-35, -70, "AUC: %.3f" % roc_auc, ha="right")
print("\n Area under the ROC curve : %f" % roc_auc)
print("\ntime elapsed: %.2fs" % (time() - t0))
return plt
iface = gr.Interface(
fn=plot_species_distribution,
inputs=gr.Radio(choices=["Bradypus variegatus","Microryzomys minutus", "Both"],
value="Bradypus variegatus",
label="Species"),
outputs=gr.Plot(label="Distribution Map"),
title="Species Distribution Map",
description="""This app predicts the distribution of a species using a OneClassSVM. Following [this tutorial](https://scikit-learn.org/stable/auto_examples/applications/plot_species_distribution_modeling.html#sphx-glr-auto-examples-applications-plot-species-distribution-modeling-py) from sklearn""",
examples=[
["Bradypus variegatus"],
["Microryzomys minutus"]])
iface.launch()