Spaces:

johko
/

species-distribution-modeling

Runtime error

App Files Files Community

Johannes committed on Jun 25, 2023

Commit

80019c9

•

1 Parent(s): c18005d

init

Browse files

Files changed (3) hide show

README.md +2 -2
app.py +234 -0
requirements.txt +2 -0

README.md CHANGED Viewed

@@ -1,8 +1,8 @@
 ---
 title: Species Distribution Modeling
-emoji: 🏃
 colorFrom: green
-colorTo: green
 sdk: gradio
 sdk_version: 3.35.2
 app_file: app.py

 ---
 title: Species Distribution Modeling
+emoji: 🦥🐁
 colorFrom: green
+colorTo: white
 sdk: gradio
 sdk_version: 3.35.2
 app_file: app.py

app.py ADDED Viewed

	@@ -0,0 +1,234 @@

+import gradio as gr
+from time import time
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn.utils import Bunch
+from sklearn.datasets import fetch_species_distributions
+from sklearn import svm, metrics
+from typing import Union
+try:
+    from mpl_toolkits.basemap import Basemap
+    basemap = True
+except ImportError:
+    basemap = False
+def construct_grids(batch):
+    """Construct the map grid from the batch object
+    Parameters
+    ----------
+    batch : Batch object
+        The object returned by :func:`fetch_species_distributions`
+    Returns
+    -------
+    (xgrid, ygrid) : 1-D arrays
+        The grid corresponding to the values in batch.coverages
+    """
+    # x,y coordinates for corner cells
+    xmin = batch.x_left_lower_corner + batch.grid_size
+    xmax = xmin + (batch.Nx * batch.grid_size)
+    ymin = batch.y_left_lower_corner + batch.grid_size
+    ymax = ymin + (batch.Ny * batch.grid_size)
+    # x coordinates of the grid cells
+    xgrid = np.arange(xmin, xmax, batch.grid_size)
+    # y coordinates of the grid cells
+    ygrid = np.arange(ymin, ymax, batch.grid_size)
+    return (xgrid, ygrid)
+def create_species_bunch(species_name, train, test, coverages, xgrid, ygrid):
+    """Create a bunch with information about a particular organism
+    This will use the test/train record arrays to extract the
+    data specific to the given species name.
+    """
+    bunch = Bunch(name=" ".join(species_name.split("_")[:2]))
+    species_name = species_name.encode("ascii")
+    points = dict(test=test, train=train)
+    for label, pts in points.items():
+        # choose points associated with the desired species
+        pts = pts[pts["species"] == species_name]
+        bunch["pts_%s" % label] = pts
+        # determine coverage values for each of the training & testing points
+        ix = np.searchsorted(xgrid, pts["dd long"])
+        iy = np.searchsorted(ygrid, pts["dd lat"])
+        bunch["cov_%s" % label] = coverages[:, -iy, ix].T
+    return bunch
+def translate_choice(choice: str) -> Union[str, tuple[str, str]]:
+    if choice == "Bradypus variegatus":
+        return "bradypus_variegatus_0"
+    elif choice == "Microryzomys minutus":
+        return "microryzomys_minutus_0"
+    else:
+        return ("bradypus_variegatus_0", "microryzomys_minutus_0")
+def plot_species_distribution(
+    choice: Union[str, tuple[str, str]]
+):
+    """
+    Plot the species distribution.
+    """
+    species = translate_choice(choice)
+    t0 = time()
+    # Load the compressed data
+    data = fetch_species_distributions()
+    # Set up the data grid
+    xgrid, ygrid = construct_grids(data)
+    # The grid in x,y coordinates
+    X, Y = np.meshgrid(xgrid, ygrid[::-1])
+    species_bunches = []
+    if isinstance(species, tuple):
+        # create a bunch for each species
+        BV_bunch = create_species_bunch(
+            species[0], data.train, data.test, data.coverages, xgrid, ygrid
+        )
+        MM_bunch = create_species_bunch(
+            species[1], data.train, data.test, data.coverages, xgrid, ygrid
+        )
+        species_bunches.extend([BV_bunch, MM_bunch])
+    else:
+        # create a bunch for the given species
+        species_bunch = create_species_bunch(
+            species, data.train, data.test, data.coverages, xgrid, ygrid
+        )
+        species_bunches.append(species_bunch)
+    # background points (grid coordinates) for evaluation
+    np.random.seed(13)
+    background_points = np.c_[
+        np.random.randint(low=0, high=data.Ny, size=10000),
+        np.random.randint(low=0, high=data.Nx, size=10000),
+    ].T
+    # We'll make use of the fact that coverages[6] has measurements at all
+    # land points.  This will help us decide between land and water.
+    land_reference = data.coverages[6]
+    # Fit, predict, and plot for each species.
+    for i, species in enumerate(species_bunches):
+        print("_" * 80)
+        print("Modeling distribution of species '%s'" % species.name)
+        # Standardize features
+        mean = species.cov_train.mean(axis=0)
+        std = species.cov_train.std(axis=0)
+        train_cover_std = (species.cov_train - mean) / std
+        # Fit OneClassSVM
+        print(" - fit OneClassSVM ... ", end="")
+        clf = svm.OneClassSVM(nu=0.1, kernel="rbf", gamma=0.5)
+        clf.fit(train_cover_std)
+        print("done.")
+        # Plot map of South America
+        plt.subplot(1, len(species_bunches), i + 1)
+        if basemap:
+            print(" - plot coastlines using basemap")
+            m = Basemap(
+                projection="cyl",
+                llcrnrlat=Y.min(),
+                urcrnrlat=Y.max(),
+                llcrnrlon=X.min(),
+                urcrnrlon=X.max(),
+                resolution="c",
+            )
+            m.drawcoastlines()
+            m.drawcountries()
+        else:
+            print(" - plot coastlines from coverage")
+            plt.contour(
+                X, Y, land_reference, levels=[-9998], colors="k", linestyles="solid"
+            )
+            plt.xticks([])
+            plt.yticks([])
+        print(" - predict species distribution")
+        # Predict species distribution using the training data
+        Z = np.ones((data.Ny, data.Nx), dtype=np.float64)
+        # We'll predict only for the land points.
+        idx = np.where(land_reference > -9999)
+        coverages_land = data.coverages[:, idx[0], idx[1]].T
+        pred = clf.decision_function((coverages_land - mean) / std)
+        Z *= pred.min()
+        Z[idx[0], idx[1]] = pred
+        levels = np.linspace(Z.min(), Z.max(), 25)
+        Z[land_reference == -9999] = -9999
+        # plot contours of the prediction
+        plt.contourf(X, Y, Z, levels=levels, cmap="Reds")
+        plt.colorbar(format="%.2f")
+        # scatter training/testing points
+        plt.scatter(
+            species.pts_train["dd long"],
+            species.pts_train["dd lat"],
+            s=2**2,
+            c="black",
+            marker="^",
+            label="train",
+        )
+        plt.scatter(
+            species.pts_test["dd long"],
+            species.pts_test["dd lat"],
+            s=2**2,
+            c="black",
+            marker="x",
+            label="test",
+        )
+        plt.legend()
+        plt.title(species.name)
+        plt.axis("equal")
+        # Compute AUC with regards to background points
+        pred_background = Z[background_points[0], background_points[1]]
+        pred_test = clf.decision_function((species.cov_test - mean) / std)
+        scores = np.r_[pred_test, pred_background]
+        y = np.r_[np.ones(pred_test.shape), np.zeros(pred_background.shape)]
+        fpr, tpr, thresholds = metrics.roc_curve(y, scores)
+        roc_auc = metrics.auc(fpr, tpr)
+        plt.text(-35, -70, "AUC: %.3f" % roc_auc, ha="right")
+        print("\n Area under the ROC curve : %f" % roc_auc)
+    print("\ntime elapsed: %.2fs" % (time() - t0))
+    return plt
+iface = gr.Interface(
+    fn=plot_species_distribution,
+    inputs=gr.Radio(choices=["Bradypus variegatus","Microryzomys minutus", "Both"],
+                    value="Bradypus variegatus",
+                    label="Species"),
+    outputs=gr.Plot(label="Distribution Map"),
+    title="Species Distribution Map",
+    description="""This app predicts the distribution of a species using a OneClassSVM. Following [this tutorial](https://scikit-learn.org/stable/auto_examples/applications/plot_species_distribution_modeling.html#sphx-glr-auto-examples-applications-plot-species-distribution-modeling-py) from sklearn""",
+    examples=[
+        ["Bradypus variegatus"],
+        ["Microryzomys minutus"]])
+iface.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ scikit-learn
2	+ basemap