Johannes committed on
Commit
80019c9
•
1 Parent(s): c18005d
Files changed (3) hide show
  1. README.md +2 -2
  2. app.py +234 -0
  3. requirements.txt +2 -0
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
  title: Species Distribution Modeling
3
- emoji: 🍃
4
  colorFrom: green
5
- colorTo: green
6
  sdk: gradio
7
  sdk_version: 3.35.2
8
  app_file: app.py
 
1
  ---
2
  title: Species Distribution Modeling
3
+ emoji: 🦥🐁
4
  colorFrom: green
5
+ colorTo: white
6
  sdk: gradio
7
  sdk_version: 3.35.2
8
  app_file: app.py
app.py ADDED
@@ -0,0 +1,234 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from time import time
3
+
4
+ import numpy as np
5
+ import matplotlib.pyplot as plt
6
+
7
+ from sklearn.utils import Bunch
8
+ from sklearn.datasets import fetch_species_distributions
9
+ from sklearn import svm, metrics
10
+
11
+ from typing import Union
12
+
13
+ try:
14
+ from mpl_toolkits.basemap import Basemap
15
+
16
+ basemap = True
17
+ except ImportError:
18
+ basemap = False
19
+
20
+
21
def construct_grids(batch):
    """Construct the map grid from the batch object.

    Parameters
    ----------
    batch : Bunch object
        The object returned by :func:`fetch_species_distributions`. Only the
        attributes ``x_left_lower_corner``, ``y_left_lower_corner``, ``Nx``,
        ``Ny`` and ``grid_size`` are read.

    Returns
    -------
    (xgrid, ygrid) : 1-D arrays
        The grid corresponding to the values in batch.coverages. ``xgrid``
        always has exactly ``batch.Nx`` entries and ``ygrid`` exactly
        ``batch.Ny`` entries.
    """
    # x,y coordinates of the first grid cell (one cell in from the corner)
    xmin = batch.x_left_lower_corner + batch.grid_size
    ymin = batch.y_left_lower_corner + batch.grid_size

    # Build the coordinates from integer cell indices rather than
    # np.arange(min, max, float_step): with a non-integer step the length of
    # an arange result is not numerically stable and can gain an extra cell,
    # which would no longer line up with the coverage arrays.
    # x coordinates of the grid cells
    xgrid = xmin + batch.grid_size * np.arange(batch.Nx)
    # y coordinates of the grid cells
    ygrid = ymin + batch.grid_size * np.arange(batch.Ny)

    return (xgrid, ygrid)
46
+
47
+
48
def create_species_bunch(species_name, train, test, coverages, xgrid, ygrid):
    """Create a bunch with information about a particular organism.

    This will use the test/train record arrays to extract the
    data specific to the given species name.

    Parameters
    ----------
    species_name : str
        Underscore-separated species identifier. The first two
        underscore-separated parts, joined by a space, become ``bunch.name``.
    train, test : record arrays
        Records with at least the fields ``"species"``, ``"dd long"`` and
        ``"dd lat"``. The ``"species"`` field is compared against the
        ASCII-encoded ``species_name``.
    coverages : array
        Indexed as ``coverages[:, row, column]``.
    xgrid, ygrid : 1-D arrays
        Sorted grid coordinates used to map each point to a grid cell.

    Returns
    -------
    bunch : Bunch
        Holds ``name``, ``pts_train``, ``pts_test``, ``cov_train`` and
        ``cov_test``.
    """
    bunch = Bunch(name=" ".join(species_name.split("_")[:2]))
    # The record arrays are compared against a bytes value, not a str.
    species_name = species_name.encode("ascii")
    points = dict(test=test, train=train)

    for label, pts in points.items():
        # choose points associated with the desired species
        pts = pts[pts["species"] == species_name]
        bunch["pts_%s" % label] = pts

        # determine coverage values for each of the training & testing points
        ix = np.searchsorted(xgrid, pts["dd long"])
        iy = np.searchsorted(ygrid, pts["dd lat"])
        # NOTE(review): rows are indexed with -iy, which presumably reflects
        # coverage rows running opposite to ascending ygrid -- confirm.
        bunch["cov_%s" % label] = coverages[:, -iy, ix].T

    return bunch
69
+
70
+
71
def translate_choice(choice: str) -> Union[str, tuple[str, str]]:
    """Map a UI species label to the dataset's species identifier(s).

    Parameters
    ----------
    choice : str
        Label selected in the interface.

    Returns
    -------
    str or tuple of str
        The identifier for a single species when ``choice`` names one of the
        two known species. Any other value (including ``"Both"``) returns a
        tuple with both identifiers.
    """
    bradypus = "bradypus_variegatus_0"
    microryzomys = "microryzomys_minutus_0"

    if choice == "Bradypus variegatus":
        return bradypus
    if choice == "Microryzomys minutus":
        return microryzomys
    # Catch-all: "Both" and every unrecognised label select both species.
    return (bradypus, microryzomys)
78
+
79
+
80
def plot_species_distribution(
    choice: Union[str, tuple[str, str]]
):
    """
    Plot the species distribution.

    Fits a OneClassSVM on the standardized training coverages of each
    selected species, predicts over all land grid cells, and draws one
    subplot per species with the prediction contours, the train/test
    points and the AUC against random background points.

    Parameters
    ----------
    choice : str
        Species label; it is passed to ``translate_choice`` to obtain one
        or two dataset species identifiers.

    Returns
    -------
    matplotlib.figure.Figure
        A figure created for this call only. It is deliberately not
        registered with pyplot, so repeated or concurrent calls never draw
        on top of each other's axes and no figures pile up in pyplot's
        global registry.
    """
    species = translate_choice(choice)
    species_names = species if isinstance(species, tuple) else (species,)

    t0 = time()

    # Load the compressed data
    data = fetch_species_distributions()

    # Set up the data grid
    xgrid, ygrid = construct_grids(data)

    # The grid in x,y coordinates
    X, Y = np.meshgrid(xgrid, ygrid[::-1])

    # create a bunch for each selected species
    species_bunches = [
        create_species_bunch(
            name, data.train, data.test, data.coverages, xgrid, ygrid
        )
        for name in species_names
    ]

    # background points (grid coordinates) for evaluation
    np.random.seed(13)
    background_points = np.c_[
        np.random.randint(low=0, high=data.Ny, size=10000),
        np.random.randint(low=0, high=data.Nx, size=10000),
    ].T

    # We'll make use of the fact that coverages[6] has measurements at all
    # land points. This will help us decide between land and water.
    land_reference = data.coverages[6]

    # Standalone figure: avoids pyplot's shared "current figure" state.
    fig = plt.Figure()

    # Fit, predict, and plot for each species.
    for i, bunch in enumerate(species_bunches):
        print("_" * 80)
        print("Modeling distribution of species '%s'" % bunch.name)

        # Standardize features
        mean = bunch.cov_train.mean(axis=0)
        std = bunch.cov_train.std(axis=0)
        train_cover_std = (bunch.cov_train - mean) / std

        # Fit OneClassSVM
        print(" - fit OneClassSVM ... ", end="")
        clf = svm.OneClassSVM(nu=0.1, kernel="rbf", gamma=0.5)
        clf.fit(train_cover_std)
        print("done.")

        # Plot map of South America
        ax = fig.add_subplot(1, len(species_bunches), i + 1)
        if basemap:
            print(" - plot coastlines using basemap")
            m = Basemap(
                projection="cyl",
                llcrnrlat=Y.min(),
                urcrnrlat=Y.max(),
                llcrnrlon=X.min(),
                urcrnrlon=X.max(),
                resolution="c",
                ax=ax,
            )
            m.drawcoastlines()
            m.drawcountries()
        else:
            print(" - plot coastlines from coverage")
            ax.contour(
                X, Y, land_reference, levels=[-9998], colors="k", linestyles="solid"
            )
            ax.set_xticks([])
            ax.set_yticks([])

        print(" - predict species distribution")

        # Predict species distribution using the training data
        Z = np.ones((data.Ny, data.Nx), dtype=np.float64)

        # We'll predict only for the land points.
        idx = np.where(land_reference > -9999)
        coverages_land = data.coverages[:, idx[0], idx[1]].T

        pred = clf.decision_function((coverages_land - mean) / std)
        Z *= pred.min()
        Z[idx[0], idx[1]] = pred

        # Contour levels are fixed before the non-land cells are overwritten
        # with -9999, so that sentinel falls outside the plotted range.
        levels = np.linspace(Z.min(), Z.max(), 25)
        Z[land_reference == -9999] = -9999

        # plot contours of the prediction
        filled = ax.contourf(X, Y, Z, levels=levels, cmap="Reds")
        fig.colorbar(filled, ax=ax, format="%.2f")

        # scatter training/testing points
        ax.scatter(
            bunch.pts_train["dd long"],
            bunch.pts_train["dd lat"],
            s=2**2,
            c="black",
            marker="^",
            label="train",
        )
        ax.scatter(
            bunch.pts_test["dd long"],
            bunch.pts_test["dd lat"],
            s=2**2,
            c="black",
            marker="x",
            label="test",
        )
        ax.legend()
        ax.set_title(bunch.name)
        ax.axis("equal")

        # Compute AUC with regards to background points
        pred_background = Z[background_points[0], background_points[1]]
        pred_test = clf.decision_function((bunch.cov_test - mean) / std)
        scores = np.r_[pred_test, pred_background]
        y = np.r_[np.ones(pred_test.shape), np.zeros(pred_background.shape)]
        fpr, tpr, thresholds = metrics.roc_curve(y, scores)
        roc_auc = metrics.auc(fpr, tpr)
        ax.text(-35, -70, "AUC: %.3f" % roc_auc, ha="right")
        print("\n Area under the ROC curve : %f" % roc_auc)

    print("\ntime elapsed: %.2fs" % (time() - t0))
    return fig
220
+
221
+
222
# Gradio UI: a radio selector for the species feeds plot_species_distribution,
# whose return value is rendered in a Plot component.
iface = gr.Interface(
    fn=plot_species_distribution,
    inputs=gr.Radio(
        choices=["Bradypus variegatus", "Microryzomys minutus", "Both"],
        value="Bradypus variegatus",
        label="Species",
    ),
    outputs=gr.Plot(label="Distribution Map"),
    title="Species Distribution Map",
    description="""This app predicts the distribution of a species using a OneClassSVM. Following [this tutorial](https://scikit-learn.org/stable/auto_examples/applications/plot_species_distribution_modeling.html#sphx-glr-auto-examples-applications-plot-species-distribution-modeling-py) from sklearn""",
    examples=[
        ["Bradypus variegatus"],
        ["Microryzomys minutus"],
    ],
)

# Launched at import time: the file is the app's entry script (app_file).
iface.launch()
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ scikit-learn
2
+ basemap