{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"%matplotlib inline"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n# Kernel Density Estimate of Species Distributions\nThis shows an example of a neighbors-based query (in particular a kernel\ndensity estimate) on geospatial data, using a Ball Tree built upon the\nHaversine distance metric -- i.e. distances over points in latitude/longitude.\nThe dataset is provided by Phillips et. al. (2006).\nIf available, the example uses\n`basemap `_\nto plot the coast lines and national boundaries of South America.\n\nThis example does not perform any learning over the data\n(see `sphx_glr_auto_examples_applications_plot_species_distribution_modeling.py` for\nan example of classification based on the attributes in this dataset). It\nsimply shows the kernel density estimate of observed data points in\ngeospatial coordinates.\n\nThe two species are:\n\n - `\"Bradypus variegatus\"\n `_ ,\n the Brown-throated Sloth.\n\n - `\"Microryzomys minutus\"\n `_ ,\n also known as the Forest Small Rice Rat, a rodent that lives in Peru,\n Colombia, Ecuador, Peru, and Venezuela.\n\n## References\n\n * `\"Maximum entropy modeling of species geographic distributions\"\n `_\n S. J. Phillips, R. P. Anderson, R. E. Schapire - Ecological Modelling,\n 190:231-259, 2006.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Author: Jake Vanderplas \n#\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn.datasets import fetch_species_distributions\nfrom sklearn.neighbors import KernelDensity\n\n# if basemap is available, we'll use it.\n# otherwise, we'll improvise later...\ntry:\n from mpl_toolkits.basemap import Basemap\n basemap = True\nexcept ImportError:\n basemap = False\n\n\ndef construct_grids(batch):\n \"\"\"Construct the map grid from the batch object\n\n Parameters\n ----------\n batch : Batch object\n The object returned by :func:`fetch_species_distributions`\n\n Returns\n -------\n (xgrid, ygrid) : 1-D arrays\n The grid corresponding to the values in batch.coverages\n \"\"\"\n # x,y coordinates for corner cells\n xmin = batch.x_left_lower_corner + batch.grid_size\n xmax = xmin + (batch.Nx * batch.grid_size)\n ymin = batch.y_left_lower_corner + batch.grid_size\n ymax = ymin + (batch.Ny * batch.grid_size)\n\n # x coordinates of the grid cells\n xgrid = np.arange(xmin, xmax, batch.grid_size)\n # y coordinates of the grid cells\n ygrid = np.arange(ymin, ymax, batch.grid_size)\n\n return (xgrid, ygrid)\n\n\n# Get matrices/arrays of species IDs and locations\ndata = fetch_species_distributions()\nspecies_names = ['Bradypus Variegatus', 'Microryzomys Minutus']\n\nXtrain = np.vstack([data['train']['dd lat'],\n data['train']['dd long']]).T\nytrain = np.array([d.decode('ascii').startswith('micro')\n for d in data['train']['species']], dtype='int')\nXtrain *= np.pi / 180. # Convert lat/long to radians\n\n# Set up the data grid for the contour plot\nxgrid, ygrid = construct_grids(data)\nX, Y = np.meshgrid(xgrid[::5], ygrid[::5][::-1])\nland_reference = data.coverages[6][::5, ::5]\nland_mask = (land_reference > -9999).ravel()\n\nxy = np.vstack([Y.ravel(), X.ravel()]).T\nxy = xy[land_mask]\nxy *= np.pi / 180.\n\n# Plot map of South America with distributions of each species\nfig = plt.figure()\nfig.subplots_adjust(left=0.05, right=0.95, wspace=0.05)\n\nfor i in range(2):\n plt.subplot(1, 2, i + 1)\n\n # construct a kernel density estimate of the distribution\n print(\" - computing KDE in spherical coordinates\")\n kde = KernelDensity(bandwidth=0.04, metric='haversine',\n kernel='gaussian', algorithm='ball_tree')\n kde.fit(Xtrain[ytrain == i])\n\n # evaluate only on the land: -9999 indicates ocean\n Z = np.full(land_mask.shape[0], -9999, dtype='int')\n Z[land_mask] = np.exp(kde.score_samples(xy))\n Z = Z.reshape(X.shape)\n\n # plot contours of the density\n levels = np.linspace(0, Z.max(), 25)\n plt.contourf(X, Y, Z, levels=levels, cmap=plt.cm.Reds)\n\n if basemap:\n print(\" - plot coastlines using basemap\")\n m = Basemap(projection='cyl', llcrnrlat=Y.min(),\n urcrnrlat=Y.max(), llcrnrlon=X.min(),\n urcrnrlon=X.max(), resolution='c')\n m.drawcoastlines()\n m.drawcountries()\n else:\n print(\" - plot coastlines from coverage\")\n plt.contour(X, Y, land_reference,\n levels=[-9998], colors=\"k\",\n linestyles=\"solid\")\n plt.xticks([])\n plt.yticks([])\n\n plt.title(species_names[i])\n\nplt.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.2"
}
},
"nbformat": 4,
"nbformat_minor": 0
}