.. DO NOT EDIT. .. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY. .. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE: .. "auto_examples/mixture/plot_gmm_selection.py" .. LINE NUMBERS ARE GIVEN BELOW. .. only:: html .. note:: :class: sphx-glr-download-link-note Click :ref:`here ` to download the full example code or to run this example in your browser via Binder .. rst-class:: sphx-glr-example-title .. _sphx_glr_auto_examples_mixture_plot_gmm_selection.py: ================================ Gaussian Mixture Model Selection ================================ This example shows that model selection can be performed with Gaussian Mixture Models using information-theoretic criteria (BIC). Model selection concerns both the covariance type and the number of components in the model. In that case, AIC also provides the right result (not shown to save time), but BIC is better suited if the problem is to identify the right model. Unlike Bayesian procedures, such inferences are prior-free. In that case, the model with 2 components and full covariance (which corresponds to the true generative model) is selected. .. GENERATED FROM PYTHON SOURCE LINES 17-101 .. image:: /auto_examples/mixture/images/sphx_glr_plot_gmm_selection_001.png :alt: BIC score per model, Selected GMM: full model, 2 components :class: sphx-glr-single-img .. code-block:: default import numpy as np import itertools from scipy import linalg import matplotlib.pyplot as plt import matplotlib as mpl from sklearn import mixture print(__doc__) # Number of samples per component n_samples = 500 # Generate random sample, two components np.random.seed(0) C = np.array([[0., -0.1], [1.7, .4]]) X = np.r_[np.dot(np.random.randn(n_samples, 2), C), .7 * np.random.randn(n_samples, 2) + np.array([-6, 3])] lowest_bic = np.infty bic = [] n_components_range = range(1, 7) cv_types = ['spherical', 'tied', 'diag', 'full'] for cv_type in cv_types: for n_components in n_components_range: # Fit a Gaussian mixture with EM gmm = mixture.GaussianMixture(n_components=n_components, covariance_type=cv_type) gmm.fit(X) bic.append(gmm.bic(X)) if bic[-1] < lowest_bic: lowest_bic = bic[-1] best_gmm = gmm bic = np.array(bic) color_iter = itertools.cycle(['navy', 'turquoise', 'cornflowerblue', 'darkorange']) clf = best_gmm bars = [] # Plot the BIC scores plt.figure(figsize=(8, 6)) spl = plt.subplot(2, 1, 1) for i, (cv_type, color) in enumerate(zip(cv_types, color_iter)): xpos = np.array(n_components_range) + .2 * (i - 2) bars.append(plt.bar(xpos, bic[i * len(n_components_range): (i + 1) * len(n_components_range)], width=.2, color=color)) plt.xticks(n_components_range) plt.ylim([bic.min() * 1.01 - .01 * bic.max(), bic.max()]) plt.title('BIC score per model') xpos = np.mod(bic.argmin(), len(n_components_range)) + .65 +\ .2 * np.floor(bic.argmin() / len(n_components_range)) plt.text(xpos, bic.min() * 0.97 + .03 * bic.max(), '*', fontsize=14) spl.set_xlabel('Number of components') spl.legend([b[0] for b in bars], cv_types) # Plot the winner splot = plt.subplot(2, 1, 2) Y_ = clf.predict(X) for i, (mean, cov, color) in enumerate(zip(clf.means_, clf.covariances_, color_iter)): v, w = linalg.eigh(cov) if not np.any(Y_ == i): continue plt.scatter(X[Y_ == i, 0], X[Y_ == i, 1], .8, color=color) # Plot an ellipse to show the Gaussian component angle = np.arctan2(w[0][1], w[0][0]) angle = 180. * angle / np.pi # convert to degrees v = 2. * np.sqrt(2.) * np.sqrt(v) ell = mpl.patches.Ellipse(mean, v[0], v[1], 180. + angle, color=color) ell.set_clip_box(splot.bbox) ell.set_alpha(.5) splot.add_artist(ell) plt.xticks(()) plt.yticks(()) plt.title(f'Selected GMM: {best_gmm.covariance_type} model, ' f'{best_gmm.n_components} components') plt.subplots_adjust(hspace=.35, bottom=.02) plt.show() .. rst-class:: sphx-glr-timing **Total running time of the script:** ( 0 minutes 1.876 seconds) .. _sphx_glr_download_auto_examples_mixture_plot_gmm_selection.py: .. only :: html .. container:: sphx-glr-footer :class: sphx-glr-footer-example .. container:: binder-badge .. image:: images/binder_badge_logo.svg :target: https://mybinder.org/v2/gh/scikit-learn/scikit-learn/0.24.X?urlpath=lab/tree/notebooks/auto_examples/mixture/plot_gmm_selection.ipynb :alt: Launch binder :width: 150 px .. container:: sphx-glr-download sphx-glr-download-python :download:`Download Python source code: plot_gmm_selection.py ` .. container:: sphx-glr-download sphx-glr-download-jupyter :download:`Download Jupyter notebook: plot_gmm_selection.ipynb ` .. only:: html .. rst-class:: sphx-glr-signature `Gallery generated by Sphinx-Gallery `_