# Clustering the iris dataset in its 2-D PCA projection.
#
# When running inside an IPython/Jupyter notebook, activate inline plotting
# first with the magic command:
#
#     %pylab inline
#
# (The magic is left as a comment because it is not valid Python syntax
# outside of IPython.)

from itertools import cycle

import numpy as np
import pylab as pl
from numpy.random import RandomState
from sklearn.cluster import KMeans
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA

try:
    from sklearn.mixture import GaussianMixture
except ImportError:
    # Older scikit-learn releases only ship the legacy ``GMM`` estimator;
    # it accepts the same ``n_components`` / ``covariance_type`` arguments
    # used below.
    from sklearn.mixture import GMM as GaussianMixture


# All of this is copied from the previous notebook, '06_iris_dimensionality'.
iris = load_iris()
X = iris.data
y = iris.target

# Project the data onto its first two (whitened) principal components.
pca = PCA(n_components=2, whiten=True).fit(X)
X_pca = pca.transform(X)


def plot_2D(data, target, target_names):
    """Scatter-plot the first two columns of *data*, one colour per class.

    A new figure is created on every call.

    Parameters
    ----------
    data : array
        Two-dimensional array; column 0 is used as the x coordinate and
        column 1 as the y coordinate.
    target : array
        One integer label per row of *data*. Rows whose label equals ``i``
        are drawn with the i-th colour and the i-th entry of *target_names*.
        Must support element-wise ``==`` (e.g. a NumPy array) so it can be
        used as a boolean mask.
    target_names : sequence of str
        Legend label for each class, indexed by class id.
    """
    colors = cycle('rgbcmykw')
    pl.figure()
    # ``colors`` is infinite, so zip() stops once target_names is exhausted.
    for (i, label), c in zip(enumerate(target_names), colors):
        mask = target == i
        pl.scatter(data[mask, 0], data[mask, 1], c=c, label=label)
    pl.legend()


plot_2D(X_pca, iris.target, iris.target_names)

# --- K-means clustering -----------------------------------------------------
rng = RandomState(42)
# n_init is pinned to 10 (the long-standing default) so the result does not
# depend on which default the installed scikit-learn version happens to use.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=rng)
kmeans.fit(X_pca)

# In the notebook these were bare expressions displayed as cell output;
# print them so they stay visible when the file is run as a script.
print(np.round(kmeans.cluster_centers_, decimals=2))
print(kmeans.labels_)

plot_2D(X_pca, kmeans.labels_, ["c0", "c1", "c2"])

# --- Gaussian mixture model -------------------------------------------------
gmm = GaussianMixture(n_components=3, covariance_type='tied')
gmm.fit(X_pca)

plot_2D(X_pca, gmm.predict(X_pca), ["c0", "c1", "c2"])
# Use the ``pl`` alias imported above; ``plt`` only exists when the
# ``%pylab`` magic has injected it into the namespace.
pl.title('GMM labels')

plot_2D(X_pca, iris.target, iris.target_names)
pl.title('True labels')