"""Boston house-price regression walkthrough (exported from a notebook).

Steps performed at module level, in order:

1. Load the Boston housing dataset and print its keys, shapes and description.
2. Plot a histogram of the target values.
3. Split the data into train/test sets, fit a ``LinearRegression`` model and
   scatter the predicted values against the true ones.
4. Print the root-mean-square error of the predictions.
5. Leave a placeholder for the ``GradientBoostingRegressor`` exercise.
"""

# Originally provided by the IPython magic ``%pylab inline``, which is not
# valid Python outside IPython; only ``np`` and ``plt`` are used below.
import numpy as np
import matplotlib.pyplot as plt

# NOTE: ``load_boston`` was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this script needs an older scikit-learn release (or a substitute dataset).
from sklearn.datasets import load_boston
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import LinearRegression

try:
    from sklearn.model_selection import train_test_split
except ImportError:
    # Older scikit-learn releases only ship the legacy module.
    from sklearn.cross_validation import train_test_split

# --- Load and inspect the dataset -------------------------------------------
data = load_boston()

# Single-argument print(...) produces the same output on Python 2 and 3.
print(data.keys())
print(data.data.shape)
print(data.target.shape)
print(data.DESCR)

# --- Distribution of the target ---------------------------------------------
# Explicit figure: outside a notebook, consecutive plots would otherwise be
# drawn on the same axes.
plt.figure()
plt.hist(data.target)
plt.xlabel('price ($1000s)')
plt.ylabel('count')

# --- Linear regression baseline ---------------------------------------------
# No random_state is passed, so the split (and the RMS below) varies per run.
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target)

clf = LinearRegression()
clf.fit(X_train, y_train)

predicted = clf.predict(X_test)
expected = y_test

plt.figure()
plt.scatter(expected, predicted)
# Dashed black diagonal: points on this line are perfect predictions.
plt.plot([0, 50], [0, 50], '--k')
plt.axis('tight')
plt.xlabel('True price ($1000s)')
plt.ylabel('Predicted price ($1000s)')

# Root-mean-square error between predictions and the held-out targets.
print("RMS: %s" % np.sqrt(np.mean((predicted - expected) ** 2)))

# --- Exercise: gradient boosting --------------------------------------------
# Instantiate the model, fit the results, and scatter in vs. out
# %load solutions/04B_houses_regression.py

# Display the figures when run as a plain script (harmless with inline plots).
plt.show()