Treating each day's crossing counts (one value per time of day) as a feature vector, in order to learn about the relationships between the various days.
%matplotlib inline
import matplotlib.pyplot as plt
# The bundled seaborn style sheets were renamed with a 'seaborn-v0_8' prefix
# in matplotlib 3.6 and the old names were later removed. Prefer the new name
# and fall back to the old one on matplotlib releases that predate it.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.mixture import GaussianMixture
from jupyterworkflow.data import get_fremont_data
# Load the Fremont data through the project helper.
data = get_fremont_data()

# Reshape the 'Total' column into a table with one row per time of day and
# one column per calendar date.
times_of_day = data.index.time
calendar_dates = data.index.date
pivoted = data.pivot_table('Total', index=times_of_day, columns=calendar_dates)

# Overlay every date's curve; the very low alpha keeps the overlay readable.
pivoted.plot(legend=False, alpha=0.01);

# Replace missing entries with 0, then transpose so each row of X is one date.
X = pivoted.fillna(0).T.values
X.shape
(1610, 24)
# Reduce each row of X to its first two principal components so the days
# can be drawn on a 2-D scatter plot.
pca = PCA(n_components=2, svd_solver='full')
X2 = pca.fit_transform(X)
X2.shape
(1610, 2)
# Unlabelled view of the days in the 2-D PCA projection.
plt.scatter(X2[:, 0], X2[:, 1]);

# Fit a two-component Gaussian mixture on the full (un-projected) data.
# The fixed random_state seeds the initialisation, so that for a given
# dataset and scikit-learn version the 0/1 numbering of the clusters -- which
# the later `labels == 0` / `labels == 1` selections depend on -- no longer
# varies from one run to the next.
gmm = GaussianMixture(n_components=2, random_state=0).fit(X)
labels = gmm.predict(X)

# Same projection, coloured by the cluster assigned to each day.
plt.scatter(X2[:, 0], X2[:, 1], c=labels, cmap='rainbow')
plt.colorbar();
# Draw the daily curves of each cluster side by side: cluster 0 on the left
# axis, cluster 1 on the right. The titles name the colours those labels got
# in the 'rainbow' scatter plot.
fig, ax = plt.subplots(1, 2, figsize=(14, 6))
for cluster, (axis, title) in enumerate(zip(ax, ('Purple Cluster', 'Red Cluster'))):
    # Keep only the date columns assigned to this cluster.
    pivoted.loc[:, labels == cluster].plot(legend=False, alpha=0.1, ax=axis)
    axis.set_title(title)
# Day of week for every date column of the pivot table
# (pandas convention: Monday=0 ... Sunday=6).
date_index = pd.DatetimeIndex(pivoted.columns)
dayofweek = date_index.dayofweek

# PCA projection again, this time coloured by day of week.
pc1, pc2 = X2.T
plt.scatter(pc1, pc2, c=dayofweek, cmap='rainbow')
plt.colorbar();
The following dates are weekdays whose traffic follows the holiday-like (weekend) pattern:
dates = pd.DatetimeIndex(pivoted.columns)

# GaussianMixture numbers its components arbitrarily, so label 1 is not
# guaranteed to be the holiday-like cluster. Identify that cluster as the
# label given to the majority of Saturdays/Sundays (dayofweek >= 5);
# minlength=2 keeps argmax well-defined even if no weekend day is present.
weekend_label = np.bincount(labels[dayofweek >= 5], minlength=2).argmax()

# Weekdays (Mon-Fri) that were nevertheless assigned to the weekend-like cluster.
dates[(labels == weekend_label) & (dayofweek < 5)]
DatetimeIndex(['2012-10-03', '2012-10-04', '2012-10-05', '2012-10-08',
'2012-10-09', '2012-10-10', '2012-10-11', '2012-10-12',
'2012-10-15', '2012-10-16',
...
'2017-02-15', '2017-02-16', '2017-02-17', '2017-02-20',
'2017-02-21', '2017-02-22', '2017-02-23', '2017-02-24',
'2017-02-27', '2017-02-28'],
dtype='datetime64[ns]', length=1109, freq=None)
What's up with Feb 6, 2017? That was the day of a snow storm.