%matplotlib inline
from fastai2.basics import *
import gzip, pickle
Get the 'pickled' MNIST dataset from http://deeplearning.net/data/mnist/mnist.pkl.gz. We're going to treat it as a standard flat dataset with fully connected layers, rather than using a CNN.
path = Config().data/'mnist'
path.ls()
(#1) [/home/sgugger/.fastai/data/mnist/mnist.pkl.gz]
with gzip.open(path/'mnist.pkl.gz', 'rb') as f:
    ((x_train, y_train), (x_valid, y_valid), _) = pickle.load(f, encoding='latin-1')
plt.imshow(x_train[0].reshape((28,28)), cmap="gray")
x_train.shape
(50000, 784)
Each row is one 28×28 image flattened into a vector of 28 × 28 = 784 pixels.
x_train,y_train,x_valid,y_valid = map(torch.tensor, (x_train,y_train,x_valid,y_valid))
n,c = x_train.shape
x_train.shape, y_train.min(), y_train.max()
(torch.Size([50000, 784]), tensor(0), tensor(9))
In lesson2-sgd we did these things ourselves:
x = torch.ones(n,2)
def mse(y_hat, y): return ((y_hat-y)**2).mean()
y_hat = x@a
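For reference, here's a minimal sketch of the full manual loop from that lesson (assuming targets `y` as generated there; the names and learning rate are illustrative):
a = torch.randn(2, requires_grad=True)   # the two parameters we fit by hand
for _ in range(100):
    loss = mse(x@a, y)                   # forward pass and loss
    loss.backward()                      # compute gradients
    with torch.no_grad():
        a.sub_(1e-1 * a.grad)            # one gradient descent step
        a.grad.zero_()                   # gradients accumulate, so reset them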
Now instead we'll use PyTorch's functions to do it for us, and also to handle mini-batches (which we didn't do last time, since our dataset was so small).
from torch.utils.data import TensorDataset
bs=64
train_ds = TensorDataset(x_train, y_train)
valid_ds = TensorDataset(x_valid, y_valid)
train_dl = TfmdDL(train_ds, bs=bs, shuffle=True)
valid_dl = TfmdDL(valid_ds, bs=2*bs)
dls = DataLoaders(train_dl, valid_dl)
x,y = dls.one_batch()
x.shape,y.shape
(torch.Size([64, 784]), torch.Size([64]))
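A TensorDataset simply indexes its tensors in lockstep, so each element is a (flattened image, label) pair; for example:
xb, yb = train_ds[0]   # first (image, label) pair of the training set
xb.shape, yb.shape
(torch.Size([784]), torch.Size([]))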
class Mnist_Logistic(Module):
    def __init__(self): self.lin = nn.Linear(784, 10, bias=True)
    def forward(self, xb): return self.lin(xb)
model = Mnist_Logistic().cuda()
model
Mnist_Logistic(
  (lin): Linear(in_features=784, out_features=10, bias=True)
)
model.lin
Linear(in_features=784, out_features=10, bias=True)
model(x).shape
torch.Size([64, 10])
[p.shape for p in model.parameters()]
[torch.Size([10, 784]), torch.Size([10])]
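Those two tensors are just the weight matrix and bias of model.lin (nn.Linear stores its weight as out_features × in_features):
model.lin.weight.shape, model.lin.bias.shape
(torch.Size([10, 784]), torch.Size([10]))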
lr=2e-2
loss_func = nn.CrossEntropyLoss()
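nn.CrossEntropyLoss expects raw logits: it applies log_softmax followed by the negative log likelihood internally. A quick check on the batch from earlier (a sanity-check sketch, not part of the training code):
out = model(x)
assert torch.allclose(loss_func(out, y), F.nll_loss(F.log_softmax(out, dim=1), y))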
def update(x,y,lr):
    wd = 1e-5
    y_hat = model(x)
    # weight decay: penalize the squared L2 norm of all parameters
    w2 = 0.
    for p in model.parameters(): w2 += (p**2).sum()
    # add it to the regular loss; the gradient of wd*w2 is 2*wd*p, so every
    # step also shrinks each parameter slightly towards zero
    loss = loss_func(y_hat, y) + w2*wd
    loss.backward()
    with torch.no_grad():
        for p in model.parameters():
            p.sub_(lr * p.grad)   # one SGD step
            p.grad.zero_()        # gradients accumulate, so reset them
    return loss.item()
losses = [update(x,y,lr) for x,y in dls.train]
plt.plot(losses);
class Mnist_NN(Module):
    def __init__(self):
        self.lin1 = nn.Linear(784, 50, bias=True)
        self.lin2 = nn.Linear(50, 10, bias=True)
    def forward(self, xb):
        x = self.lin1(xb)
        x = F.relu(x)
        return self.lin2(x)
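As an aside, the same architecture can be expressed more compactly with nn.Sequential (seq_model is just an illustrative name):
seq_model = nn.Sequential(nn.Linear(784, 50), nn.ReLU(), nn.Linear(50, 10))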
model = Mnist_NN().cuda()
losses = [update(x,y,lr) for x,y in dls.train]
plt.plot(losses);
model = Mnist_NN().cuda()
# Create the optimizer once, outside the update function, so that Adam's
# running gradient statistics persist across batches (recreating it every
# step would reset its state and reduce it to a plain scaled SGD step).
opt = torch.optim.Adam(model.parameters(), lr=1e-3)
def update(x,y):
    y_hat = model(x)
    loss = loss_func(y_hat, y)
    loss.backward()
    opt.step()
    opt.zero_grad()
    return loss.item()
losses = [update(x,y) for x,y in dls.train]
plt.plot(losses);
from fastai2.callback.all import *
learn = Learner(dls, Mnist_NN(), loss_func=loss_func, metrics=accuracy)
learn.lr_find()
learn.fit_one_cycle(1, 1e-2)
epoch  train_loss  valid_loss  accuracy  time
0      0.164200    0.128831    0.962400  00:09
learn.recorder.plot_sched()
learn.recorder.plot_loss()