In [ ]:

#hide
from fastai2.vision.all import *
from utils import *

matplotlib.rc('image', cmap='Greys')

Convolutional neural networks¶

The magic of convolutions¶

In [ ]:

path = untar_data(URLs.MNIST_SAMPLE)

In [ ]:

#hide
Path.BASE_PATH = path

In [ ]:

top_edge = tensor([[-1,-1,-1],
                   [ 0, 0, 0],
                   [ 1, 1, 1]]).float()

In [ ]:

im3 = Image.open(path/'train'/'3'/'12.png')
show_image(im3);

In [ ]:

im3_t = tensor(im3)
im3_t[0:3,0:3] * top_edge

Out[ ]:

tensor([[-0., -0., -0.],
        [0., 0., 0.],
        [0., 0., 0.]])

In [ ]:

(im3_t[0:3,0:3] * top_edge).sum()

Out[ ]:

tensor(0.)

In [ ]:

df = pd.DataFrame(im3_t[:10,:20])
df.style.set_properties(**{'font-size':'6pt'}).background_gradient('Greys')

Out[ ]:

	3	4	5	6	7	8	9	10	11	12	13	14	15	16	17	18
0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
2	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
3	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
4	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
5	12	99	91	142	155	246	182	155	155	155	155	131	52	0	0	0
6	138	254	254	254	254	254	254	254	254	254	254	254	252	210	122	33
7	220	254	254	254	235	189	189	189	189	150	189	205	254	254	254	75
8	35	74	35	35	25	0	0	0	0	0	0	13	224	254	254	153
9	0	0	0	0	0	0	0	0	0	0	0	90	254	254	247	53

In [ ]:

(im3_t[4:7,6:9] * top_edge).sum()

Out[ ]:

tensor(762.)

In [ ]:

(im3_t[7:10,17:20] * top_edge).sum()

Out[ ]:

tensor(-29.)

In [ ]:

def apply_kernel(row, col, kernel):
    return (im3_t[row-1:row+2,col-1:col+2] * kernel).sum()

In [ ]:

apply_kernel(5,7,top_edge)

Out[ ]:

tensor(762.)

In [ ]:

[[(i,j) for j in range(1,5)] for i in range(1,5)]

Out[ ]:

[[(1, 1), (1, 2), (1, 3), (1, 4)],
 [(2, 1), (2, 2), (2, 3), (2, 4)],
 [(3, 1), (3, 2), (3, 3), (3, 4)],
 [(4, 1), (4, 2), (4, 3), (4, 4)]]

In [ ]:

rng = range(1,27)
top_edge3 = tensor([[apply_kernel(i,j,top_edge) for j in rng] for i in rng])

show_image(top_edge3);

In [ ]:

left_edge = tensor([[-1,1,0],
                    [-1,1,0],
                    [-1,1,0]]).float()

left_edge3 = tensor([[apply_kernel(i,j,left_edge) for j in rng] for i in rng])

show_image(left_edge3);

Convolutions in PyTorch¶

In [ ]:

diag1_edge = tensor([[ 0,-1, 1],
                     [-1, 1, 0],
                     [ 1, 0, 0]]).float()
diag2_edge = tensor([[ 1,-1, 0],
                     [ 0, 1,-1],
                     [ 0, 0, 1]]).float()

edge_kernels = torch.stack([left_edge, top_edge, diag1_edge, diag2_edge])
edge_kernels.shape

Out[ ]:

torch.Size([4, 3, 3])

In [ ]:

mnist = DataBlock(blockernel_size=(ImageBlock(cls=PILImageBW), CategoryBlock), 
                  get_items=get_image_files, 
                  splitter=GrandparentSplitter(),
                  get_y=parent_label)

dls = mnist.dataloaders(path)
xb,yb = first(dls.valid)
xb.shape

Out[ ]:

torch.Size([64, 1, 28, 28])

In [ ]:

xb,yb = to_cpu(xb),to_cpu(yb)

In [ ]:

edge_kernels.shape,edge_kernels.unsqueeze(1).shape

Out[ ]:

(torch.Size([4, 3, 3]), torch.Size([4, 1, 3, 3]))

In [ ]:

edge_kernels = edge_kernels.unsqueeze(1)

In [ ]:

batch_features = F.conv2d(xb, edge_kernels)
batch_features.shape

Out[ ]:

torch.Size([64, 4, 26, 26])

In [ ]:

show_image(batch_features[0,0]);

Strides and padding¶

CNNs from different viewpoints¶

Our first convolutional neural network¶

Learning kernels¶

Creating the CNN¶

In [ ]:

simple_net = nn.Sequential(
    nn.Linear(28*28,30),
    nn.ReLU(),
    nn.Linear(30,1)
)

In [ ]:

simple_net

Out[ ]:

Sequential(
  (0): Linear(in_features=784, out_features=30, bias=True)
  (1): ReLU()
  (2): Linear(in_features=30, out_features=1, bias=True)
)

In [ ]:

broken_cnn = sequential(
    nn.Conv2d(1,30, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.Conv2d(30,1, kernel_size=3, padding=1)
)

In [ ]:

broken_cnn(xb).shape

Out[ ]:

torch.Size([64, 1, 28, 28])

In [ ]:

def conv(ni, nf, ks=3, act=True):
    res = nn.Conv2d(ni, nf, stride=2, kernel_size=ks, padding=ks//2)
    if act: res = nn.Sequential(res, nn.ReLU())
    return res

In [ ]:

simple_cnn = sequential(
    conv(1 ,4),            #14x14
    conv(4 ,8),            #7x7
    conv(8 ,16),           #4x4
    conv(16,32),           #2x2
    conv(32,2, act=False), #1x1
    Flatten(),
)

In [ ]:

simple_cnn(xb).shape

Out[ ]:

torch.Size([64, 2])

In [ ]:

learn = Learner(dls, simple_cnn, loss_func=F.cross_entropy, metrics=accuracy)

In [ ]:

learn.summary()

Out[ ]:

Sequential (Input shape: ['64 x 1 x 28 x 28'])
================================================================
Layer (type)         Output Shape         Param #    Trainable 
================================================================
Conv2d               64 x 4 x 14 x 14     40         True      
________________________________________________________________
ReLU                 64 x 4 x 14 x 14     0          False     
________________________________________________________________
Conv2d               64 x 8 x 7 x 7       296        True      
________________________________________________________________
ReLU                 64 x 8 x 7 x 7       0          False     
________________________________________________________________
Conv2d               64 x 16 x 4 x 4      1,168      True      
________________________________________________________________
ReLU                 64 x 16 x 4 x 4      0          False     
________________________________________________________________
Conv2d               64 x 32 x 2 x 2      4,640      True      
________________________________________________________________
ReLU                 64 x 32 x 2 x 2      0          False     
________________________________________________________________
Conv2d               64 x 2 x 1 x 1       578        True      
________________________________________________________________
Flatten              64 x 2               0          False     
________________________________________________________________

Total params: 6,722
Total trainable params: 6,722
Total non-trainable params: 0

Optimizer used: <function Adam at 0x7fbc9c258cb0>
Loss function: <function cross_entropy at 0x7fbca9ba0170>

Callbacks:
  - TrainEvalCallback
  - Recorder
  - ProgressCallback

In [ ]:

learn.fit_one_cycle(2, 0.01)

epoch	train_loss	valid_loss	accuracy	time
0	0.072684	0.045110	0.990186	00:05
1	0.022580	0.030775	0.990186	00:05

Colour images¶

In [ ]:

im = image2tensor(Image.open('images/grizzly.jpg'))
im.shape

Out[ ]:

torch.Size([3, 1000, 846])

In [ ]:

show_image(im);

In [ ]:

_,axs = subplots(1,3)
for bear,ax,color in zip(im,axs,('Reds','Greens','Blues')):
    show_image(255-bear, ax=ax, cmap=color)

Conclusions¶

In [ ]:

	3	4	5	6	7	8	9	10	11	12	13	14	15	16	17	18
0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
2	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
3	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
4	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
5	12	99	91	142	155	246	182	155	155	155	155	131	52	0	0	0
6	138	254	254	254	254	254	254	254	254	254	254	254	252	210	122	33
7	220	254	254	254	235	189	189	189	189	150	189	205	254	254	254	75
8	35	74	35	35	25	0	0	0	0	0	0	13	224	254	254	153
9	0	0	0	0	0	0	0	0	0	0	0	90	254	254	247	53

	3	4	5	6	7	8	9	10	11	12	13	14	15	16	17	18
0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
2	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
3	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
4	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
5	12	99	91	142	155	246	182	155	155	155	155	131	52	0	0	0
6	138	254	254	254	254	254	254	254	254	254	254	254	252	210	122	33
7	220	254	254	254	235	189	189	189	189	150	189	205	254	254	254	75
8	35	74	35	35	25	0	0	0	0	0	0	13	224	254	254	153
9	0	0	0	0	0	0	0	0	0	0	0	90	254	254	247	53

	3	4	5	6	7	8	9	10	11	12	13	14	15	16	17	18
0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
2	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
3	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
4	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
5	12	99	91	142	155	246	182	155	155	155	155	131	52	0	0	0
6	138	254	254	254	254	254	254	254	254	254	254	254	252	210	122	33
7	220	254	254	254	235	189	189	189	189	150	189	205	254	254	254	75
8	35	74	35	35	25	0	0	0	0	0	0	13	224	254	254	153
9	0	0	0	0	0	0	0	0	0	0	0	90	254	254	247	53