Important: this notebook only works with fastai-0.7.x. Do not run fastai-1.x code from this path in the repository, because imports here resolve to fastai-0.7.x.
%matplotlib inline
%reload_ext autoreload
%autoreload 2
# fastai 0.7.x star import — brings torch, numpy (np), Path, os, etc. into scope
from fastai.conv_learner import *
PATH = Path("data/cifar10/")
os.makedirs(PATH,exist_ok=True)
# NOTE(review): hard-codes GPU index 1 — will fail on single-GPU machines; verify
torch.cuda.set_device(1)
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
# Per-channel (mean, std) used to normalize CIFAR-10 images
stats = (np.array([ 0.4914 , 0.48216, 0.44653]), np.array([ 0.24703, 0.24349, 0.26159]))
num_workers = num_cpus()//2
bs=256
sz=32
# Augmentation: random horizontal flip plus pad-and-crop with pad = sz//8 = 4 px
tfms = tfms_from_stats(stats, sz, aug_tfms=[RandomFlip()], pad=sz//8)
# Expects data/cifar10/train and data/cifar10/test folder layout; 'test' acts as validation
data = ImageClassifierData.from_paths(PATH, val_name='test', tfms=tfms, bs=bs)
def conv_layer(ni, nf, ks=3, stride=1):
    """Conv -> BatchNorm -> LeakyReLU building block.

    ni: input channels; nf: output channels; ks: square kernel size;
    stride: convolution stride. Padding of ks//2 keeps the spatial
    size unchanged when stride is 1. The conv has no bias because the
    following BatchNorm supplies the affine shift.
    """
    conv = nn.Conv2d(ni, nf, kernel_size=ks, bias=False, stride=stride, padding=ks//2)
    bn = nn.BatchNorm2d(nf, momentum=0.01)
    act = nn.LeakyReLU(negative_slope=0.1, inplace=True)
    return nn.Sequential(conv, bn, act)
class ResLayer(nn.Module):
    """Residual bottleneck: a 1x1 conv halving the channels followed by a
    3x3 conv restoring them, with the result added onto the input.

    The out-of-place ``x.add(...)`` is deliberate: the in-place ``x.add_``
    variant modified a tensor needed for gradient computation and raised
    "one of the variables needed for gradient computation has been
    modified by an inplace operation" (observed on a single GPU).
    """

    def __init__(self, ni):
        super().__init__()
        self.conv1 = conv_layer(ni, ni//2, ks=1)
        self.conv2 = conv_layer(ni//2, ni, ks=3)

    def forward(self, x):
        residual = self.conv2(self.conv1(x))
        return x.add(residual)
class Darknet(nn.Module):
    """Darknet-style backbone for small (CIFAR-sized) images.

    num_blocks: residual-block count per group, e.g. [1, 2, 4, 6, 3].
    num_classes: size of the final linear classification head.
    nf: stem channel width; each group layer doubles it.
    """

    def make_group_layer(self, ch_in, num_blocks, stride=1):
        # One channel-doubling (optionally strided) conv, then num_blocks
        # residual layers operating at the doubled width.
        group = [conv_layer(ch_in, ch_in*2, stride=stride)]
        group += [ResLayer(ch_in*2) for _ in range(num_blocks)]
        return group

    def __init__(self, num_blocks, num_classes, nf=32):
        super().__init__()
        layers = [conv_layer(3, nf, ks=3, stride=1)]
        for i, nb in enumerate(num_blocks):
            # stride is 2 for every group except the second (i == 1), which
            # keeps stride 1: 2 - (i == 1) evaluates to 1 only when i == 1
            layers += self.make_group_layer(nf, nb, stride=2-(i==1))
            nf *= 2
        # Global average pool -> flatten -> linear head over the final width
        layers += [nn.AdaptiveAvgPool2d(1), Flatten(), nn.Linear(nf, num_classes)]
        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        return self.layers(x)
# Darknet with residual groups of [1, 2, 4, 6, 3] blocks, 10-way CIFAR head
m = Darknet([1, 2, 4, 6, 3], num_classes=10, nf=32)
m = nn.DataParallel(m, device_ids=None)
# if you have several GPUs for true parallel processing enable
# m = nn.DataParallel(m, device_ids=[1, 2, 3])
# NOTE(review): lr = 1.3 is unusually high — it relies on the cyclical schedule below
lr = 1.3
learn = ConvLearner.from_model_data(m, data)
learn.crit = nn.CrossEntropyLoss()
learn.metrics = [accuracy]
wd=1e-4
# 30-epoch cycle; use_clr_beta presumably is (lr divisor, annealing pct, momentum
# bounds) per fastai 0.7 — TODO confirm against fastai 0.7 docs
%time learn.fit(lr, 1, wds=wd, cycle_len=30, use_clr_beta=(20, 20, 0.95, 0.85))
A Jupyter Widget
epoch trn_loss val_loss accuracy
0 1.057594 1.163891 0.6072
1 0.791428 0.859953 0.7078
2 0.642778 0.884299 0.7168
3 0.586491 0.85431 0.7149
4 0.530411 0.724244 0.7607
5 0.492245 0.718871 0.764
6 0.465657 0.546274 0.8112
7 0.437823 0.536493 0.8182
8 0.440909 0.69369 0.7729
9 0.408925 1.115436 0.7126
10 0.401172 0.902935 0.733
11 0.397317 0.690258 0.7921
12 0.376588 0.514558 0.8287
13 0.366199 0.442919 0.8527
14 0.345316 0.796473 0.7753
15 0.333985 0.405802 0.8646
16 0.306255 0.492593 0.8455
17 0.307262 0.405131 0.8656
18 0.285951 0.504936 0.8388
19 0.262031 0.564698 0.8289
20 0.255914 0.602085 0.8234
21 0.20444 0.316775 0.9016
22 0.161276 0.281515 0.9096
23 0.110206 0.221838 0.929
24 0.071019 0.203805 0.9368
25 0.056447 0.220052 0.9346
26 0.047433 0.206167 0.9388
27 0.0333 0.212472 0.94
28 0.025766 0.209915 0.9423
29 0.021697 0.207422 0.9427
CPU times: user 12min 33s, sys: 6min 30s, total: 19min 4s
Wall time: 15min 12s
[array([0.20742]), 0.9427]
# DP: m = WideResNet(depth=22, num_classes=10, widen_factor=6, dropRate=0.)
# Single epoch at lr/10 — presumably a quick sanity/fine-tune pass; confirm intent
learn.fit(lr/10, 1, wds=wd, cycle_len=1, use_clr_beta=(100, 1, 0.9, 0.8))
A Jupyter Widget
epoch trn_loss val_loss accuracy
0 1.294204 1.058497 0.6254
[array([1.0585]), 0.6254]
# Repeat of the 30-epoch run above with identical hyperparameters
%time learn.fit(lr, 1, wds=wd, cycle_len=30, use_clr_beta=(20, 20, 0.95, 0.85))
A Jupyter Widget
epoch trn_loss val_loss accuracy
0 1.057594 1.163891 0.6072
1 0.791428 0.859953 0.7078
2 0.642778 0.884299 0.7168
3 0.586491 0.85431 0.7149
4 0.530411 0.724244 0.7607
5 0.492245 0.718871 0.764
6 0.465657 0.546274 0.8112
7 0.437823 0.536493 0.8182
8 0.440909 0.69369 0.7729
9 0.408925 1.115436 0.7126
10 0.401172 0.902935 0.733
11 0.397317 0.690258 0.7921
12 0.376588 0.514558 0.8287
13 0.366199 0.442919 0.8527
14 0.345316 0.796473 0.7753
15 0.333985 0.405802 0.8646
16 0.306255 0.492593 0.8455
17 0.307262 0.405131 0.8656
18 0.285951 0.504936 0.8388
19 0.262031 0.564698 0.8289
20 0.255914 0.602085 0.8234
21 0.20444 0.316775 0.9016
22 0.161276 0.281515 0.9096
23 0.110206 0.221838 0.929
24 0.071019 0.203805 0.9368
25 0.056447 0.220052 0.9346
26 0.047433 0.206167 0.9388
27 0.0333 0.212472 0.94
28 0.025766 0.209915 0.9423
29 0.021697 0.207422 0.9427
CPU times: user 12min 33s, sys: 6min 30s, total: 19min 4s
Wall time: 15min 12s
[array([0.20742]), 0.9427]
# Another single epoch at lr/10 between long cycles
learn.fit(lr/10, 1, wds=wd, cycle_len=1, use_clr_beta=(100, 1, 0.9, 0.8))
A Jupyter Widget
epoch trn_loss val_loss accuracy
0 1.290646 1.08506 0.6043
[array([1.08506]), 0.6043]
# Longer 40-epoch cycle with a different use_clr_beta schedule (10, 15, ...)
%time learn.fit(lr, 1, wds=wd, cycle_len=40, use_clr_beta=(10, 15, 0.95, 0.85))
A Jupyter Widget
epoch trn_loss val_loss accuracy
0 1.084963 1.085174 0.6082
1 0.825962 1.001847 0.6643
2 0.702493 0.930496 0.6788
3 0.593327 0.76002 0.7503
4 0.543732 0.654882 0.7788
5 0.503807 0.954524 0.7058
6 0.460451 0.520503 0.8216
7 0.429511 0.73571 0.7748
8 0.419969 0.555502 0.8179
9 0.415771 0.593286 0.8055
10 0.40077 0.642513 0.8029
11 0.385437 0.965159 0.7204
12 0.383747 0.691304 0.783
13 0.35558 0.705928 0.774
14 0.355323 0.631229 0.8075
15 0.354718 0.796365 0.7543
16 0.348135 0.771605 0.7833
17 0.330313 0.57747 0.819
18 0.319511 0.58501 0.8249
19 0.305789 0.553965 0.8239
20 0.309558 0.632576 0.794
21 0.312331 0.490521 0.8399
22 0.292812 0.740069 0.7778
23 0.277894 0.671362 0.8029
24 0.273641 0.489029 0.8439
25 0.262769 0.492826 0.8458
26 0.262545 0.417226 0.8648
27 0.24185 0.522333 0.8441
28 0.235303 0.573098 0.8327
29 0.223754 0.468723 0.8581
30 0.212939 0.421378 0.8665
31 0.168992 0.31097 0.9021
32 0.162031 0.339769 0.8921
33 0.11894 0.280535 0.9163
34 0.091013 0.250175 0.9251
35 0.067888 0.2331 0.9318
36 0.047799 0.226535 0.9368
37 0.030331 0.216385 0.9405
38 0.020049 0.204373 0.9429
39 0.012407 0.20191 0.9448
CPU times: user 31min 18s, sys: 8min 10s, total: 39min 28s
Wall time: 39min 28s
[array([0.20191]), 0.9448]
# Single epoch at lr/10 before the next experiment
learn.fit(lr/10, 1, wds=wd, cycle_len=1, use_clr_beta=(100, 1, 0.9, 0.8))
A Jupyter Widget
epoch trn_loss val_loss accuracy
0 1.272734 1.096014 0.6024
[array([1.09601]), 0.6024]
# 30-epoch cycle at a fixed lr of 1.0 (not the lr variable) with schedule (10, 25, ...)
%time learn.fit(1., 1, wds=wd, cycle_len=30, use_clr_beta=(10, 25, 0.95, 0.85))
A Jupyter Widget
epoch trn_loss val_loss accuracy
0 1.05031 1.058887 0.6364
1 0.784812 1.010626 0.6695
2 0.662347 0.962157 0.6972
3 0.575188 0.63956 0.7755
4 0.516394 0.830657 0.7348
5 0.465992 0.802316 0.7552
6 0.437692 0.666769 0.7819
7 0.421664 0.699891 0.7839
8 0.409448 0.716692 0.7687
9 0.378664 0.57033 0.8045
10 0.378573 0.713038 0.7787
11 0.358377 0.843966 0.7419
12 0.328598 0.72601 0.7865
13 0.32008 0.702174 0.7723
14 0.299527 0.478337 0.8429
15 0.292695 0.692018 0.7925
16 0.279109 0.502219 0.8366
17 0.252726 0.3686 0.8795
18 0.243707 0.375788 0.8743
19 0.220037 0.447402 0.8516
20 0.200196 0.408494 0.8703
21 0.145898 0.295205 0.9067
22 0.10881 0.255926 0.9216
23 0.086912 0.239246 0.9252
24 0.065587 0.299195 0.9173
25 0.058427 0.239054 0.9322
26 0.035484 0.233135 0.9357
27 0.026134 0.229944 0.9382
28 0.018713 0.225882 0.9424
29 0.012363 0.228542 0.9416
CPU times: user 23min 22s, sys: 6min 6s, total: 29min 28s
Wall time: 29min 27s
[array([0.22854]), 0.9416]
# 40-epoch cycle with a much larger first schedule parameter (100 vs 10/20 above)
%time learn.fit(lr, 1, wds=wd, cycle_len=40, use_clr_beta=(100, 15, 0.95, 0.85))
A Jupyter Widget
epoch trn_loss val_loss accuracy
0 1.428835 1.412945 0.5194
1 0.995178 2.106482 0.4173
2 0.783325 1.311001 0.6189
3 0.650752 0.948611 0.6966
4 0.572025 0.611907 0.7906
5 0.540067 0.853141 0.7339
6 0.479793 0.552199 0.8167
7 0.457727 0.951997 0.7204
8 0.429621 0.825224 0.7421
9 0.412101 1.17335 0.6826
10 0.4009 0.808628 0.7315
11 0.383221 0.60638 0.804
12 0.36901 0.647075 0.7874
13 0.368189 0.741168 0.7825
14 0.366653 0.653589 0.7973
15 0.355023 0.71174 0.7841
16 0.354656 0.581478 0.8107
17 0.335075 0.602485 0.8103
18 0.336688 0.677685 0.7901
19 0.318256 0.670932 0.7865
20 0.30799 0.627659 0.8039
21 0.298475 0.429212 0.8597
22 0.297467 0.565836 0.8317
23 0.273995 0.513813 0.8408
24 0.270703 0.557105 0.822
25 0.259869 0.370845 0.8781
26 0.242922 0.517631 0.8333
27 0.22433 0.424905 0.8628
28 0.217046 0.460085 0.8528
29 0.179096 0.329448 0.8916
30 0.15454 0.34805 0.892
31 0.125648 0.262553 0.919
32 0.0801 0.244773 0.9244
33 0.047182 0.206418 0.9396
34 0.082104 0.277417 0.9182
35 0.070456 0.292839 0.9197
36 0.049607 0.245121 0.9325
37 0.03186 0.245641 0.9357
38 0.017482 0.21458 0.9405
39 0.012877 0.207285 0.9427
CPU times: user 31min 8s, sys: 8min 12s, total: 39min 20s
Wall time: 39min 22s
[array([0.20728]), 0.9427]
# darknet 2222 lr 1.3 65 cl
# Longest experiment: 65-epoch cycle with schedule (30, 20, 0.95, 0.85)
%time learn.fit(lr, 1, wds=wd, cycle_len=65, use_clr_beta=(30, 20, 0.95, 0.85))
A Jupyter Widget
epoch trn_loss val_loss accuracy
0 1.533084 1.725181 0.4627
1 1.224625 1.241789 0.5727
2 0.995259 1.005162 0.6476
3 0.865179 0.949657 0.6642
4 0.756122 0.854677 0.6968
5 0.692097 1.110497 0.6578
6 0.635014 0.805524 0.7227
7 0.588518 0.759759 0.7334
8 0.567764 0.868924 0.7131
9 0.547826 0.700656 0.7645
10 0.524676 1.005113 0.6889
11 0.50653 0.721323 0.7645
12 0.493718 1.125408 0.6608
13 0.479434 0.756994 0.7639
14 0.475674 0.73913 0.7589
15 0.464452 0.612312 0.7955
16 0.453685 0.772014 0.757
17 0.436029 0.60522 0.7943
18 0.437321 0.555058 0.8158
19 0.439846 0.819791 0.7449
20 0.420495 0.994983 0.719
21 0.416594 0.687188 0.7813
22 0.413399 0.714974 0.7787
23 0.421343 0.696471 0.7761
24 0.41174 0.853185 0.7445
25 0.411808 0.693145 0.7781
26 0.412166 0.847656 0.7456
27 0.402742 0.73174 0.772
28 0.391636 0.685092 0.7868
29 0.384671 0.635394 0.7931
30 0.364357 0.856764 0.7271
31 0.374435 0.490243 0.8325
32 0.364152 0.685217 0.7872
33 0.361441 0.724616 0.7843
34 0.344948 0.541638 0.8189
35 0.341661 0.604952 0.8152
36 0.337969 0.571531 0.8172
37 0.328699 0.55272 0.8177
38 0.32664 0.429266 0.8554
39 0.316233 0.424243 0.8555
40 0.302454 0.455984 0.8502
41 0.296169 0.61181 0.8123
42 0.283048 0.572225 0.8267
43 0.275228 0.453885 0.853
44 0.273048 0.408815 0.863
45 0.254404 0.397202 0.8715
46 0.219166 0.403471 0.868
47 0.215263 0.323341 0.8928
48 0.192285 0.37336 0.8824
49 0.163661 0.270863 0.9095
50 0.118515 0.269602 0.9151
51 0.089315 0.209591 0.9317
52 0.058886 0.212586 0.9339
53 0.05148 0.212392 0.9345
54 0.046729 0.232031 0.9343
55 0.038997 0.231949 0.9349
56 0.035254 0.233632 0.9349
57 0.03046 0.232361 0.937
58 0.027203 0.22916 0.94
59 0.020285 0.231641 0.9401
60 0.017448 0.23432 0.9405
61 0.016971 0.232452 0.9415
62 0.011784 0.23313 0.9416
63 0.011399 0.233199 0.9432
64 0.009589 0.233732 0.9422
CPU times: user 1h 5min 54s, sys: 16min 4s, total: 1h 21min 59s
Wall time: 57min 23s
[array([0.23373]), 0.9422]