Important: this notebook only works with fastai-0.7.x. Do not run fastai-1.x code from this path in the repository, because imports here resolve to fastai-0.7.x.
%matplotlib inline
%reload_ext autoreload
%autoreload 2
# fastai 0.7.x star import — brings torch, numpy (np), Path, os, etc. into scope
from fastai.conv_learner import *
PATH = Path("data/cifar10/")
os.makedirs(PATH,exist_ok=True)
# NOTE(review): hard-codes GPU index 1 — will fail on single-GPU machines; verify
torch.cuda.set_device(1)
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
# Per-channel (mean, std) used to normalize CIFAR-10 images
stats = (np.array([ 0.4914 , 0.48216, 0.44653]), np.array([ 0.24703, 0.24349, 0.26159]))
num_workers = num_cpus()//2
bs=256
sz=32
# Augmentation: random horizontal flip plus pad-and-crop with pad = sz//8 = 4 px
tfms = tfms_from_stats(stats, sz, aug_tfms=[RandomFlip()], pad=sz//8)
# Expects data/cifar10/train and data/cifar10/test folder layout; 'test' acts as validation
data = ImageClassifierData.from_paths(PATH, val_name='test', tfms=tfms, bs=bs)
def conv_layer(ni, nf, ks=3, stride=1):
    """Conv -> BatchNorm -> LeakyReLU building block.

    ni: input channels; nf: output channels; ks: square kernel size;
    stride: convolution stride. Padding of ks//2 keeps the spatial
    size unchanged when stride is 1. The conv has no bias because the
    following BatchNorm supplies the affine shift.
    """
    conv = nn.Conv2d(ni, nf, kernel_size=ks, bias=False, stride=stride, padding=ks//2)
    bn = nn.BatchNorm2d(nf, momentum=0.01)
    act = nn.LeakyReLU(negative_slope=0.1, inplace=True)
    return nn.Sequential(conv, bn, act)
class ResLayer(nn.Module):
    """Residual bottleneck: a 1x1 conv halving the channels followed by a
    3x3 conv restoring them, with the result added onto the input.

    The out-of-place ``x.add(...)`` is deliberate: the in-place ``x.add_``
    variant modified a tensor needed for gradient computation and raised
    "one of the variables needed for gradient computation has been
    modified by an inplace operation" (observed on a single GPU).
    """

    def __init__(self, ni):
        super().__init__()
        self.conv1 = conv_layer(ni, ni//2, ks=1)
        self.conv2 = conv_layer(ni//2, ni, ks=3)

    def forward(self, x):
        residual = self.conv2(self.conv1(x))
        return x.add(residual)
class Darknet(nn.Module):
    """Darknet-style backbone for small (CIFAR-sized) images.

    num_blocks: residual-block count per group, e.g. [1, 2, 4, 6, 3].
    num_classes: size of the final linear classification head.
    nf: stem channel width; each group layer doubles it.
    """

    def make_group_layer(self, ch_in, num_blocks, stride=1):
        # One channel-doubling (optionally strided) conv, then num_blocks
        # residual layers operating at the doubled width.
        group = [conv_layer(ch_in, ch_in*2, stride=stride)]
        group += [ResLayer(ch_in*2) for _ in range(num_blocks)]
        return group

    def __init__(self, num_blocks, num_classes, nf=32):
        super().__init__()
        layers = [conv_layer(3, nf, ks=3, stride=1)]
        for i, nb in enumerate(num_blocks):
            # stride is 2 for every group except the second (i == 1), which
            # keeps stride 1: 2 - (i == 1) evaluates to 1 only when i == 1
            layers += self.make_group_layer(nf, nb, stride=2-(i==1))
            nf *= 2
        # Global average pool -> flatten -> linear head over the final width
        layers += [nn.AdaptiveAvgPool2d(1), Flatten(), nn.Linear(nf, num_classes)]
        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        return self.layers(x)
# Darknet with residual groups of [1, 2, 4, 6, 3] blocks, 10-way CIFAR head
m = Darknet([1, 2, 4, 6, 3], num_classes=10, nf=32)
m = nn.DataParallel(m, device_ids=None)
# if you have several GPUs for true parallel processing enable
# m = nn.DataParallel(m, device_ids=[1, 2, 3])
# NOTE(review): lr = 1.3 is unusually high — it relies on the cyclical schedule below
lr = 1.3
learn = ConvLearner.from_model_data(m, data)
learn.crit = nn.CrossEntropyLoss()
learn.metrics = [accuracy]
wd=1e-4
# 30-epoch cycle; use_clr_beta presumably is (lr divisor, annealing pct, momentum
# bounds) per fastai 0.7 — TODO confirm against fastai 0.7 docs
%time learn.fit(lr, 1, wds=wd, cycle_len=30, use_clr_beta=(20, 20, 0.95, 0.85))
A Jupyter Widget
epoch trn_loss val_loss accuracy
0 1.057594 1.163891 0.6072
1 0.791428 0.859953 0.7078
2 0.642778 0.884299 0.7168
3 0.586491 0.85431 0.7149
4 0.530411 0.724244 0.7607
5 0.492245 0.718871 0.764
6 0.465657 0.546274 0.8112
7 0.437823 0.536493 0.8182
8 0.440909 0.69369 0.7729
9 0.408925 1.115436 0.7126
10 0.401172 0.902935 0.733
11 0.397317 0.690258 0.7921
12 0.376588 0.514558 0.8287
13 0.366199 0.442919 0.8527
14 0.345316 0.796473 0.7753
15 0.333985 0.405802 0.8646
16 0.306255 0.492593 0.8455
17 0.307262 0.405131 0.8656
18 0.285951 0.504936 0.8388
19 0.262031 0.564698 0.8289
20 0.255914 0.602085 0.8234
21 0.20444 0.316775 0.9016
22 0.161276 0.281515 0.9096
23 0.110206 0.221838 0.929
24 0.071019 0.203805 0.9368
25 0.056447 0.220052 0.9346
26 0.047433 0.206167 0.9388
27 0.0333 0.212472 0.94
28 0.025766 0.209915 0.9423
29 0.021697 0.207422 0.9427
CPU times: user 12min 33s, sys: 6min 30s, total: 19min 4s
Wall time: 15min 12s
[array([0.20742]), 0.9427]
# DP: m = WideResNet(depth=22, num_classes=10, widen_factor=6, dropRate=0.)
# Single epoch at lr/10 — presumably a quick sanity/fine-tune pass; confirm intent
learn.fit(lr/10, 1, wds=wd, cycle_len=1, use_clr_beta=(100, 1, 0.9, 0.8))
A Jupyter Widget
epoch trn_loss val_loss accuracy
0 1.294204 1.058497 0.6254
[array([1.0585]), 0.6254]
# Repeat of the 30-epoch run above with identical hyperparameters
%time learn.fit(lr, 1, wds=wd, cycle_len=30, use_clr_beta=(20, 20, 0.95, 0.85))
A Jupyter Widget
epoch trn_loss val_loss accuracy
0 1.057594 1.163891 0.6072
1 0.791428 0.859953 0.7078
2 0.642778 0.884299 0.7168
3 0.586491 0.85431 0.7149
4 0.530411 0.724244 0.7607
5 0.492245 0.718871 0.764
6 0.465657 0.546274 0.8112
7 0.437823 0.536493 0.8182
8 0.440909 0.69369 0.7729
9 0.408925 1.115436 0.7126
10 0.401172 0.902935 0.733
11 0.397317 0.690258 0.7921
12 0.376588 0.514558 0.8287
13 0.366199 0.442919 0.8527
14 0.345316 0.796473 0.7753
15 0.333985 0.405802 0.8646
16 0.306255 0.492593 0.8455
17 0.307262 0.405131 0.8656
18 0.285951 0.504936 0.8388
19 0.262031 0.564698 0.8289
20 0.255914 0.602085 0.8234
21 0.20444 0.316775 0.9016
22 0.161276 0.281515 0.9096
23 0.110206 0.221838 0.929
24 0.071019 0.203805 0.9368
25 0.056447 0.220052 0.9346
26 0.047433 0.206167 0.9388
27 0.0333 0.212472 0.94
28 0.025766 0.209915 0.9423
29 0.021697 0.207422 0.9427
CPU times: user 12min 33s, sys: 6min 30s, total: 19min 4s
Wall time: 15min 12s
[array([0.20742]), 0.9427]
# Another single epoch at lr/10 between long cycles
learn.fit(lr/10, 1, wds=wd, cycle_len=1, use_clr_beta=(100, 1, 0.9, 0.8))
A Jupyter Widget
epoch trn_loss val_loss accuracy
0 1.290646 1.08506 0.6043
[array([1.08506]), 0.6043]
# Longer 40-epoch cycle with a different use_clr_beta schedule (10, 15, ...)
%time learn.fit(lr, 1, wds=wd, cycle_len=40, use_clr_beta=(10, 15, 0.95, 0.85))
A Jupyter Widget
epoch trn_loss val_loss accuracy
0 1.084963 1.085174 0.6082
1 0.825962 1.001847 0.6643
2 0.702493 0.930496 0.6788
3 0.593327 0.76002 0.7503
4 0.543732 0.654882 0.7788
5 0.503807 0.954524 0.7058
6 0.460451 0.520503 0.8216
7 0.429511 0.73571 0.7748
8 0.419969 0.555502 0.8179
9 0.415771 0.593286 0.8055
10 0.40077 0.642513 0.8029
11 0.385437 0.965159 0.7204
12 0.383747 0.691304 0.783
13 0.35558 0.705928 0.774
14 0.355323 0.631229 0.8075
15 0.354718 0.796365 0.7543
16 0.348135 0.771605 0.7833
17 0.330313 0.57747 0.819
18 0.319511 0.58501 0.8249
19 0.305789 0.553965 0.8239
20 0.309558 0.632576 0.794
21 0.312331 0.490521 0.8399
22 0.292812 0.740069 0.7778
23 0.277894 0.671362 0.8029
24 0.273641 0.489029 0.8439
25 0.262769 0.492826 0.8458
26 0.262545 0.417226 0.8648
27 0.24185 0.522333 0.8441
28 0.235303 0.573098 0.8327
29 0.223754 0.468723 0.8581
30 0.212939 0.421378 0.8665
31 0.168992 0.31097 0.9021
32 0.162031 0.339769 0.8921
33 0.11894 0.280535 0.9163
34 0.091013 0.250175 0.9251
35 0.067888 0.2331 0.9318
36 0.047799 0.226535 0.9368
37 0.030331 0.216385 0.9405
38 0.020049 0.204373 0.9429
39 0.012407 0.20191 0.9448
CPU times: user 31min 18s, sys: 8min 10s, total: 39min 28s
Wall time: 39min 28s
[array([0.20191]), 0.9448]
# Single epoch at lr/10 before the next experiment
learn.fit(lr/10, 1, wds=wd, cycle_len=1, use_clr_beta=(100, 1, 0.9, 0.8))
A Jupyter Widget
epoch trn_loss val_loss accuracy
0 1.272734 1.096014 0.6024
[array([1.09601]), 0.6024]
# 30-epoch cycle at a fixed lr of 1.0 (not the lr variable) with schedule (10, 25, ...)
%time learn.fit(1., 1, wds=wd, cycle_len=30, use_clr_beta=(10, 25, 0.95, 0.85))
A Jupyter Widget
epoch trn_loss val_loss accuracy
0 1.05031 1.058887 0.6364
1 0.784812 1.010626 0.6695
2 0.662347 0.962157 0.6972
3 0.575188 0.63956 0.7755
4 0.516394 0.830657 0.7348
5 0.465992 0.802316 0.7552
6 0.437692 0.666769 0.7819
7 0.421664 0.699891 0.7839
8 0.409448 0.716692 0.7687
9 0.378664 0.57033 0.8045
10 0.378573 0.713038 0.7787
11 0.358377 0.843966 0.7419
12 0.328598 0.72601 0.7865
13 0.32008 0.702174 0.7723
14 0.299527 0.478337 0.8429
15 0.292695 0.692018 0.7925
16 0.279109 0.502219 0.8366
17 0.252726 0.3686 0.8795
18 0.243707 0.375788 0.8743
19 0.220037 0.447402 0.8516
20 0.200196 0.408494 0.8703
21 0.145898 0.295205 0.9067
22 0.10881 0.255926 0.9216
23 0.086912 0.239246 0.9252
24 0.065587 0.299195 0.9173
25 0.058427 0.239054 0.9322
26 0.035484 0.233135 0.9357
27 0.026134 0.229944 0.9382
28 0.018713 0.225882 0.9424
29 0.012363 0.228542 0.9416
CPU times: user 23min 22s, sys: 6min 6s, total: 29min 28s
Wall time: 29min 27s
[array([0.22854]), 0.9416]
# 40-epoch cycle with a much larger first schedule parameter (100 vs 10/20 above)
%time learn.fit(lr, 1, wds=wd, cycle_len=40, use_clr_beta=(100, 15, 0.95, 0.85))
A Jupyter Widget
epoch trn_loss val_loss accuracy
0 1.428835 1.412945 0.5194
1 0.995178 2.106482 0.4173
2 0.783325 1.311001 0.6189
3 0.650752 0.948611 0.6966
4 0.572025 0.611907 0.7906
5 0.540067 0.853141 0.7339
6 0.479793 0.552199 0.8167
7 0.457727 0.951997 0.7204
8 0.429621 0.825224 0.7421
9 0.412101 1.17335 0.6826
10 0.4009 0.808628 0.7315
11 0.383221 0.60638 0.804
12 0.36901 0.647075 0.7874
13 0.368189 0.741168 0.7825
14 0.366653 0.653589 0.7973
15 0.355023 0.71174 0.7841
16 0.354656 0.581478 0.8107
17 0.335075 0.602485 0.8103
18 0.336688 0.677685 0.7901
19 0.318256 0.670932 0.7865
20 0.30799 0.627659 0.8039
21 0.298475 0.429212 0.8597
22 0.297467 0.565836 0.8317
23 0.273995 0.513813 0.8408
24 0.270703 0.557105 0.822
25 0.259869 0.370845 0.8781
26 0.242922 0.517631 0.8333
27 0.22433 0.424905 0.8628
28 0.217046 0.460085 0.8528
29 0.179096 0.329448 0.8916
30 0.15454 0.34805 0.892
31 0.125648 0.262553 0.919
32 0.0801 0.244773 0.9244
33 0.047182 0.206418 0.9396
34 0.082104 0.277417 0.9182
35 0.070456 0.292839 0.9197
36 0.049607 0.245121 0.9325
37 0.03186 0.245641 0.9357
38 0.017482 0.21458 0.9405
39 0.012877 0.207285 0.9427
CPU times: user 31min 8s, sys: 8min 12s, total: 39min 20s
Wall time: 39min 22s
[array([0.20728]), 0.9427]
# darknet 2222 lr 1.3 65 cl
# Longest experiment: 65-epoch cycle with schedule (30, 20, 0.95, 0.85)
%time learn.fit(lr, 1, wds=wd, cycle_len=65, use_clr_beta=(30, 20, 0.95, 0.85))
A Jupyter Widget
epoch trn_loss val_loss accuracy
0 1.533084 1.725181 0.4627
1 1.224625 1.241789 0.5727
2 0.995259 1.005162 0.6476
3 0.865179 0.949657 0.6642
4 0.756122 0.854677 0.6968
5 0.692097 1.110497 0.6578
6 0.635014 0.805524 0.7227
7 0.588518 0.759759 0.7334
8 0.567764 0.868924 0.7131
9 0.547826 0.700656 0.7645
10 0.524676 1.005113 0.6889
11 0.50653 0.721323 0.7645
12 0.493718 1.125408 0.6608
13 0.479434 0.756994 0.7639
14 0.475674 0.73913 0.7589
15 0.464452 0.612312 0.7955
16 0.453685 0.772014 0.757
17 0.436029 0.60522 0.7943
18 0.437321 0.555058 0.8158
19 0.439846 0.819791 0.7449
20 0.420495 0.994983 0.719
21 0.416594 0.687188 0.7813
22 0.413399 0.714974 0.7787
23 0.421343 0.696471 0.7761
24 0.41174 0.853185 0.7445
25 0.411808 0.693145 0.7781
26 0.412166 0.847656 0.7456
27 0.402742 0.73174 0.772
28 0.391636 0.685092 0.7868
29 0.384671 0.635394 0.7931
30 0.364357 0.856764 0.7271
31 0.374435 0.490243 0.8325
32 0.364152 0.685217 0.7872
33 0.361441 0.724616 0.7843
34 0.344948 0.541638 0.8189
35 0.341661 0.604952 0.8152
36 0.337969 0.571531 0.8172
37 0.328699 0.55272 0.8177
38 0.32664 0.429266 0.8554
39 0.316233 0.424243 0.8555
40 0.302454 0.455984 0.8502
41 0.296169 0.61181 0.8123
42 0.283048 0.572225 0.8267
43 0.275228 0.453885 0.853
44 0.273048 0.408815 0.863
45 0.254404 0.397202 0.8715
46 0.219166 0.403471 0.868
47 0.215263 0.323341 0.8928
48 0.192285 0.37336 0.8824
49 0.163661 0.270863 0.9095
50 0.118515 0.269602 0.9151
51 0.089315 0.209591 0.9317
52 0.058886 0.212586 0.9339
53 0.05148 0.212392 0.9345
54 0.046729 0.232031 0.9343
55 0.038997 0.231949 0.9349
56 0.035254 0.233632 0.9349
57 0.03046 0.232361 0.937
58 0.027203 0.22916 0.94
59 0.020285 0.231641 0.9401
60 0.017448 0.23432 0.9405
61 0.016971 0.232452 0.9415
62 0.011784 0.23313 0.9416
63 0.011399 0.233199 0.9432
64 0.009589 0.233732 0.9422
CPU times: user 1h 5min 54s, sys: 16min 4s, total: 1h 21min 59s
Wall time: 57min 23s
[array([0.23373]), 0.9422]