#!/usr/bin/env python
# coding: utf-8

# ## CIFAR 10
#
# Notebook-export script: builds CIFAR-10 data loaders with torchvision,
# defines a Darknet-style backbone, and wraps a model in a fastai
# ConvLearner for training (the fit calls follow below).

get_ipython().run_line_magic('matplotlib', 'inline')
get_ipython().run_line_magic('reload_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')

from fastai.conv_learner import *

PATH = Path("/home/ubuntu/data/cifar10/")
os.makedirs(PATH, exist_ok=True)

from torchvision import transforms, datasets

torch.cuda.set_device(0)

classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')
# NOTE(review): `stats` is defined but never used below — the torchvision
# Normalize transform uses slightly different per-channel std values.
stats = (np.array([0.4914, 0.48216, 0.44653]),
         np.array([0.24703, 0.24349, 0.26159]))

bs = 256

# The notebook originally had `num_workers = num_cpus()` immediately
# overwritten by the cell below; the dead store is removed and the
# effective value kept.
num_workers = 16

traindir = str(PATH/'train')
valdir = str(PATH/'test')

tfms = [transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010))]

train_dataset = datasets.ImageFolder(
    traindir,
    transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
    ] + tfms))

train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=bs, shuffle=True,
    num_workers=num_workers, pin_memory=True)

val_dataset = datasets.ImageFolder(valdir, transforms.Compose(tfms))

val_loader = torch.utils.data.DataLoader(
    val_dataset, batch_size=bs*2, shuffle=False,
    num_workers=num_workers, pin_memory=True)

data = ModelData(PATH, train_loader, val_loader)
data.sz = 32


class ConvLayer(nn.Module):
    """Conv2d (no bias, 'same' padding) -> BatchNorm2d -> LeakyReLU(0.1).

    Args:
        ni: input channels.
        nf: output channels.
        ks: square kernel size (padding is ks//2).
        stride: conv stride.
    """
    def __init__(self, ni, nf, ks=3, stride=1):
        super().__init__()
        self.conv = nn.Conv2d(ni, nf, kernel_size=ks, bias=False,
                              stride=stride, padding=ks//2)
        self.bn = nn.BatchNorm2d(nf, momentum=0.01)
        self.relu = nn.LeakyReLU(negative_slope=0.1, inplace=True)

    def forward(self, x):
        return self.relu(self.bn(self.conv(x)))


class ResLayer(nn.Module):
    """Darknet residual bottleneck: 1x1 squeeze to ni//2, 3x3 expand back
    to ni, added to the input (channel count is preserved).

    With probability `pr_drop` — and only while training — the residual
    branch is skipped entirely, making the layer the identity
    (stochastic-depth-style regularization).
    """
    def __init__(self, ni, pr_drop=0):
        super().__init__()
        self.pr_drop = pr_drop
        self.conv1 = ConvLayer(ni, ni//2, ks=1)
        self.conv2 = ConvLayer(ni//2, ni, ks=3)

    def forward(self, x):
        drop = self.training and (random.random() < self.pr_drop)
        return (0 if drop else self.conv2(self.conv1(x))) + x


class Darknet(nn.Module):
    """Darknet-style classifier: a stem conv, then one group per entry in
    `num_blocks` (each group = a widening conv followed by that many
    ResLayers), finished by global average pooling and a linear head.
    """

    def make_group_layer(self, ch_in, num_blocks, widen, stride=1, pr_drop=0):
        """One group: a (possibly strided) conv widening ch_in -> ch_in*widen,
        followed by `num_blocks` residual layers at the widened width."""
        return ([ConvLayer(ch_in, ch_in*widen, stride=stride)]
                + [ResLayer(ch_in*widen, pr_drop=pr_drop)
                   for _ in range(num_blocks)])

    def __init__(self, num_blocks, num_classes, start_nf=32, widen=2, pr_drop=0):
        super().__init__()
        nf = start_nf
        layers = [ConvLayer(3, nf, ks=3, stride=1)]
        for i, nb in enumerate(num_blocks):
            # stride 2 for every group except the second (i == 1), which
            # keeps spatial resolution: 2 - (i == 1) evaluates to 1 there.
            layers += self.make_group_layer(nf, nb, widen, stride=2-(i==1),
                                            pr_drop=pr_drop)
            # only the first group uses the caller-supplied widen factor;
            # later groups always double the channel count
            nf *= widen; widen = 2
        layers += [nn.AdaptiveAvgPool2d(1), Flatten(),
                   nn.Linear(nf, num_classes)]
        self.layers = nn.Sequential(*layers)

    def do_pr(self, m, pr):
        # Set the drop probability on any submodule that has one (ResLayer).
        if hasattr(m, 'pr_drop'): m.pr_drop = pr

    def set_pr_drop(self, pr):
        """Set the stochastic-depth drop probability on every ResLayer."""
        self.apply(lambda m: self.do_pr(m, pr))

    def forward(self, x):
        return self.layers(x)


from models.wideresnet import WideResNet

m = WideResNet(depth=22, num_classes=10, widen_factor=6, dropRate=0.)

m = nn.DataParallel(m, [0, 1, 2, 3])

lr = 1.3

# Alternative Darknet configurations tried in earlier runs:
# m = Darknet([1, 2, 4, 6, 3], num_classes=10, start_nf=32)#, pr_drop=0.5)
# m = Darknet([1, 2, 2, 2, 2], num_classes=10, start_nf=32)#, pr_drop=0.5)
# m = Darknet([2, 2, 2, 2], num_classes=10, start_nf=64)

# Earlier experiment with explicit initialization (kept for reference):
# from torch.nn.init import kaiming_normal
# def init_bn_bias(m):
#     if isinstance(m, nn.Conv2d): kaiming_normal(m.weight.data, 0.1)
#     elif isinstance(m, nn.BatchNorm2d): m.bias.data.zero_()
#     elif isinstance(m, nn.Linear): m.bias.data.zero_()
# m.apply(init_bn_bias);

learn = ConvLearner.from_model_data(m, data)
learn.crit = nn.CrossEntropyLoss()
learn.metrics = [accuracy]
wd = 1e-4

learn.half()

# NOTE(review): set_pr_drop only exists on the Darknet class above; this
# cell is a leftover from Darknet runs and would raise AttributeError when
# `m` is DataParallel(WideResNet) — confirm before running top-to-bottom.
learn.model.set_pr_drop(0)

# DP: m = WideResNet(depth=22, num_classes=10, widen_factor=6, dropRate=0.)
# Training runs (notebook cells preserved in execution order). Each run is
# a short warm-up fit at lr/10 followed by a timed full cycle with the
# one-cycle-style `use_clr_beta` schedule.

# Warm-up, then 30-epoch cycle.
learn.fit(lr/10, 1, wds=wd, cycle_len=1, use_clr_beta=(100, 1, 0.9, 0.8))
get_ipython().run_line_magic('time', 'learn.fit(lr, 1, wds=wd, cycle_len=30, use_clr_beta=(20, 20, 0.95, 0.85))')

# DP: m = WideResNet(depth=22, num_classes=10, widen_factor=6, dropRate=0.)
# Repeat of the same warm-up + 30-epoch schedule.
learn.fit(lr/10, 1, wds=wd, cycle_len=1, use_clr_beta=(100, 1, 0.9, 0.8))
get_ipython().run_line_magic('time', 'learn.fit(lr, 1, wds=wd, cycle_len=30, use_clr_beta=(20, 20, 0.95, 0.85))')

# Warm-up, then a longer 40-epoch cycle with a different CLR shape.
learn.fit(lr/10, 1, wds=wd, cycle_len=1, use_clr_beta=(100, 1, 0.9, 0.8))
get_ipython().run_line_magic('time', 'learn.fit(lr, 1, wds=wd, cycle_len=40, use_clr_beta=(10, 15, 0.95, 0.85))')

# Warm-up, then 30 epochs at a fixed learning rate of 1.0.
learn.fit(lr/10, 1, wds=wd, cycle_len=1, use_clr_beta=(100, 1, 0.9, 0.8))
get_ipython().run_line_magic('time', 'learn.fit(1., 1, wds=wd, cycle_len=30, use_clr_beta=(10, 25, 0.95, 0.85))')

# 40-epoch cycle without a separate warm-up fit.
get_ipython().run_line_magic('time', 'learn.fit(lr, 1, wds=wd, cycle_len=40, use_clr_beta=(100, 15, 0.95, 0.85))')

# darknet 2222 lr 1.3 65 cl
get_ipython().run_line_magic('time', 'learn.fit(lr, 1, wds=wd, cycle_len=65, use_clr_beta=(30, 20, 0.95, 0.85))')