#|default_exp resnet

#|export
import pickle,gzip,math,os,time,shutil,torch,matplotlib as mpl,numpy as np,matplotlib.pyplot as plt
import fastcore.all as fc
from collections.abc import Mapping
from pathlib import Path
from operator import attrgetter,itemgetter
from functools import partial
from copy import copy
from contextlib import contextmanager

import torchvision.transforms.functional as TF,torch.nn.functional as F
from torch import tensor,nn,optim
from torch.utils.data import DataLoader,default_collate
from torch.nn import init
from torch.optim import lr_scheduler
from torcheval.metrics import MulticlassAccuracy
from datasets import load_dataset,load_dataset_builder

from miniai.datasets import *
from miniai.conv import *
from miniai.learner import *
from miniai.activations import *
from miniai.init import *
from miniai.sgd import *

from fastcore.test import test_close

torch.set_printoptions(precision=2, linewidth=140, sci_mode=False)
torch.manual_seed(1)
mpl.rcParams['image.cmap'] = 'gray'

import logging
logging.disable(logging.WARNING)

# Load Fashion-MNIST and normalize with precomputed mean/std.
set_seed(42)
xl,yl = 'image','label'
name = "fashion_mnist"
bs = 1024
xmean,xstd = 0.28, 0.35

@inplace
def transformi(b): b[xl] = [(TF.to_tensor(o)-xmean)/xstd for o in b[xl]]

dsd = load_dataset(name)
tds = dsd.with_transform(transformi)
dls = DataLoaders.from_dd(tds, bs, num_workers=4)

#|export
act_gr = partial(GeneralRelu, leak=0.1, sub=0.4)

metrics = MetricsCB(accuracy=MulticlassAccuracy())
astats = ActivationStats(fc.risinstance(GeneralRelu))
cbs = [DeviceCB(), metrics, ProgressCB(plot=True), astats]
iw = partial(init_weights, leaky=0.1)

# Baseline: a plain convnet with no residual connections.
def get_model(act=nn.ReLU, nfs=(8,16,32,64,128), norm=nn.BatchNorm2d):
    layers = [conv(1, 8, stride=1, act=act, norm=norm)]
    layers += [conv(nfs[i], nfs[i+1], act=act, norm=norm) for i in range(len(nfs)-1)]
    return nn.Sequential(*layers, conv(nfs[-1], 10, act=None, norm=norm, bias=True),
                         nn.Flatten()).to(def_device)

# Train the baseline with one-cycle scheduling and AdamW.
set_seed(42)
lr,epochs = 6e-2,5
model = get_model(act_gr, norm=nn.BatchNorm2d).apply(iw)
tmax = epochs * len(dls.train)
sched = partial(lr_scheduler.OneCycleLR, max_lr=lr, total_steps=tmax)
xtra = [BatchSchedCB(sched)]
learn = TrainLearner(model, dls, F.cross_entropy, lr=lr, cbs=cbs+xtra, opt_func=optim.AdamW)
learn.fit(epochs)

#|export
def _conv_block(ni, nf, stride, act=act_gr, norm=None, ks=3):
    return nn.Sequential(conv(ni, nf, stride=1, act=act, norm=norm, ks=ks),
                         conv(nf, nf, stride=stride, act=None, norm=norm, ks=ks))

class ResBlock(nn.Module):
    def __init__(self, ni, nf, stride=1, ks=3, act=act_gr, norm=None):
        super().__init__()
        self.convs = _conv_block(ni, nf, stride, act=act, ks=ks, norm=norm)
        # Identity path: 1x1 conv if the channel count changes, avg-pool if strided.
        self.idconv = fc.noop if ni==nf else conv(ni, nf, ks=1, stride=1, act=None)
        self.pool = fc.noop if stride==1 else nn.AvgPool2d(2, ceil_mode=True)
        self.act = act()

    def forward(self, x): return self.act(self.convs(x) + self.idconv(self.pool(x)))

def get_model(act=nn.ReLU, nfs=(8,16,32,64,128,256), norm=nn.BatchNorm2d):
    layers = [ResBlock(1, 8, stride=1, act=act, norm=norm)]
    layers += [ResBlock(nfs[i], nfs[i+1], act=act, norm=norm, stride=2) for i in range(len(nfs)-1)]
    layers += [nn.Flatten(), nn.Linear(nfs[-1], 10, bias=False), nn.BatchNorm1d(10)]
    return nn.Sequential(*layers).to(def_device)

# Print the shapes flowing through the model, using a hook on every module.
def _print_shape(hook, mod, inp, outp): print(type(mod).__name__, inp[0].shape, outp.shape)

model = get_model()
learn = TrainLearner(model, dls, F.cross_entropy, cbs=[DeviceCB(), SingleBatchCB()])
with Hooks(model, _print_shape) as hooks: learn.fit(1, train=False)
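# The residual connection is easiest to see in isolation. Below is a minimal,
# self-contained sketch of the same pattern in plain PyTorch: MiniResBlock and
# its vanilla ReLU/BatchNorm layers are illustrative stand-ins for miniai's
# conv/GeneralRelu, not part of the library. The block computes
# act(convs(x) + idconv(pool(x))), and the strided main path and the pooled
# identity path always produce matching shapes, so the sum is well defined.
class MiniResBlock(nn.Module):
    def __init__(self, ni, nf, stride=1):
        super().__init__()
        self.convs = nn.Sequential(
            nn.Conv2d(ni, nf, 3, padding=1), nn.BatchNorm2d(nf), nn.ReLU(),
            nn.Conv2d(nf, nf, 3, stride=stride, padding=1), nn.BatchNorm2d(nf))
        self.idconv = nn.Identity() if ni==nf else nn.Conv2d(ni, nf, 1)
        self.pool = nn.Identity() if stride==1 else nn.AvgPool2d(2, ceil_mode=True)
        self.act = nn.ReLU()

    def forward(self, x): return self.act(self.convs(x) + self.idconv(self.pool(x)))

x = torch.randn(2, 8, 28, 28)
print(MiniResBlock(8, 16, stride=2)(x).shape)  # torch.Size([2, 16, 14, 14])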
# Patch a summary method onto Learner: hooks record each module's input/output
# shapes and parameter count into a markdown table, run on a single batch.
@fc.patch
def summary(self:Learner):
    res = '|Module|Input|Output|Num params|\n|--|--|--|--|\n'
    tot = 0
    def _f(hook, mod, inp, outp):
        nonlocal res,tot
        nparms = sum(o.numel() for o in mod.parameters())
        tot += nparms
        res += f'|{type(mod).__name__}|{tuple(inp[0].shape)}|{tuple(outp.shape)}|{nparms}|\n'
    with Hooks(self.model, _f) as hooks: self.fit(1, lr=1, train=False, cbs=SingleBatchCB())
    print("Tot params: ", tot)
    if fc.IN_NOTEBOOK:
        from IPython.display import Markdown
        return Markdown(res)
    else: print(res)

TrainLearner(get_model(), dls, F.cross_entropy, cbs=DeviceCB()).summary()

# Find a good learning rate for the resnet...
model = get_model(act_gr, norm=nn.BatchNorm2d).apply(iw)
MomentumLearner(model, dls, F.cross_entropy, cbs=DeviceCB()).lr_find()

# ...then train it with the same one-cycle/AdamW setup as the baseline.
lr = 2e-2
tmax = epochs * len(dls.train)
sched = partial(lr_scheduler.OneCycleLR, max_lr=lr, total_steps=tmax)
xtra = [BatchSchedCB(sched)]
model = get_model(act_gr, norm=nn.BatchNorm2d).apply(iw)
learn = TrainLearner(model, dls, F.cross_entropy, lr=lr, cbs=cbs+xtra, opt_func=optim.AdamW)
learn.fit(epochs)

# Compare against a ready-made resnet from timm.
import timm
from timm.models.resnet import BasicBlock, ResNet, Bottleneck
' '.join(timm.list_models('*resnet*'))

model = timm.create_model('resnet18d', in_chans=1, num_classes=10)
# model = ResNet(in_chans=1, block=BasicBlock, layers=[2,2,2,2], stem_width=32, avg_down=True)
lr = 2e-2
tmax = epochs * len(dls.train)
sched = partial(lr_scheduler.OneCycleLR, max_lr=lr, total_steps=tmax)
xtra = [BatchSchedCB(sched)]
learn = TrainLearner(model, dls, F.cross_entropy, lr=lr, cbs=cbs+xtra, opt_func=optim.AdamW)
learn.fit(epochs)

import nbdev; nbdev.nbdev_export()
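# Aside: what the OneCycleLR schedule used throughout actually does, sketched
# with a toy model and optimizer (the Linear model here is purely illustrative).
# With PyTorch's defaults, the LR warms up from max_lr/div_factor to max_lr,
# then anneals far below the starting value, all within total_steps steps.
toy = nn.Linear(1, 1)
topt = optim.AdamW(toy.parameters(), lr=2e-2)
tsched = lr_scheduler.OneCycleLR(topt, max_lr=2e-2, total_steps=100)
lrs = []
for _ in range(100):
    lrs.append(tsched.get_last_lr()[0])
    topt.step(); tsched.step()
print(f'start={lrs[0]:.1e} peak={max(lrs):.1e} end={lrs[-1]:.1e}')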