from nb_200 import *
import random
import pickle
device = torch.device('cuda', 0)
class PetsData(DataBlock):
types = Image,Category
get_items = lambda source, self: [get_image_files(source)[0]]*100
split = random_splitter()
label_func = re_labeller(pat = r'/([^/]+)_\d+.jpg$')
class CamvidData(DataBlock):
types = Image,SegmentMask
get_items = lambda source,self: [get_image_files(source/'images')[0]] * 100
split = random_splitter()
label_func = lambda o,self: self.source/'labels'/f'{o.stem}_P{o.suffix}'
class BiwiData(DataBlock):
types = Image,Points
def __init__(self, source, *args, **kwargs):
super().__init__(source, *args, **kwargs)
self.fn2ctr = pickle.load(open(source/'centers.pkl', 'rb'))
get_items = lambda source, self: [get_image_files(source/'images')[0]] * 100
split = random_splitter()
label_func = lambda o,self: [[0, 0], [120, 0], [0, 160], [120,160]]
class CocoData(DataBlock):
types = Image,BBox
def __init__(self, source, *args, **kwargs):
super().__init__(source, *args, **kwargs)
images, lbl_bbox = get_annotations(source/'train.json')
self.img2bbox = dict(zip(images, lbl_bbox))
get_items = lambda source, self: [get_image_files(source/'train')[18]] * 100
split = random_splitter()
label_func = lambda o,self: self.img2bbox[o.name]
def databunch(self, ds_tfms=None, dl_tfms=None, bs=64, tfm_kwargs=None, **kwargs):
return super().databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=bs, tfm_kwargs=tfm_kwargs,
collate_fn=bb_pad_collate, **kwargs)
ds_tfms = [DecodeImg(), ResizeFixed(128), ToByteTensor()]
dl_tfms = [Cuda(device), ToFloatTensor()]
pets_src = untar_data(URLs.PETS)
camvid_src = untar_data(URLs.CAMVID_TINY)
biwi_src = untar_data(URLs.BIWI_SAMPLE)
coco_src = untar_data(URLs.COCO_TINY)
pets = PetsData (pets_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
camvid = CamvidData(camvid_src).databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
biwi = BiwiData (biwi_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
coco = CocoData (coco_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
coco.show_batch()
# export
class Flip(ImageTransform):
_data_aug=True
def __init__(self, p=0.5): self.p = p
def randomize(self): self.do = random.random() < self.p
def apply(self, x):
return x.transpose(PIL.Image.FLIP_LEFT_RIGHT) if self.do else x
def apply_point(self, x):
if self.do: x[...,0] = -x[...,0]
return x
    def apply_bbox(self, x):
        # re-sort the corners after the flip so the box stays (top-left, bottom-right)
        pnts = self.apply_point(x[0].view(-1,2)).view(-1,2,2)
        tl,br = pnts.min(dim=1)[0],pnts.max(dim=1)[0]
        return (torch.cat([tl, br], dim=1), x[1])
ds_tfms = [DecodeImg(), Flip(), ResizeFixed(128), ToByteTensor()]
pets = PetsData (pets_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
camvid = CamvidData(camvid_src).databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
biwi = BiwiData (biwi_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
coco = CocoData (coco_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
coco.show_batch()
# export
class Dihedral(ImageTransform):
_data_aug=True
def __init__(self, p=0.5): self.p = p
def randomize(self):
self.idx = random.randint(0,7) if random.random() < self.p else 0
def apply(self, x): return x if self.idx==0 else x.transpose(self.idx-1)
def apply_point(self, x):
if self.idx in [1, 3, 4, 7]: x[...,0] = -x[...,0]
if self.idx in [2, 4, 5, 7]: x[...,1] = -x[...,1]
if self.idx in [3, 5, 6, 7]: x = x.flip(1)
return x
def apply_bbox(self, x):
pnts = self.apply_point(x[0].view(-1,2)).view(-1,2,2)
tl,dr = pnts.min(dim=1)[0],pnts.max(dim=1)[0]
return [torch.cat([tl, dr], dim=1), x[1]]
ds_tfms = [DecodeImg(), Dihedral(), ResizeFixed(128), ToByteTensor()]
pets = PetsData (pets_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
camvid = CamvidData(camvid_src).databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
biwi = BiwiData (biwi_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
coco = CocoData (coco_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
coco.show_batch()
This is the main transform: it composes the affine and coordinate transforms and performs only one interpolation at the end. The implementation differs for each type of target.
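As a minimal sketch of the idea (with made-up rotation and zoom matrices): the affine matrices are multiplied together first, so `F.affine_grid`/`F.grid_sample` only resample the image once.
import math, torch
import torch.nn.functional as F
theta = math.pi/6
rot  = torch.tensor([[ math.cos(theta), math.sin(theta), 0.],
                     [-math.sin(theta), math.cos(theta), 0.],
                     [ 0.,              0.,              1.]])
zoom = torch.tensor([[0.9, 0., 0.],
                     [0.,  0.9, 0.],
                     [0.,  0.,  1.]])
both = rot @ zoom                                    # compose first: one matrix...
grid = F.affine_grid(both[:2][None], (1, 3, 8, 8))   # ...hence a single grid/interpolation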
# export
def clip_remove_empty(bbox, label):
bbox = torch.clamp(bbox, -1, 1)
    empty = ((bbox[...,2] - bbox[...,0])*(bbox[...,3] - bbox[...,1]) <= 0.)
if isinstance(label, torch.Tensor): label[empty] = 0
else:
for i,m in enumerate(empty):
if m: label[i] = 0
return [bbox, label]
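A quick sanity check with made-up values: a box whose corners come out reversed (negative area) is clamped and its label is reset to 0, i.e. background.
bbox  = torch.tensor([[-0.5, -0.5,  0.5, 0.5],
                      [ 0.5, -0.5, -0.5, 0.5]])   # second box has reversed x corners
label = torch.tensor([3, 7])
clip_remove_empty(bbox, label)                    # boxes clamped, label becomes [3, 0]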
# export
class AffineAndCoordTfm(ImageTransform):
_data_aug=True
def __init__(self, aff_tfms, coord_tfms, size=None, mode='bilinear', padding_mode='reflection'):
self.aff_tfms,self.coord_tfms,self.mode,self.padding_mode = aff_tfms,coord_tfms,mode,padding_mode
self.size = None if size is None else (size,size) if isinstance(size, int) else tuple(size)
def randomize(self):
for t in self.aff_tfms+self.coord_tfms: t.randomize(self.x)
def _get_affine_mat(self):
aff_m = torch.eye(3, dtype=self.x.dtype, device=self.x.device)
aff_m = aff_m.unsqueeze(0).expand(self.x.size(0), 3, 3)
ms = [tfm() for tfm in self.aff_tfms]
ms = [m for m in ms if m is not None]
for m in ms: aff_m = aff_m @ m
return aff_m
def apply(self, x):
bs = x.size(0)
size = tuple(x.shape[-2:]) if self.size is None else self.size
size = (bs,x.size(1)) + size
coords = F.affine_grid(self._get_affine_mat()[:,:2], size)
coords = apply_all(coords, self.coord_tfms)
return F.grid_sample(x, coords, mode=self.mode, padding_mode=self.padding_mode)
def apply_mask(self, y):
self.old_mode,self.mode = self.mode,'nearest'
res = self.apply(y.float())
self.mode = self.old_mode
return res.long()
def apply_point(self, y):
m = self._get_affine_mat()[:,:2]
y = (y - m[:,:,2].unsqueeze(1)) @ torch.inverse(m[:,:2,:2].transpose(1,2))
return apply_all(y, self.coord_tfms, filter_kwargs=True, invert=True)
def apply_bbox(self, y):
bbox,label = y
bs,n = bbox.shape[:2]
pnts = stack([bbox[...,:2], stack([bbox[...,0],bbox[...,3]],dim=2),
stack([bbox[...,2],bbox[...,1]],dim=2), bbox[...,2:]], dim=2)
pnts = self.apply_point(pnts.view(bs, 4*n, 2))
pnts = pnts.view(bs, n, 4, 2)
tl,dr = pnts.min(dim=2)[0],pnts.max(dim=2)[0]
return clip_remove_empty(torch.cat([tl, dr], dim=2), label)
# export
import math
from torch import stack, zeros_like as t0, ones_like as t1
from torch.distributions.bernoulli import Bernoulli
# export
def mask_tensor(x, p=0.5, neutral=0.):
if p==1.: return x
if neutral != 0: x.add_(-neutral)
mask = x.new_empty(*x.size()).bernoulli_(p)
x.mul_(mask)
return x.add_(neutral) if neutral != 0 else x
# export
def masked_uniform(x, a, b, *sz, p=0.5, neutral=0.):
return mask_tensor(x.new_empty(*sz).uniform_(a,b), p=p, neutral=neutral)
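To make the convention explicit: in `mask_tensor` (and the `masked_*` helpers built on it), `p` is the probability that an element keeps its drawn value; the others are reset to `neutral`. A small sketch:
t = torch.ones(8) * 2
mask_tensor(t.clone(), p=0.5)                 # roughly half the entries become 0
mask_tensor(t.clone(), p=0.5, neutral=1.)     # ...or 1 when neutral is 1
masked_uniform(t, -10., 10., 8, p=0.5)        # uniform draws, the rest reset to 0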
# export
class Rotation():
def __init__(self, degrees=10., p=0.5):
self.range,self.p = (-degrees,degrees),p
def randomize(self, x):
thetas = masked_uniform(x, *self.range, x.size(0), p=self.p) * math.pi/180
self.mat = stack([stack([thetas.cos(), thetas.sin(), t0(thetas)], dim=1),
stack([-thetas.sin(), thetas.cos(), t0(thetas)], dim=1),
stack([t0(thetas), t0(thetas), t1(thetas)], dim=1)], dim=1)
def __call__(self): return self.mat
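A quick shape check (pixel values are irrelevant here, only the batch size is read): `randomize` builds one 3x3 matrix per image, which `AffineAndCoordTfm._get_affine_mat` multiplies together before slicing to 2x3 for `F.affine_grid`.
x = torch.zeros(4, 3, 64, 64)     # fake batch
rot = Rotation(degrees=30., p=1.)
rot.randomize(x)
rot().shape                       # torch.Size([4, 3, 3])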
ds_tfms = [DecodeImg(), ResizeFixed(128), ToByteTensor()]
dl_tfms = [Cuda(device), ToFloatTensor(), AffineAndCoordTfm([Rotation(30.)], [])]
pets = PetsData (pets_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
camvid = CamvidData(camvid_src).databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
biwi = BiwiData (biwi_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
coco = CocoData (coco_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
coco.show_batch()
# export
class FlipAffine():
def __init__(self, p=0.5):
self.p=p
def randomize(self, x):
mask = -2*x.new_empty(x.size(0)).bernoulli_(self.p)+1
self.mat = stack([stack([mask, t0(mask), t0(mask)], dim=1),
stack([t0(mask), t1(mask), t0(mask)], dim=1),
stack([t0(mask), t0(mask), t1(mask)], dim=1)], dim=1)
def __call__(self): return self.mat
# export
class DihedralAffine():
def __init__(self, p=0.5):
self.p=p
def randomize(self, x):
idx = mask_tensor(torch.randint(0, 8, (x.size(0),), device=x.device), p=self.p)
xs = 1 - 2*(idx & 1)
ys = 1 - (idx & 2)
m0,m1 = (idx<4).long(),(idx>3).long()
self.mat = stack([stack([xs*m0, xs*m1, t0(xs)], dim=1),
stack([ys*m1, ys*m0, t0(xs)], dim=1),
stack([t0(xs), t0(xs), t1(xs)], dim=1)], dim=1).float()
def __call__(self): return self.mat
dl_tfms = [Cuda(device), ToFloatTensor(), AffineAndCoordTfm([DihedralAffine()], [])]
pets = PetsData (pets_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
camvid = CamvidData(camvid_src).databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
biwi = BiwiData (biwi_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
coco = CocoData (coco_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
coco.show_batch()
class Zoom():
def __init__(self, max_zoom=1.1, p=0.5):
self.range,self.p = (1,max_zoom),p
def randomize(self, x):
s = 1/masked_uniform(x, *self.range, x.size(0), p=self.p, neutral=1.)
col_pct = x.new_empty(x.size(0)).uniform_(0.,1.)
row_pct = x.new_empty(x.size(0)).uniform_(0.,1.)
col_c = (1-s) * (2*col_pct - 1)
row_c = (1-s) * (2*row_pct - 1)
self.mat = stack([stack([s, t0(s), col_c], dim=1),
stack([t0(s), s, row_c], dim=1),
stack([t0(s), t0(s), t1(s)], dim=1)], dim=1)
def __call__(self): return self.mat
dl_tfms = [Cuda(device), ToFloatTensor(), AffineAndCoordTfm([Zoom()], [])]
pets = PetsData (pets_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
camvid = CamvidData(camvid_src).databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
biwi = BiwiData (biwi_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
coco = CocoData (coco_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
coco.show_batch()
# export
def find_coeffs(p1, p2):
matrix = []
p = p1[:,0,0]
#The equations we'll need to solve.
for i in range(p1.shape[1]):
matrix.append(stack([p2[:,i,0], p2[:,i,1], t1(p), t0(p), t0(p), t0(p), -p1[:,i,0]*p2[:,i,0], -p1[:,i,0]*p2[:,i,1]]))
matrix.append(stack([t0(p), t0(p), t0(p), p2[:,i,0], p2[:,i,1], t1(p), -p1[:,i,1]*p2[:,i,0], -p1[:,i,1]*p2[:,i,1]]))
    #The 8 scalars we seek are the solution of AX = B
A = stack(matrix).permute(2, 0, 1)
B = p1.view(p1.shape[0], 8, 1)
return torch.solve(B,A)[0]
# export
def apply_perspective(coords, coeffs):
sz = coords.shape
coords = coords.view(sz[0], -1, 2)
coeffs = torch.cat([coeffs, t1(coeffs[:,:1])], dim=1).view(coeffs.shape[0], 3,3)
coords = coords @ coeffs[...,:2].transpose(1,2) + coeffs[...,2].unsqueeze(1)
coords.div_(coords[...,2].unsqueeze(-1))
return coords[...,:2].view(*sz)
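A small consistency check with made-up points: mapping the corner square onto itself gives the identity warp, so `apply_perspective` should leave any coordinates unchanged.
corners = torch.tensor([[[-1.,-1.], [-1.,1.], [1.,-1.], [1.,1.]]])
coeffs = find_coeffs(corners, corners)
pts = torch.rand(1, 5, 2) * 2 - 1
assert torch.allclose(apply_perspective(pts, coeffs), pts, atol=1e-5)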
# export
class Warp():
def __init__(self, magnitude=0.2, p=0.5):
self.coeffs,self.magnitude,self.p = None,magnitude,p
def randomize(self, x):
up_t = masked_uniform(x, -self.magnitude, self.magnitude, x.size(0), p=self.p)
lr_t = masked_uniform(x, -self.magnitude, self.magnitude, x.size(0), p=self.p)
orig_pts = torch.tensor([[-1,-1], [-1,1], [1,-1], [1,1]], dtype=x.dtype, device=x.device)
self.orig_pts = orig_pts.unsqueeze(0).expand(x.size(0),4,2)
targ_pts = stack([stack([-1-up_t, -1-lr_t]), stack([-1+up_t, 1+lr_t]),
stack([ 1+up_t, -1+lr_t]), stack([ 1-up_t, 1-lr_t])])
self.targ_pts = targ_pts.permute(2,0,1)
def __call__(self, x, invert=False):
coeffs = find_coeffs(self.targ_pts, self.orig_pts) if invert else find_coeffs(self.orig_pts, self.targ_pts)
return apply_perspective(x, coeffs)
dl_tfms = [Cuda(device), ToFloatTensor(), AffineAndCoordTfm([Rotation()], [Warp()])]
pets = PetsData (pets_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
camvid = CamvidData(camvid_src).databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
biwi = BiwiData (biwi_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
coco = CocoData (coco_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
coco.show_batch()
# export
def logit(x):
"Logit of `x`, clamped to avoid inf."
x = x.clamp(1e-7, 1-1e-7)
return -(1/x-1).log()
# export
class LightingTransform(ImageTransform):
_order = 15
_data_aug=True
def __init__(self, tfms): self.tfms=listify(tfms)
def randomize(self):
for t in self.tfms: t.randomize(self.x)
def apply(self,x): return torch.sigmoid(apply_all(logit(x), self.tfms))
def apply_mask(self, x): return x
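The reason for going through `logit` is that the additive/multiplicative changes applied by the lighting transforms can never push pixel values outside [0,1] once mapped back through sigmoid in `apply`; the round trip itself is (numerically) the identity:
x = torch.rand(6)
assert torch.allclose(torch.sigmoid(logit(x)), x, atol=1e-6)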
# export
from math import log
def masked_log_uniform(x, a, b, *sz, p=0.5, neutral=0.):
return torch.exp(masked_uniform(x, log(a), log(b), *sz, p=p, neutral=neutral))
# export
class Brightness():
"Apply `change` in brightness of image `x`."
def __init__(self, max_lighting=0.2, p=0.75):
self.p = p
self.range = (0.5*(1-max_lighting), 0.5*(1+max_lighting))
def randomize(self, x):
self.change = masked_uniform(x, *self.range, x.size(0), *([1]*(x.dim()-1)), p=self.p, neutral=0.5)
def __call__(self, x): return x.add_(self.change)
class Contrast():
"Apply `change` in brightness of image `x`."
def __init__(self, max_lighting=0.2, p=0.75):
self.p = p
self.range = (1-max_lighting, 1/(1-max_lighting))
def randomize(self, x):
self.change = masked_log_uniform(x, *self.range, x.size(0), *([1]*(x.dim()-1)), p=self.p)
def __call__(self, x): return x.mul_(self.change)
dl_tfms = [Cuda(device), ToFloatTensor(), LightingTransform([Brightness(1), Contrast(0.5)])]
pets = PetsData (pets_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
camvid = CamvidData(camvid_src).databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
biwi = BiwiData (biwi_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
coco = CocoData (coco_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
coco.show_batch()
ds_tfms = [DecodeImg(), ResizeFixed(224), ToByteTensor(), Flip()]
dl_tfms = [Cuda(device), ToFloatTensor(), LightingTransform([Brightness(), Contrast()]),
AffineAndCoordTfm([Rotation(), Zoom()], [Warp()])]
pets = PetsData (pets_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
camvid = CamvidData(camvid_src).databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
biwi = BiwiData (biwi_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
coco = CocoData (coco_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
%timeit -n 10 _ = coco.one_batch(0)
class CenterCrop(ImageTransform):
_order = 12
def __init__(self, size):
if isinstance(size,int): size=(size,size)
self.size = (size[1],size[0])
def randomize(self):
w,h = self.x.size
        self.tl = ((w-self.size[0])//2, (h-self.size[1])//2)
def apply(self, x):
return x.crop((self.tl[0],self.tl[1],self.tl[0]+self.size[0],self.tl[1]+self.size[1]))
def apply_point(self, y):
old_sz,new_sz,tl = map(lambda o: tensor(o).float(), (self.x.size,self.size,self.tl))
return (y + 1) * old_sz/new_sz - tl * 2/new_sz - 1
def apply_bbox(self, y):
bbox,label = y
bbox = self.apply_point(bbox.view(-1,2)).view(-1,4)
return clip_remove_empty(bbox, label)
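A numeric check of the remapping formula in `apply_point` (made-up sizes; coordinates are (x,y) in [-1,1], `old_sz`/`new_sz`/`tl` in pixels): a point at pixel (60, 40) of a 200x100 image, cropped from top-left (50, 10) to a 100x80 crop, should land at pixel (10, 30) of the crop.
old_sz, new_sz, tl = torch.tensor([200.,100.]), torch.tensor([100.,80.]), torch.tensor([50.,10.])
y = torch.tensor([60., 40.]) / old_sz * 2 - 1          # source point in [-1,1] coords
y_new = (y + 1) * old_sz/new_sz - tl * 2/new_sz - 1    # formula from apply_point
assert torch.allclose((y_new + 1) / 2 * new_sz, torch.tensor([10., 30.]))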
class RandomCrop(CenterCrop):
def randomize(self):
w,h = self.x.size
        if self.filt != 0: self.tl = ((w-self.size[0])//2, (h-self.size[1])//2)
        else: self.tl = (random.randint(0,w-self.size[0]), random.randint(0,h-self.size[1]))
ds_tfms = [DecodeImg(), RandomCrop(100), ToByteTensor()]
dl_tfms = [Cuda(device), ToFloatTensor()]
pets = PetsData (pets_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
camvid = CamvidData(camvid_src).databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
biwi = BiwiData (biwi_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
coco = CocoData (coco_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
coco.show_batch()
import torchvision.transforms.functional as tvfunc
class Pad(CenterCrop):
_order = 15
    _pad_modes = {'zeros': 'constant', 'border': 'edge', 'reflection': 'reflect'}
def __init__(self, size, mode='zeros'):
if isinstance(size,int): size=(size,size)
        self.size,self.mode = size,self._pad_modes[mode]
def randomize(self):
ph,pw = self.size[0]-self.x.size[1],self.size[1]-self.x.size[0]
        self.tl = (-(pw//2),-(ph//2))
self.pad = (pw//2,ph//2,pw-pw//2,ph-ph//2)
def apply(self, x): return tvfunc.pad(x, self.pad, padding_mode=self.mode)
class RandomPad(Pad):
def randomize(self):
ph,pw = self.size[0]-self.x.size[1],self.size[1]-self.x.size[0]
c,r = random.randint(0,ph),random.randint(0,pw)
self.tl = (-r,-c)
self.pad = (r,c,pw-r,ph-c)
ds_tfms = [DecodeImg(), RandomPad(150, mode='reflection'), ToByteTensor()]
dl_tfms = [Cuda(device), ToFloatTensor()]
pets = PetsData (pets_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
camvid = CamvidData(camvid_src).databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
biwi = BiwiData (biwi_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
coco = CocoData (coco_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
coco.show_batch()