from nb_200 import *
import random
import pickle
device = torch.device('cuda', 0)
class PetsData(DataBlock):
types = Image,Category
get_items = lambda source, self: [get_image_files(source)[0]]*100
split = random_splitter()
label_func = re_labeller(pat = r'/([^/]+)_\d+.jpg$')
class CamvidData(DataBlock):
types = Image,SegmentMask
get_items = lambda source,self: [get_image_files(source/'images')[0]] * 100
split = random_splitter()
label_func = lambda o,self: self.source/'labels'/f'{o.stem}_P{o.suffix}'
class BiwiData(DataBlock):
types = Image,Points
def __init__(self, source, *args, **kwargs):
super().__init__(source, *args, **kwargs)
self.fn2ctr = pickle.load(open(source/'centers.pkl', 'rb'))
get_items = lambda source, self: [get_image_files(source/'images')[0]] * 100
split = random_splitter()
label_func = lambda o,self: [[0, 0], [120, 0], [0, 160], [120,160]]
class CocoData(DataBlock):
types = Image,BBox
def __init__(self, source, *args, **kwargs):
super().__init__(source, *args, **kwargs)
images, lbl_bbox = get_annotations(source/'train.json')
self.img2bbox = dict(zip(images, lbl_bbox))
get_items = lambda source, self: [get_image_files(source/'train')[18]] * 100
split = random_splitter()
label_func = lambda o,self: self.img2bbox[o.name]
def databunch(self, ds_tfms=None, dl_tfms=None, bs=64, tfm_kwargs=None, **kwargs):
return super().databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=bs, tfm_kwargs=tfm_kwargs,
collate_fn=bb_pad_collate, **kwargs)
ds_tfms = [DecodeImg(), ResizeFixed(128), ToByteTensor()]
dl_tfms = [Cuda(device), ToFloatTensor()]
pets_src = untar_data(URLs.PETS)
camvid_src = untar_data(URLs.CAMVID_TINY)
biwi_src = untar_data(URLs.BIWI_SAMPLE)
coco_src = untar_data(URLs.COCO_TINY)
pets = PetsData (pets_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
camvid = CamvidData(camvid_src).databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
biwi = BiwiData (biwi_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
coco = CocoData (coco_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
coco.show_batch()
# export
class Flip(ImageTransform):
_data_aug=True
def __init__(self, p=0.5): self.p = p
def randomize(self): self.do = random.random() < self.p
def apply(self, x):
return x.transpose(PIL.Image.FLIP_LEFT_RIGHT) if self.do else x
def apply_point(self, x):
if self.do: x[...,0] = -x[...,0]
return x
    def apply_bbox(self, x):
        # re-sort the corners after the flip so the box stays (top-left, bottom-right)
        pnts = self.apply_point(x[0].view(-1,2)).view(-1,2,2)
        tl,br = pnts.min(dim=1)[0],pnts.max(dim=1)[0]
        return (torch.cat([tl, br], dim=1), x[1])
ds_tfms = [DecodeImg(), Flip(), ResizeFixed(128), ToByteTensor()]
pets = PetsData (pets_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
camvid = CamvidData(camvid_src).databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
biwi = BiwiData (biwi_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
coco = CocoData (coco_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
coco.show_batch()
# export
class Dihedral(ImageTransform):
_data_aug=True
def __init__(self, p=0.5): self.p = p
def randomize(self):
self.idx = random.randint(0,7) if random.random() < self.p else 0
def apply(self, x): return x if self.idx==0 else x.transpose(self.idx-1)
def apply_point(self, x):
if self.idx in [1, 3, 4, 7]: x[...,0] = -x[...,0]
if self.idx in [2, 4, 5, 7]: x[...,1] = -x[...,1]
if self.idx in [3, 5, 6, 7]: x = x.flip(1)
return x
def apply_bbox(self, x):
pnts = self.apply_point(x[0].view(-1,2)).view(-1,2,2)
tl,dr = pnts.min(dim=1)[0],pnts.max(dim=1)[0]
return [torch.cat([tl, dr], dim=1), x[1]]
ds_tfms = [DecodeImg(), Dihedral(), ResizeFixed(128), ToByteTensor()]
pets = PetsData (pets_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
camvid = CamvidData(camvid_src).databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
biwi = BiwiData (biwi_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
coco = CocoData (coco_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
coco.show_batch()
This is the main transform: it composes the affine and coordinate transforms and performs only one interpolation at the end. The implementation differs for each type of target.
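As a minimal sketch of the idea (with made-up rotation and zoom matrices): the affine matrices are multiplied together first, so `F.affine_grid`/`F.grid_sample` only resample the image once.
import math, torch
import torch.nn.functional as F
theta = math.pi/6
rot  = torch.tensor([[ math.cos(theta), math.sin(theta), 0.],
                     [-math.sin(theta), math.cos(theta), 0.],
                     [ 0.,              0.,              1.]])
zoom = torch.tensor([[0.9, 0., 0.],
                     [0.,  0.9, 0.],
                     [0.,  0.,  1.]])
both = rot @ zoom                                    # compose first: one matrix...
grid = F.affine_grid(both[:2][None], (1, 3, 8, 8))   # ...hence a single grid/interpolation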
# export
def clip_remove_empty(bbox, label):
bbox = torch.clamp(bbox, -1, 1)
    empty = ((bbox[...,2] - bbox[...,0])*(bbox[...,3] - bbox[...,1]) <= 0.)
if isinstance(label, torch.Tensor): label[empty] = 0
else:
for i,m in enumerate(empty):
if m: label[i] = 0
return [bbox, label]
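A quick sanity check with made-up values: a box whose corners come out reversed (negative area) is clamped and its label is reset to 0, i.e. background.
bbox  = torch.tensor([[-0.5, -0.5,  0.5, 0.5],
                      [ 0.5, -0.5, -0.5, 0.5]])   # second box has reversed x corners
label = torch.tensor([3, 7])
clip_remove_empty(bbox, label)                    # boxes clamped, label becomes [3, 0]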
# export
class AffineAndCoordTfm(ImageTransform):
_data_aug=True
def __init__(self, aff_tfms, coord_tfms, size=None, mode='bilinear', padding_mode='reflection'):
self.aff_tfms,self.coord_tfms,self.mode,self.padding_mode = aff_tfms,coord_tfms,mode,padding_mode
self.size = None if size is None else (size,size) if isinstance(size, int) else tuple(size)
def randomize(self):
for t in self.aff_tfms+self.coord_tfms: t.randomize(self.x)
def _get_affine_mat(self):
aff_m = torch.eye(3, dtype=self.x.dtype, device=self.x.device)
aff_m = aff_m.unsqueeze(0).expand(self.x.size(0), 3, 3)
ms = [tfm() for tfm in self.aff_tfms]
ms = [m for m in ms if m is not None]
for m in ms: aff_m = aff_m @ m
return aff_m
def apply(self, x):
bs = x.size(0)
size = tuple(x.shape[-2:]) if self.size is None else self.size
size = (bs,x.size(1)) + size
coords = F.affine_grid(self._get_affine_mat()[:,:2], size)
coords = apply_all(coords, self.coord_tfms)
return F.grid_sample(x, coords, mode=self.mode, padding_mode=self.padding_mode)
def apply_mask(self, y):
self.old_mode,self.mode = self.mode,'nearest'
res = self.apply(y.float())
self.mode = self.old_mode
return res.long()
def apply_point(self, y):
m = self._get_affine_mat()[:,:2]
y = (y - m[:,:,2].unsqueeze(1)) @ torch.inverse(m[:,:2,:2].transpose(1,2))
return apply_all(y, self.coord_tfms, filter_kwargs=True, invert=True)
def apply_bbox(self, y):
bbox,label = y
bs,n = bbox.shape[:2]
pnts = stack([bbox[...,:2], stack([bbox[...,0],bbox[...,3]],dim=2),
stack([bbox[...,2],bbox[...,1]],dim=2), bbox[...,2:]], dim=2)
pnts = self.apply_point(pnts.view(bs, 4*n, 2))
pnts = pnts.view(bs, n, 4, 2)
tl,dr = pnts.min(dim=2)[0],pnts.max(dim=2)[0]
return clip_remove_empty(torch.cat([tl, dr], dim=2), label)
# export
import math
from torch import stack, zeros_like as t0, ones_like as t1
from torch.distributions.bernoulli import Bernoulli
# export
def mask_tensor(x, p=0.5, neutral=0.):
if p==1.: return x
if neutral != 0: x.add_(-neutral)
mask = x.new_empty(*x.size()).bernoulli_(p)
x.mul_(mask)
return x.add_(neutral) if neutral != 0 else x
# export
def masked_uniform(x, a, b, *sz, p=0.5, neutral=0.):
return mask_tensor(x.new_empty(*sz).uniform_(a,b), p=p, neutral=neutral)
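To make the convention explicit: in `mask_tensor` (and the `masked_*` helpers built on it), `p` is the probability that an element keeps its drawn value; the others are reset to `neutral`. A small sketch:
t = torch.ones(8) * 2
mask_tensor(t.clone(), p=0.5)                 # roughly half the entries become 0
mask_tensor(t.clone(), p=0.5, neutral=1.)     # ...or 1 when neutral is 1
masked_uniform(t, -10., 10., 8, p=0.5)        # uniform draws, the rest reset to 0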
# export
class Rotation():
def __init__(self, degrees=10., p=0.5):
self.range,self.p = (-degrees,degrees),p
def randomize(self, x):
thetas = masked_uniform(x, *self.range, x.size(0), p=self.p) * math.pi/180
self.mat = stack([stack([thetas.cos(), thetas.sin(), t0(thetas)], dim=1),
stack([-thetas.sin(), thetas.cos(), t0(thetas)], dim=1),
stack([t0(thetas), t0(thetas), t1(thetas)], dim=1)], dim=1)
def __call__(self): return self.mat
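A quick shape check (pixel values are irrelevant here, only the batch size is read): `randomize` builds one 3x3 matrix per image, which `AffineAndCoordTfm._get_affine_mat` multiplies together before slicing to 2x3 for `F.affine_grid`.
x = torch.zeros(4, 3, 64, 64)     # fake batch
rot = Rotation(degrees=30., p=1.)
rot.randomize(x)
rot().shape                       # torch.Size([4, 3, 3])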
ds_tfms = [DecodeImg(), ResizeFixed(128), ToByteTensor()]
dl_tfms = [Cuda(device), ToFloatTensor(), AffineAndCoordTfm([Rotation(30.)], [])]
pets = PetsData (pets_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
camvid = CamvidData(camvid_src).databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
biwi = BiwiData (biwi_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
coco = CocoData (coco_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
coco.show_batch()
# export
class FlipAffine():
def __init__(self, p=0.5):
self.p=p
def randomize(self, x):
mask = -2*x.new_empty(x.size(0)).bernoulli_(self.p)+1
self.mat = stack([stack([mask, t0(mask), t0(mask)], dim=1),
stack([t0(mask), t1(mask), t0(mask)], dim=1),
stack([t0(mask), t0(mask), t1(mask)], dim=1)], dim=1)
def __call__(self): return self.mat
# export
class DihedralAffine():
def __init__(self, p=0.5):
self.p=p
def randomize(self, x):
idx = mask_tensor(torch.randint(0, 8, (x.size(0),), device=x.device), p=self.p)
xs = 1 - 2*(idx & 1)
ys = 1 - (idx & 2)
m0,m1 = (idx<4).long(),(idx>3).long()
self.mat = stack([stack([xs*m0, xs*m1, t0(xs)], dim=1),
stack([ys*m1, ys*m0, t0(xs)], dim=1),
stack([t0(xs), t0(xs), t1(xs)], dim=1)], dim=1).float()
def __call__(self): return self.mat
dl_tfms = [Cuda(device), ToFloatTensor(), AffineAndCoordTfm([DihedralAffine()], [])]
pets = PetsData (pets_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
camvid = CamvidData(camvid_src).databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
biwi = BiwiData (biwi_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
coco = CocoData (coco_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
coco.show_batch()
class Zoom():
def __init__(self, max_zoom=1.1, p=0.5):
self.range,self.p = (1,max_zoom),p
def randomize(self, x):
s = 1/masked_uniform(x, *self.range, x.size(0), p=self.p, neutral=1.)
col_pct = x.new_empty(x.size(0)).uniform_(0.,1.)
row_pct = x.new_empty(x.size(0)).uniform_(0.,1.)
col_c = (1-s) * (2*col_pct - 1)
row_c = (1-s) * (2*row_pct - 1)
self.mat = stack([stack([s, t0(s), col_c], dim=1),
stack([t0(s), s, row_c], dim=1),
stack([t0(s), t0(s), t1(s)], dim=1)], dim=1)
def __call__(self): return self.mat
dl_tfms = [Cuda(device), ToFloatTensor(), AffineAndCoordTfm([Zoom()], [])]
pets = PetsData (pets_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
camvid = CamvidData(camvid_src).databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
biwi = BiwiData (biwi_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
coco = CocoData (coco_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
coco.show_batch()
# export
def find_coeffs(p1, p2):
matrix = []
p = p1[:,0,0]
#The equations we'll need to solve.
for i in range(p1.shape[1]):
matrix.append(stack([p2[:,i,0], p2[:,i,1], t1(p), t0(p), t0(p), t0(p), -p1[:,i,0]*p2[:,i,0], -p1[:,i,0]*p2[:,i,1]]))
matrix.append(stack([t0(p), t0(p), t0(p), p2[:,i,0], p2[:,i,1], t1(p), -p1[:,i,1]*p2[:,i,0], -p1[:,i,1]*p2[:,i,1]]))
    #The 8 scalars we seek are the solution of AX = B
A = stack(matrix).permute(2, 0, 1)
B = p1.view(p1.shape[0], 8, 1)
return torch.solve(B,A)[0]
# export
def apply_perspective(coords, coeffs):
sz = coords.shape
coords = coords.view(sz[0], -1, 2)
coeffs = torch.cat([coeffs, t1(coeffs[:,:1])], dim=1).view(coeffs.shape[0], 3,3)
coords = coords @ coeffs[...,:2].transpose(1,2) + coeffs[...,2].unsqueeze(1)
coords.div_(coords[...,2].unsqueeze(-1))
return coords[...,:2].view(*sz)
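A small consistency check with made-up points: mapping the corner square onto itself gives the identity warp, so `apply_perspective` should leave any coordinates unchanged.
corners = torch.tensor([[[-1.,-1.], [-1.,1.], [1.,-1.], [1.,1.]]])
coeffs = find_coeffs(corners, corners)
pts = torch.rand(1, 5, 2) * 2 - 1
assert torch.allclose(apply_perspective(pts, coeffs), pts, atol=1e-5)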
# export
class Warp():
def __init__(self, magnitude=0.2, p=0.5):
self.coeffs,self.magnitude,self.p = None,magnitude,p
def randomize(self, x):
up_t = masked_uniform(x, -self.magnitude, self.magnitude, x.size(0), p=self.p)
lr_t = masked_uniform(x, -self.magnitude, self.magnitude, x.size(0), p=self.p)
orig_pts = torch.tensor([[-1,-1], [-1,1], [1,-1], [1,1]], dtype=x.dtype, device=x.device)
self.orig_pts = orig_pts.unsqueeze(0).expand(x.size(0),4,2)
targ_pts = stack([stack([-1-up_t, -1-lr_t]), stack([-1+up_t, 1+lr_t]),
stack([ 1+up_t, -1+lr_t]), stack([ 1-up_t, 1-lr_t])])
self.targ_pts = targ_pts.permute(2,0,1)
def __call__(self, x, invert=False):
coeffs = find_coeffs(self.targ_pts, self.orig_pts) if invert else find_coeffs(self.orig_pts, self.targ_pts)
return apply_perspective(x, coeffs)
dl_tfms = [Cuda(device), ToFloatTensor(), AffineAndCoordTfm([Rotation()], [Warp()])]
pets = PetsData (pets_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
camvid = CamvidData(camvid_src).databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
biwi = BiwiData (biwi_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
coco = CocoData (coco_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
coco.show_batch()
# export
def logit(x):
"Logit of `x`, clamped to avoid inf."
x = x.clamp(1e-7, 1-1e-7)
return -(1/x-1).log()
# export
class LightingTransform(ImageTransform):
_order = 15
_data_aug=True
def __init__(self, tfms): self.tfms=listify(tfms)
def randomize(self):
for t in self.tfms: t.randomize(self.x)
def apply(self,x): return torch.sigmoid(apply_all(logit(x), self.tfms))
def apply_mask(self, x): return x
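The reason for going through `logit` is that the additive/multiplicative changes applied by the lighting transforms can never push pixel values outside [0,1] once mapped back through sigmoid in `apply`; the round trip itself is (numerically) the identity:
x = torch.rand(6)
assert torch.allclose(torch.sigmoid(logit(x)), x, atol=1e-6)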
# export
from math import log
def masked_log_uniform(x, a, b, *sz, p=0.5, neutral=0.):
return torch.exp(masked_uniform(x, log(a), log(b), *sz, p=p, neutral=neutral))
# export
class Brightness():
"Apply `change` in brightness of image `x`."
def __init__(self, max_lighting=0.2, p=0.75):
self.p = p
self.range = (0.5*(1-max_lighting), 0.5*(1+max_lighting))
def randomize(self, x):
self.change = masked_uniform(x, *self.range, x.size(0), *([1]*(x.dim()-1)), p=self.p, neutral=0.5)
def __call__(self, x): return x.add_(self.change)
class Contrast():
"Apply `change` in brightness of image `x`."
def __init__(self, max_lighting=0.2, p=0.75):
self.p = p
self.range = (1-max_lighting, 1/(1-max_lighting))
def randomize(self, x):
self.change = masked_log_uniform(x, *self.range, x.size(0), *([1]*(x.dim()-1)), p=self.p)
def __call__(self, x): return x.mul_(self.change)
dl_tfms = [Cuda(device), ToFloatTensor(), LightingTransform([Brightness(1), Contrast(0.5)])]
pets = PetsData (pets_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
camvid = CamvidData(camvid_src).databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
biwi = BiwiData (biwi_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
coco = CocoData (coco_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
coco.show_batch()
ds_tfms = [DecodeImg(), ResizeFixed(224), ToByteTensor(), Flip()]
dl_tfms = [Cuda(device), ToFloatTensor(), LightingTransform([Brightness(), Contrast()]),
AffineAndCoordTfm([Rotation(), Zoom()], [Warp()])]
pets = PetsData (pets_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
camvid = CamvidData(camvid_src).databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
biwi = BiwiData (biwi_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
coco = CocoData (coco_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
%timeit -n 10 _ = coco.one_batch(0)
class CenterCrop(ImageTransform):
_order = 12
def __init__(self, size):
if isinstance(size,int): size=(size,size)
self.size = (size[1],size[0])
def randomize(self):
w,h = self.x.size
        self.tl = ((w-self.size[0])//2, (h-self.size[1])//2)
def apply(self, x):
return x.crop((self.tl[0],self.tl[1],self.tl[0]+self.size[0],self.tl[1]+self.size[1]))
def apply_point(self, y):
old_sz,new_sz,tl = map(lambda o: tensor(o).float(), (self.x.size,self.size,self.tl))
return (y + 1) * old_sz/new_sz - tl * 2/new_sz - 1
def apply_bbox(self, y):
bbox,label = y
bbox = self.apply_point(bbox.view(-1,2)).view(-1,4)
return clip_remove_empty(bbox, label)
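A numeric check of the remapping formula in `apply_point` (made-up sizes; coordinates are (x,y) in [-1,1], `old_sz`/`new_sz`/`tl` in pixels): a point at pixel (60, 40) of a 200x100 image, cropped from top-left (50, 10) to a 100x80 crop, should land at pixel (10, 30) of the crop.
old_sz, new_sz, tl = torch.tensor([200.,100.]), torch.tensor([100.,80.]), torch.tensor([50.,10.])
y = torch.tensor([60., 40.]) / old_sz * 2 - 1          # source point in [-1,1] coords
y_new = (y + 1) * old_sz/new_sz - tl * 2/new_sz - 1    # formula from apply_point
assert torch.allclose((y_new + 1) / 2 * new_sz, torch.tensor([10., 30.]))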
class RandomCrop(CenterCrop):
def randomize(self):
w,h = self.x.size
        if self.filt != 0: self.tl = ((w-self.size[0])//2, (h-self.size[1])//2)
        else: self.tl = (random.randint(0,w-self.size[0]), random.randint(0,h-self.size[1]))
ds_tfms = [DecodeImg(), RandomCrop(100), ToByteTensor()]
dl_tfms = [Cuda(device), ToFloatTensor()]
pets = PetsData (pets_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
camvid = CamvidData(camvid_src).databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
biwi = BiwiData (biwi_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
coco = CocoData (coco_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
coco.show_batch()
import torchvision.transforms.functional as tvfunc
class Pad(CenterCrop):
_order = 15
    _pad_modes = {'zeros': 'constant', 'border': 'edge', 'reflection': 'reflect'}
def __init__(self, size, mode='zeros'):
if isinstance(size,int): size=(size,size)
        self.size,self.mode = size,self._pad_modes[mode]
def randomize(self):
ph,pw = self.size[0]-self.x.size[1],self.size[1]-self.x.size[0]
        self.tl = (-(pw//2),-(ph//2))
self.pad = (pw//2,ph//2,pw-pw//2,ph-ph//2)
def apply(self, x): return tvfunc.pad(x, self.pad, padding_mode=self.mode)
class RandomPad(Pad):
def randomize(self):
ph,pw = self.size[0]-self.x.size[1],self.size[1]-self.x.size[0]
c,r = random.randint(0,ph),random.randint(0,pw)
self.tl = (-r,-c)
self.pad = (r,c,pw-r,ph-c)
ds_tfms = [DecodeImg(), RandomPad(150, mode='reflection'), ToByteTensor()]
dl_tfms = [Cuda(device), ToFloatTensor()]
pets = PetsData (pets_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
camvid = CamvidData(camvid_src).databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
biwi = BiwiData (biwi_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
coco = CocoData (coco_src) .databunch(ds_tfms=ds_tfms, dl_tfms=dl_tfms, bs=16)
coco.show_batch()