%reload_ext autoreload
%autoreload 2
from fastai import *
from fastai.vision import *
Download the dataset here, from the PyTorch tutorial on transforms. Unzip it in the data directory, so that data/faces/ contains the images and the csv file.
PATH = Path('../../data/faces/')
img_fns = get_image_files(PATH)
len(img_fns)
poses = pd.read_csv(PATH/'face_landmarks.csv')
poses.head()
pose_dict = {o[0]:o[1:].astype(np.float32) for o in poses.values} #file name -> flat float array of landmark coordinates
Reading the coordinates. We will adopt the PyTorch convention used in grid_sampler, where coordinates are normalized between -1 and 1: (-1,-1) is the top-left corner, (1,1) the bottom-right. This function scales the coordinates to that range, or back to pixel values.
#export
def scale_flow(flow, to_unit=True):
s = tensor([flow.size[0]/2,flow.size[1]/2])[None]
if to_unit: flow.flow = flow.flow/s-1
else: flow.flow = (flow.flow+1)*s
return flow
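For example (a quick sketch using the FlowField defined in fastai.vision): on a 100x200 image, the top-left pixel maps to (-1,-1), the center to (0,0) and the bottom-right to (1,1); note that scale_flow modifies its argument in place, so the second call undoes the first.
flow = FlowField((100,200), tensor([[0.,0.],[50.,100.],[100.,200.]]))
scale_flow(flow).flow                #tensor([[-1.,-1.],[0.,0.],[1.,1.]])
scale_flow(flow, to_unit=False).flow #back to pixel coordinates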
pose_pnts = []
for fname in img_fns:
    coords = tensor(pose_dict[fname.name]).view(-1,2)
    pose_pnts.append(coords)
len(pose_pnts)
pose_pnts[0].shape,pose_pnts[0][0]
Let's have a look at the data.
def show_pose(img, pnts, ax=None):
if ax is None: _,ax = plt.subplots()
img.show(ax=ax, hide_axis=False)
ax.scatter(pnts[:, 0], pnts[:, 1], s=10, marker='.', c='r')
img = open_image(img_fns[0])
show_pose(img, pose_pnts[0])
So when we change the picture, the points must be changed accordingly. Specifically, the transforms that need to be applied to the points are the inverses of those applied to the sampling grid of the image. To deal with that, we add:
- an _affine_inv_mult function to apply the inverse of an affine transform
- an invert bool argument (default False) to coord transforms to get that inverse operation done

Additionally, we use singledispatch to change the implementation of pixel transforms for coords.
#export
def _affine_inv_mult(c, m):
"Applies the inverse affine transform described in m"
size = c.flow.size()
h,w = c.size
m[0,1] *= h/w
m[1,0] *= w/h
c.flow = c.flow.view(-1,2)
a = torch.inverse(m[:2,:2].t())
c.flow = torch.mm(c.flow - m[:2,2], a).view(size)
return c
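As a quick sanity check (a sketch; we use a square image so the aspect-ratio correction on m is a no-op, and clone m since the function modifies it in place): applying the forward affine map to the result of _affine_inv_mult recovers the original points.
m = tensor([[0.8, -0.3, 0.1],
            [0.3,  0.8, -0.2],
            [0.,   0.,   1.]])
c = _affine_inv_mult(FlowField((100,100), tensor([[0.5, 0.2]])), m.clone())
c.flow @ m[:2,:2].t() + m[:2,2] #recovers tensor([[0.5000, 0.2000]])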
#export
class ImagePoints1(Image):
"Support applying transforms to a flow points."
def __init__(self, flow:FlowField, scale:bool=True, y_first=True):
"Create from raw tensor image data `px`."
if scale: flow = scale_flow(flow)
if y_first: flow.flow = flow.flow.flip(1)
self._flow = flow
self._affine_mat = None
self.flow_func = []
self.sample_kwargs = {}
self.transformed = False
def clone(self):
"Mimic the behavior of torch.clone for `Image` objects."
return self.__class__(FlowField(self.size, self.flow.flow.clone()), scale=False, y_first=False)
@property
def shape(self)->Tuple[int,int,int]: return (1, *self._flow.size)
@property
def size(self)->Tuple[int,int]: return self._flow.size
@size.setter
def size(self, sz:int): self._flow.size=sz
@property
def device(self)->torch.device: return self._flow.flow.device
def __repr__(self): return f'{self.__class__.__name__} {tuple(self.size)}'
@property
def flow(self)->FlowField:
"Access the flow-field grid after applying queued affine and coord transforms."
if self._affine_mat is not None:
self._flow = _affine_inv_mult(self._flow, self._affine_mat)
self._affine_mat = None
self.transformed = True
if len(self.flow_func) != 0:
for f in self.flow_func[::-1]: self._flow = f(self._flow)
self.transformed = True
self.flow_func = []
return self._flow
@flow.setter
def flow(self,v:FlowField): self._flow=v
def coord(self, func:CoordFunc, *args, **kwargs)->'Image':
"Put `func` with `args` and `kwargs` in `self.flow_func` for later."
if 'invert' in kwargs: kwargs['invert'] = True
else: warn(f"{func.__name__} isn't implemented for `ImagePoints`.")
self.flow_func.append(partial(func, *args, **kwargs))
return self
def lighting(self, func:LightingFunc, *args:Any, **kwargs:Any)->'Image': return self
def pixel(self, func:PixelFunc, *args, **kwargs)->'Image':
"Equivalent to `self = func_flow(self)`."
self = func(self, *args, **kwargs)
self.transformed=True
return self
def refresh(self):
return self
def resize(self, size:Union[int,TensorImageSize]):
"Resize the image to `size`, size can be a single int."
if isinstance(size, int): size=(1, size, size)
self._flow.size = size[1:]
return self
@property
def data(self)->TensorImage:
"Return the points associated to this object."
flow = self.flow #This updates flow before we test if some transforms happened
if self.transformed:
if 'remove_out' not in self.sample_kwargs or self.sample_kwargs['remove_out']:
flow = _remove_points_out(flow)
self.transformed=False
return flow.flow.flip(1)
def show(self, ax=None, figsize=(3,3), title:Optional[str]=None, hide_axis:bool=True):
if ax is None: _,ax = plt.subplots(figsize=figsize)
pnt = scale_flow(FlowField(self.size, self.data), to_unit=False).flow.flip(1)
ax.scatter(pnt[:, 0], pnt[:, 1], s=10, marker='.', c='r')
if hide_axis: ax.axis('off')
if title: ax.set_title(title)
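A quick check of the conventions (a sketch): points come in as (y,x) pixel coordinates, are stored internally as (x,y) to match grid_sampler, and data returns them scaled to [-1,1] in (y,x) order again.
p = ImagePoints1(FlowField((100,100), tensor([[25.,75.]])))
p.data #row 25 -> -0.5, col 75 -> 0.5: tensor([[-0.5, 0.5]])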
#export
def _remove_points_out(flow:FlowField):
pad_mask = (flow.flow[:,0] >= -1) * (flow.flow[:,0] <= 1) * (flow.flow[:,1] >= -1) * (flow.flow[:,1] <= 1)
flow.flow = flow.flow[pad_mask]
return flow
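For instance (a quick sketch), only points with both coordinates in [-1,1] survive:
f = FlowField((10,10), tensor([[0.,0.],[1.5,0.],[0.,-2.]]))
_remove_points_out(f).flow #tensor([[0., 0.]])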
img = open_image(img_fns[0])
pnts = ImagePoints1(FlowField(img.size, pose_pnts[0].flip(1)))
img.show(y=pnts)
def test_tfm(x, y, tfms, **kwargs):
tfm_x = apply_tfms(tfms, x, **kwargs)
    tfm_y = apply_tfms(tfms, y, do_resolve=False, **kwargs) #do_resolve=False reuses the random params drawn for x
return tfm_x, tfm_y
Pixel transforms aren't supported for points. The only pixel transforms in fastai are flip_lr, dihedral, crop, pad and crop_pad. We can implement the first two at an affine or coord level; the only downside is that this triggers an unnecessary interpolation of the image (when we didn't need one). The other three are a bit messier because they change the size of the image...
#export
@TfmAffine
def flip_affine() -> TfmAffine:
    "Flip `x` horizontally."
    return [[-1, 0, 0.],
            [0,  1, 0],
            [0,  0, 1.]]
tfms = [rand_zoom(scale=(1.,1.25)), rotate(degrees=(-30,30)), flip_affine(p=0.5)]
_, axs = plt.subplots(2, 4, figsize=(10,5))
for i, ax in enumerate(axs.flatten()):
tfm_x, tfm_y = test_tfm(img, pnts, tfms)
tfm_x.show(ax=ax, y = tfm_y)
By default, points that fall outside the image are removed.
tfms = [rotate(degrees=-30)]
tfm_x,tfm_y = test_tfm(img, pnts, tfms, padding_mode='zeros')
tfm_x.show(y=tfm_y)
But you can change this with remove_out=False.
tfm_x,tfm_y = test_tfm(img, pnts, tfms, padding_mode='zeros', remove_out=False)
tfm_x.show(y=tfm_y)
#export
def _pad_coord(x, row_pad:int, col_pad:int, mode='zeros'):
#TODO: implement other padding modes than zeros?
h,w = x.size
pad = torch.Tensor([w/(w + 2*col_pad), h/(h + 2*row_pad)])
    x.flow = FlowField((h+2*row_pad, w+2*col_pad), x.flow.flow * pad[None])
return x
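A quick check of that rescaling (a sketch): padding a 100x100 image by 50 on each side gives a 200x200 image, so a point on the old bottom-right corner at (1,1) should land halfway to the new border, at (0.5,0.5).
p = ImagePoints1(FlowField((100,100), tensor([[100.,100.]])))
p = _pad_coord(p, 50, 50)
p.size, p.flow.flow #((200, 200), tensor([[0.5, 0.5]]))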
#export
from functools import singledispatch
#export
_pad_mode_convert = {'reflection':'reflect', 'zeros':'constant', 'border':'replicate'}
@partial(TfmPixel, order=-10)
@singledispatch
def pad(x, padding:int, mode='reflection'):
"Pad `x` with `padding` pixels. `mode` fills in space ('zeros','reflection','border')."
mode = _pad_mode_convert[mode]
return F.pad(x[None], (padding,)*4, mode=mode)[0]
@pad.register(ImagePoints1)
def _(x, padding:int, mode='reflection'):
return _pad_coord(x, padding, padding, mode)
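Calling a transform with positional arguments applies it immediately, and singledispatch picks the implementation from the type of the first argument (a sketch, assuming the fastai Transform wrapper keeps the register/dispatch attributes of the decorated function, as used above):
p = ImagePoints1(FlowField((100,100), tensor([[50.,50.]])))
pad(p, 10).size #dispatched to _pad_coord: (120, 120)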
@TfmPixel
@singledispatch
def crop(x, size, row_pct:uniform=0.5, col_pct:uniform=0.5):
"Crop `x` to `size` pixels. `row_pct`,`col_pct` select focal point of crop."
size = listify(size,2)
rows,cols = size
row = int((x.size(1)-rows+1) * row_pct)
col = int((x.size(2)-cols+1) * col_pct)
return x[:, row:row+rows, col:col+cols].contiguous()
@crop.register(ImagePoints1)
def _(x, size, row_pct=0.5, col_pct=0.5):
h,w = x.size
rows,cols = listify(size, 2)
x.flow.flow.mul_(torch.Tensor([w/cols, h/rows])[None])
row = int((h-rows+1) * row_pct)
col = int((w-cols+1) * col_pct)
x.flow.flow.add_(-1 + torch.Tensor([w/cols-2*col/cols, h/rows-2*row/rows])[None])
x.size = (rows, cols)
return x
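Checking the crop arithmetic (a sketch): in a centered 50x50 crop of a 100x100 image, a point at pixel (75,75) sits exactly on the corner of the cropped region, so it should land at (1,1).
p = ImagePoints1(FlowField((100,100), tensor([[75.,75.]])))
crop(p, 50, row_pct=0.5, col_pct=0.5).data #tensor([[1., 1.]])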
@TfmCrop
@singledispatch
def crop_pad(x, size, padding_mode='reflection',
row_pct:uniform = 0.5, col_pct:uniform = 0.5):
"Crop and pad tfm - `row_pct`,`col_pct` sets focal point."
padding_mode = _pad_mode_convert[padding_mode]
size = listify(size,2)
if x.shape[1:] == size: return x
rows,cols = size
if x.size(1)<rows or x.size(2)<cols:
row_pad = max((rows-x.size(1)+1)//2, 0)
col_pad = max((cols-x.size(2)+1)//2, 0)
x = F.pad(x[None], (col_pad,col_pad,row_pad,row_pad), mode=padding_mode)[0]
row = int((x.size(1)-rows+1)*row_pct)
col = int((x.size(2)-cols+1)*col_pct)
x = x[:, row:row+rows, col:col+cols]
return x.contiguous() # without this, get NaN later - don't know why
@crop_pad.register(ImagePoints1)
def _(x, size, padding_mode='reflection', row_pct = 0.5, col_pct = 0.5):
size = listify(size,2)
rows,cols = size
if x.size[0]<rows or x.size[1]<cols:
row_pad = max((rows-x.size[0]+1)//2, 0)
col_pad = max((cols-x.size[1]+1)//2, 0)
x = _pad_coord(x, row_pad, col_pad)
return crop(x,(rows,cols), row_pct, col_pct)
import fastai
fastai.vision.transform.pad = pad
fastai.vision.transform.crop = crop
fastai.vision.transform.crop_pad = crop_pad
#tfms = [pad(padding=100)]
tfms = [crop_pad(size=300, row_pct=(0.,1.), col_pct=(0.,1.))]
tfm_x,tfm_y = test_tfm(img, pnts, tfms, padding_mode='zeros', size=300)
tfm_x.show(y=tfm_y)
#export
@TfmAffine
def dihedral_affine(k:partial(uniform_int,0,8)):
"Randomly flip `x` image based on `k`."
x = -1 if k&1 else 1
y = -1 if k&2 else 1
if k&4: return [[0, x, 0.],
[y, 0, 0],
[0, 0, 1.]]
return [[x, 0, 0.],
[0, y, 0],
[0, 0, 1.]]
_, axs = plt.subplots(2, 4, figsize=(10,5))
for i, ax in enumerate(axs.flatten()):
tfms = [dihedral_affine(p=1)]
tfms[0].resolved = {'k':i}
tfm_x = apply_tfms(tfms, img, do_resolve=False)
tfm_y = apply_tfms(tfms, pnts, do_resolve=False)
tfm_x.show(ax=ax, y = tfm_y)
Changes needed in the current implementation to support data augmentation on points.
#export
def _find_coeffs(orig_pts:Points, targ_pts:Points)->Tensor:
"Find 8 coeff mentioned [here](https://web.archive.org/web/20150222120106/xenia.media.mit.edu/~cwren/interpolator/)."
matrix = []
#The equations we'll need to solve.
for p1, p2 in zip(targ_pts, orig_pts):
matrix.append([p1[0], p1[1], 1, 0, 0, 0, -p2[0]*p1[0], -p2[0]*p1[1]])
matrix.append([0, 0, 0, p1[0], p1[1], 1, -p2[1]*p1[0], -p2[1]*p1[1]])
    A = FloatTensor(matrix)
    B = FloatTensor(orig_pts).view(8)
    #The 8 scalars we seek are the solution of AX = B (torch.gesv was later deprecated in favor of torch.solve)
    return torch.gesv(B,A)[0][:,0]
def _apply_perspective(coords:FlowField, coeffs:Points)->FlowField:
"Transform `coords` with `coeffs`."
size = coords.flow.size()
    #flatten all the dims except the last one: coords become N*2 (the addmm below makes them N*3, homogeneous)
    coords.flow = coords.flow.view(-1,2)
    #Transform the coeffs into a 3*3 matrix with a 1 at the bottom right
    coeffs = torch.cat([coeffs, FloatTensor([1])]).view(3,3)
coords.flow = torch.addmm(coeffs[:,2], coords.flow, coeffs[:,:2].t())
coords.flow.mul_(1/coords.flow[:,2].unsqueeze(1))
coords.flow = coords.flow[:,:2].view(size)
return coords
_orig_pts = [[-1,-1], [-1,1], [1,-1], [1,1]]
def _perspective_warp(c:FlowField, targ_pts:Points, invert=False):
"Apply warp to `targ_pts` from `_orig_pts` to `c` `FlowField`."
if invert: return _apply_perspective(c, _find_coeffs(targ_pts, _orig_pts))
return _apply_perspective(c, _find_coeffs(_orig_pts, targ_pts))
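A sanity check (a sketch): warping onto _orig_pts themselves solves for the identity homography, so the flow should come back unchanged.
c = FlowField((100,100), tensor([[0.,0.],[0.5,-0.5]]))
_perspective_warp(c, _orig_pts).flow #unchanged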
@TfmCoord
def perspective_warp(c, magnitude:partial(uniform,size=8)=0, invert=False):
"Apply warp of `magnitude` to `c`."
magnitude = magnitude.view(4,2)
targ_pts = [[x+m for x,m in zip(xs, ms)] for xs, ms in zip(_orig_pts, magnitude)]
return _perspective_warp(c, targ_pts, invert)
@TfmCoord
def symmetric_warp(c, magnitude:partial(uniform,size=4)=0, invert=False):
"Apply symmetric warp of `magnitude` to `c`."
m = listify(magnitude, 4)
targ_pts = [[-1-m[3],-1-m[1]], [-1-m[2],1+m[1]], [1+m[3],-1-m[0]], [1+m[2],1+m[0]]]
return _perspective_warp(c, targ_pts, invert)
@TfmCoord
def tilt(c, direction:uniform_int, magnitude:uniform=0, invert=False):
"Tilt `c` field with random `direction` and `magnitude`."
if direction == 0: targ_pts = [[-1,-1], [-1,1], [1,-1-magnitude], [1,1+magnitude]]
elif direction == 1: targ_pts = [[-1,-1-magnitude], [-1,1+magnitude], [1,-1], [1,1]]
elif direction == 2: targ_pts = [[-1,-1], [-1-magnitude,1], [1,-1], [1+magnitude,1]]
elif direction == 3: targ_pts = [[-1-magnitude,-1], [-1,1], [1+magnitude,-1], [1,1]]
coeffs = _find_coeffs(targ_pts, _orig_pts) if invert else _find_coeffs(_orig_pts, targ_pts)
return _apply_perspective(c, coeffs)
@TfmCoord
def skew(c, direction:uniform_int, magnitude:uniform=0, invert=False):
"Skew `c` field with random `direction` and `magnitude`."
if direction == 0: targ_pts = [[-1-magnitude,-1], [-1,1], [1,-1], [1,1]]
elif direction == 1: targ_pts = [[-1,-1-magnitude], [-1,1], [1,-1], [1,1]]
elif direction == 2: targ_pts = [[-1,-1], [-1-magnitude,1], [1,-1], [1,1]]
elif direction == 3: targ_pts = [[-1,-1], [-1,1+magnitude], [1,-1], [1,1]]
elif direction == 4: targ_pts = [[-1,-1], [-1,1], [1+magnitude,-1], [1,1]]
elif direction == 5: targ_pts = [[-1,-1], [-1,1], [1,-1-magnitude], [1,1]]
elif direction == 6: targ_pts = [[-1,-1], [-1,1], [1,-1], [1+magnitude,1]]
elif direction == 7: targ_pts = [[-1,-1], [-1,1], [1,-1], [1,1+magnitude]]
coeffs = _find_coeffs(targ_pts, _orig_pts) if invert else _find_coeffs(_orig_pts, targ_pts)
return _apply_perspective(c, coeffs)
tfms = [tilt(direction=(0,3), magnitude=0.4)]
tfm_x,tfm_y = test_tfm(img, pnts, tfms, padding_mode='zeros', size=300)
tfm_x.show(y=tfm_y)
tfms = [symmetric_warp(magnitude=(-0.4,0.4))]
tfm_x,tfm_y = test_tfm(img, pnts, tfms, padding_mode='zeros')
tfm_x.show(y=tfm_y)
def get_transforms(do_flip:bool=True, flip_vert:bool=False, max_rotate:float=10., max_zoom:float=1.1,
                   max_lighting:float=0.2, max_warp:float=0.2, p_affine:float=0.75,
                   p_lighting:float=0.75, xtra_tfms:Optional[Collection[Transform]]=None)->Collection[Transform]:
"Utility func to easily create a list of flip, rotate, `zoom`, warp, lighting transforms."
res = [rand_crop()]
    if do_flip: res.append(dihedral_affine() if flip_vert else flip_affine(p=0.5))
if max_warp: res.append(symmetric_warp(magnitude=(-max_warp,max_warp), p=p_affine))
if max_rotate: res.append(rotate(degrees=(-max_rotate,max_rotate), p=p_affine))
if max_zoom>1: res.append(rand_zoom(scale=(1.,max_zoom), p=p_affine))
if max_lighting:
res.append(brightness(change=(0.5*(1-max_lighting), 0.5*(1+max_lighting)), p=p_lighting))
res.append(contrast(scale=(1-max_lighting, 1/(1-max_lighting)), p=p_lighting))
# train , valid
return (res + listify(xtra_tfms), [crop_pad()])
tfms = get_transforms()
_, axs = plt.subplots(2, 4, figsize=(10,5))
for i, ax in enumerate(axs.flatten()):
tfm_x, tfm_y = test_tfm(img, pnts, tfms[0], size=224)
tfm_x.show(ax=ax, y = tfm_y)