%reload_ext autoreload
%autoreload 2
from fastai import *
from fastai.vision import *
Download the dataset here, from the PyTorch tutorial on transforms. Unzip it in the data directory, so that data/faces/ contains the images and the csv file.
PATH = Path('../../data/faces/')
img_fns = get_image_files(PATH)
len(img_fns)
poses = pd.read_csv(PATH/'face_landmarks.csv')
poses.head()
pose_dict = {o[0]:o[1:].astype(np.float32) for o in poses.values} #file name -> flat float array of landmark coordinates
Reading the coordinates. We will adopt the PyTorch convention used in grid_sampler, where coordinates are normalized between -1 and 1: (-1,-1) is the top-left corner, (1,1) the bottom-right. This function scales the coordinates to that range, or back to pixel values.
#export
def scale_flow(flow, to_unit=True):
s = tensor([flow.size[0]/2,flow.size[1]/2])[None]
if to_unit: flow.flow = flow.flow/s-1
else: flow.flow = (flow.flow+1)*s
return flow
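For example (a quick sketch using the FlowField defined in fastai.vision): on a 100x200 image, the top-left pixel maps to (-1,-1), the center to (0,0) and the bottom-right to (1,1); note that scale_flow modifies its argument in place, so the second call undoes the first.
flow = FlowField((100,200), tensor([[0.,0.],[50.,100.],[100.,200.]]))
scale_flow(flow).flow                #tensor([[-1.,-1.],[0.,0.],[1.,1.]])
scale_flow(flow, to_unit=False).flow #back to pixel coordinates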
pose_pnts = []
for fname in img_fns:
    coords = tensor(pose_dict[fname.name]).view(-1,2)
    pose_pnts.append(coords)
len(pose_pnts)
pose_pnts[0].shape,pose_pnts[0][0]
Let's have a look at the data.
def show_pose(img, pnts, ax=None):
if ax is None: _,ax = plt.subplots()
img.show(ax=ax, hide_axis=False)
ax.scatter(pnts[:, 0], pnts[:, 1], s=10, marker='.', c='r')
img = open_image(img_fns[0])
show_pose(img, pose_pnts[0])
So when we change the picture, the points must be changed accordingly. Specifically, the transforms that need to be applied to the points are the inverses of those applied to the sampling grid of the image. To deal with that, we add:
- an _affine_inv_mult function to apply the inverse of an affine transform
- an invert bool argument (default False) to coord transforms to get that inverse operation done

Additionally, we use singledispatch to change the implementation of pixel transforms for coords.
#export
def _affine_inv_mult(c, m):
"Applies the inverse affine transform described in m"
size = c.flow.size()
h,w = c.size
m[0,1] *= h/w
m[1,0] *= w/h
c.flow = c.flow.view(-1,2)
a = torch.inverse(m[:2,:2].t())
c.flow = torch.mm(c.flow - m[:2,2], a).view(size)
return c
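As a quick sanity check (a sketch; we use a square image so the aspect-ratio correction on m is a no-op, and clone m since the function modifies it in place): applying the forward affine map to the result of _affine_inv_mult recovers the original points.
m = tensor([[0.8, -0.3, 0.1],
            [0.3,  0.8, -0.2],
            [0.,   0.,   1.]])
c = _affine_inv_mult(FlowField((100,100), tensor([[0.5, 0.2]])), m.clone())
c.flow @ m[:2,:2].t() + m[:2,2] #recovers tensor([[0.5000, 0.2000]])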
#export
class ImagePoints1(Image):
"Support applying transforms to a flow points."
def __init__(self, flow:FlowField, scale:bool=True, y_first=True):
"Create from raw tensor image data `px`."
if scale: flow = scale_flow(flow)
if y_first: flow.flow = flow.flow.flip(1)
self._flow = flow
self._affine_mat = None
self.flow_func = []
self.sample_kwargs = {}
self.transformed = False
def clone(self):
"Mimic the behavior of torch.clone for `Image` objects."
return self.__class__(FlowField(self.size, self.flow.flow.clone()), scale=False, y_first=False)
@property
def shape(self)->Tuple[int,int,int]: return (1, *self._flow.size)
@property
def size(self)->Tuple[int,int]: return self._flow.size
@size.setter
def size(self, sz:int): self._flow.size=sz
@property
def device(self)->torch.device: return self._flow.flow.device
def __repr__(self): return f'{self.__class__.__name__} {tuple(self.size)}'
@property
def flow(self)->FlowField:
"Access the flow-field grid after applying queued affine and coord transforms."
if self._affine_mat is not None:
self._flow = _affine_inv_mult(self._flow, self._affine_mat)
self._affine_mat = None
self.transformed = True
if len(self.flow_func) != 0:
for f in self.flow_func[::-1]: self._flow = f(self._flow)
self.transformed = True
self.flow_func = []
return self._flow
@flow.setter
def flow(self,v:FlowField): self._flow=v
def coord(self, func:CoordFunc, *args, **kwargs)->'Image':
"Put `func` with `args` and `kwargs` in `self.flow_func` for later."
if 'invert' in kwargs: kwargs['invert'] = True
else: warn(f"{func.__name__} isn't implemented for `ImagePoints`.")
self.flow_func.append(partial(func, *args, **kwargs))
return self
def lighting(self, func:LightingFunc, *args:Any, **kwargs:Any)->'Image': return self
def pixel(self, func:PixelFunc, *args, **kwargs)->'Image':
"Equivalent to `self = func_flow(self)`."
self = func(self, *args, **kwargs)
self.transformed=True
return self
def refresh(self):
return self
def resize(self, size:Union[int,TensorImageSize]):
"Resize the image to `size`, size can be a single int."
if isinstance(size, int): size=(1, size, size)
self._flow.size = size[1:]
return self
@property
def data(self)->TensorImage:
"Return the points associated to this object."
flow = self.flow #This updates flow before we test if some transforms happened
if self.transformed:
if 'remove_out' not in self.sample_kwargs or self.sample_kwargs['remove_out']:
flow = _remove_points_out(flow)
self.transformed=False
return flow.flow.flip(1)
def show(self, ax=None, figsize=(3,3), title:Optional[str]=None, hide_axis:bool=True):
if ax is None: _,ax = plt.subplots(figsize=figsize)
pnt = scale_flow(FlowField(self.size, self.data), to_unit=False).flow.flip(1)
ax.scatter(pnt[:, 0], pnt[:, 1], s=10, marker='.', c='r')
if hide_axis: ax.axis('off')
if title: ax.set_title(title)
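A quick check of the conventions (a sketch): points come in as (y,x) pixel coordinates, are stored internally as (x,y) to match grid_sampler, and data returns them scaled to [-1,1] in (y,x) order again.
p = ImagePoints1(FlowField((100,100), tensor([[25.,75.]])))
p.data #row 25 -> -0.5, col 75 -> 0.5: tensor([[-0.5, 0.5]])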
#export
def _remove_points_out(flow:FlowField):
pad_mask = (flow.flow[:,0] >= -1) * (flow.flow[:,0] <= 1) * (flow.flow[:,1] >= -1) * (flow.flow[:,1] <= 1)
flow.flow = flow.flow[pad_mask]
return flow
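For instance (a quick sketch), only points with both coordinates in [-1,1] survive:
f = FlowField((10,10), tensor([[0.,0.],[1.5,0.],[0.,-2.]]))
_remove_points_out(f).flow #tensor([[0., 0.]])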
img = open_image(img_fns[0])
pnts = ImagePoints1(FlowField(img.size, pose_pnts[0].flip(1)))
img.show(y=pnts)
def test_tfm(x, y, tfms, **kwargs):
tfm_x = apply_tfms(tfms, x, **kwargs)
    tfm_y = apply_tfms(tfms, y, do_resolve=False, **kwargs) #do_resolve=False reuses the random params drawn for x
return tfm_x, tfm_y
Pixel transforms aren't supported for points. The only pixel transforms in fastai are flip_lr, dihedral, crop, pad and crop_pad. We can implement the first two at an affine or coord level; the only downside is that this triggers an unnecessary interpolation of the image (when we didn't need one). The other three are a bit messier because they change the size of the image...
#export
@TfmAffine
def flip_affine() -> TfmAffine:
    "Flip `x` horizontally."
    return [[-1, 0, 0.],
            [0,  1, 0],
            [0,  0, 1.]]
tfms = [rand_zoom(scale=(1.,1.25)), rotate(degrees=(-30,30)), flip_affine(p=0.5)]
_, axs = plt.subplots(2, 4, figsize=(10,5))
for i, ax in enumerate(axs.flatten()):
tfm_x, tfm_y = test_tfm(img, pnts, tfms)
tfm_x.show(ax=ax, y = tfm_y)
By default, points that fall outside the image are removed.
tfms = [rotate(degrees=-30)]
tfm_x,tfm_y = test_tfm(img, pnts, tfms, padding_mode='zeros')
tfm_x.show(y=tfm_y)
But you can change this with remove_out=False.
tfm_x,tfm_y = test_tfm(img, pnts, tfms, padding_mode='zeros', remove_out=False)
tfm_x.show(y=tfm_y)
#export
def _pad_coord(x, row_pad:int, col_pad:int, mode='zeros'):
#TODO: implement other padding modes than zeros?
h,w = x.size
pad = torch.Tensor([w/(w + 2*col_pad), h/(h + 2*row_pad)])
    x.flow = FlowField((h+2*row_pad, w+2*col_pad), x.flow.flow * pad[None])
return x
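A quick check of that rescaling (a sketch): padding a 100x100 image by 50 on each side gives a 200x200 image, so a point on the old bottom-right corner at (1,1) should land halfway to the new border, at (0.5,0.5).
p = ImagePoints1(FlowField((100,100), tensor([[100.,100.]])))
p = _pad_coord(p, 50, 50)
p.size, p.flow.flow #((200, 200), tensor([[0.5, 0.5]]))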
#export
from functools import singledispatch
#export
_pad_mode_convert = {'reflection':'reflect', 'zeros':'constant', 'border':'replicate'}
@partial(TfmPixel, order=-10)
@singledispatch
def pad(x, padding:int, mode='reflection'):
"Pad `x` with `padding` pixels. `mode` fills in space ('zeros','reflection','border')."
mode = _pad_mode_convert[mode]
return F.pad(x[None], (padding,)*4, mode=mode)[0]
@pad.register(ImagePoints1)
def _(x, padding:int, mode='reflection'):
return _pad_coord(x, padding, padding, mode)
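Calling a transform with positional arguments applies it immediately, and singledispatch picks the implementation from the type of the first argument (a sketch, assuming the fastai Transform wrapper keeps the register/dispatch attributes of the decorated function, as used above):
p = ImagePoints1(FlowField((100,100), tensor([[50.,50.]])))
pad(p, 10).size #dispatched to _pad_coord: (120, 120)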
@TfmPixel
@singledispatch
def crop(x, size, row_pct:uniform=0.5, col_pct:uniform=0.5):
"Crop `x` to `size` pixels. `row_pct`,`col_pct` select focal point of crop."
size = listify(size,2)
rows,cols = size
row = int((x.size(1)-rows+1) * row_pct)
col = int((x.size(2)-cols+1) * col_pct)
return x[:, row:row+rows, col:col+cols].contiguous()
@crop.register(ImagePoints1)
def _(x, size, row_pct=0.5, col_pct=0.5):
h,w = x.size
rows,cols = listify(size, 2)
x.flow.flow.mul_(torch.Tensor([w/cols, h/rows])[None])
row = int((h-rows+1) * row_pct)
col = int((w-cols+1) * col_pct)
x.flow.flow.add_(-1 + torch.Tensor([w/cols-2*col/cols, h/rows-2*row/rows])[None])
x.size = (rows, cols)
return x
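Checking the crop arithmetic (a sketch): in a centered 50x50 crop of a 100x100 image, a point at pixel (75,75) sits exactly on the corner of the cropped region, so it should land at (1,1).
p = ImagePoints1(FlowField((100,100), tensor([[75.,75.]])))
crop(p, 50, row_pct=0.5, col_pct=0.5).data #tensor([[1., 1.]])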
@TfmCrop
@singledispatch
def crop_pad(x, size, padding_mode='reflection',
row_pct:uniform = 0.5, col_pct:uniform = 0.5):
"Crop and pad tfm - `row_pct`,`col_pct` sets focal point."
padding_mode = _pad_mode_convert[padding_mode]
size = listify(size,2)
if x.shape[1:] == size: return x
rows,cols = size
if x.size(1)<rows or x.size(2)<cols:
row_pad = max((rows-x.size(1)+1)//2, 0)
col_pad = max((cols-x.size(2)+1)//2, 0)
x = F.pad(x[None], (col_pad,col_pad,row_pad,row_pad), mode=padding_mode)[0]
row = int((x.size(1)-rows+1)*row_pct)
col = int((x.size(2)-cols+1)*col_pct)
x = x[:, row:row+rows, col:col+cols]
return x.contiguous() # without this, get NaN later - don't know why
@crop_pad.register(ImagePoints1)
def _(x, size, padding_mode='reflection', row_pct = 0.5, col_pct = 0.5):
size = listify(size,2)
rows,cols = size
if x.size[0]<rows or x.size[1]<cols:
row_pad = max((rows-x.size[0]+1)//2, 0)
col_pad = max((cols-x.size[1]+1)//2, 0)
x = _pad_coord(x, row_pad, col_pad)
return crop(x,(rows,cols), row_pct, col_pct)
import fastai
fastai.vision.transform.pad = pad
fastai.vision.transform.crop = crop
fastai.vision.transform.crop_pad = crop_pad
#tfms = [pad(padding=100)]
tfms = [crop_pad(size=300, row_pct=(0.,1.), col_pct=(0.,1.))]
tfm_x,tfm_y = test_tfm(img, pnts, tfms, padding_mode='zeros', size=300)
tfm_x.show(y=tfm_y)
#export
@TfmAffine
def dihedral_affine(k:partial(uniform_int,0,8)):
"Randomly flip `x` image based on `k`."
x = -1 if k&1 else 1
y = -1 if k&2 else 1
if k&4: return [[0, x, 0.],
[y, 0, 0],
[0, 0, 1.]]
return [[x, 0, 0.],
[0, y, 0],
[0, 0, 1.]]
_, axs = plt.subplots(2, 4, figsize=(10,5))
for i, ax in enumerate(axs.flatten()):
tfms = [dihedral_affine(p=1)]
tfms[0].resolved = {'k':i}
tfm_x = apply_tfms(tfms, img, do_resolve=False)
tfm_y = apply_tfms(tfms, pnts, do_resolve=False)
tfm_x.show(ax=ax, y = tfm_y)
Changes needed in the current implementation to support data augmentation on points.
#export
def _find_coeffs(orig_pts:Points, targ_pts:Points)->Tensor:
"Find 8 coeff mentioned [here](https://web.archive.org/web/20150222120106/xenia.media.mit.edu/~cwren/interpolator/)."
matrix = []
#The equations we'll need to solve.
for p1, p2 in zip(targ_pts, orig_pts):
matrix.append([p1[0], p1[1], 1, 0, 0, 0, -p2[0]*p1[0], -p2[0]*p1[1]])
matrix.append([0, 0, 0, p1[0], p1[1], 1, -p2[1]*p1[0], -p2[1]*p1[1]])
    A = FloatTensor(matrix)
    B = FloatTensor(orig_pts).view(8)
    #The 8 scalars we seek are the solution of AX = B (torch.gesv was later deprecated in favor of torch.solve)
    return torch.gesv(B,A)[0][:,0]
def _apply_perspective(coords:FlowField, coeffs:Points)->FlowField:
"Transform `coords` with `coeffs`."
size = coords.flow.size()
    #flatten all the dims except the last one: coords become N*2 (the addmm below makes them N*3, homogeneous)
    coords.flow = coords.flow.view(-1,2)
    #Transform the coeffs into a 3*3 matrix with a 1 at the bottom right
    coeffs = torch.cat([coeffs, FloatTensor([1])]).view(3,3)
coords.flow = torch.addmm(coeffs[:,2], coords.flow, coeffs[:,:2].t())
coords.flow.mul_(1/coords.flow[:,2].unsqueeze(1))
coords.flow = coords.flow[:,:2].view(size)
return coords
_orig_pts = [[-1,-1], [-1,1], [1,-1], [1,1]]
def _perspective_warp(c:FlowField, targ_pts:Points, invert=False):
"Apply warp to `targ_pts` from `_orig_pts` to `c` `FlowField`."
if invert: return _apply_perspective(c, _find_coeffs(targ_pts, _orig_pts))
return _apply_perspective(c, _find_coeffs(_orig_pts, targ_pts))
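A sanity check (a sketch): warping onto _orig_pts themselves solves for the identity homography, so the flow should come back unchanged.
c = FlowField((100,100), tensor([[0.,0.],[0.5,-0.5]]))
_perspective_warp(c, _orig_pts).flow #unchanged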
@TfmCoord
def perspective_warp(c, magnitude:partial(uniform,size=8)=0, invert=False):
"Apply warp of `magnitude` to `c`."
magnitude = magnitude.view(4,2)
targ_pts = [[x+m for x,m in zip(xs, ms)] for xs, ms in zip(_orig_pts, magnitude)]
return _perspective_warp(c, targ_pts, invert)
@TfmCoord
def symmetric_warp(c, magnitude:partial(uniform,size=4)=0, invert=False):
"Apply symmetric warp of `magnitude` to `c`."
m = listify(magnitude, 4)
targ_pts = [[-1-m[3],-1-m[1]], [-1-m[2],1+m[1]], [1+m[3],-1-m[0]], [1+m[2],1+m[0]]]
return _perspective_warp(c, targ_pts, invert)
@TfmCoord
def tilt(c, direction:uniform_int, magnitude:uniform=0, invert=False):
"Tilt `c` field with random `direction` and `magnitude`."
if direction == 0: targ_pts = [[-1,-1], [-1,1], [1,-1-magnitude], [1,1+magnitude]]
elif direction == 1: targ_pts = [[-1,-1-magnitude], [-1,1+magnitude], [1,-1], [1,1]]
elif direction == 2: targ_pts = [[-1,-1], [-1-magnitude,1], [1,-1], [1+magnitude,1]]
elif direction == 3: targ_pts = [[-1-magnitude,-1], [-1,1], [1+magnitude,-1], [1,1]]
coeffs = _find_coeffs(targ_pts, _orig_pts) if invert else _find_coeffs(_orig_pts, targ_pts)
return _apply_perspective(c, coeffs)
@TfmCoord
def skew(c, direction:uniform_int, magnitude:uniform=0, invert=False):
"Skew `c` field with random `direction` and `magnitude`."
if direction == 0: targ_pts = [[-1-magnitude,-1], [-1,1], [1,-1], [1,1]]
elif direction == 1: targ_pts = [[-1,-1-magnitude], [-1,1], [1,-1], [1,1]]
elif direction == 2: targ_pts = [[-1,-1], [-1-magnitude,1], [1,-1], [1,1]]
elif direction == 3: targ_pts = [[-1,-1], [-1,1+magnitude], [1,-1], [1,1]]
elif direction == 4: targ_pts = [[-1,-1], [-1,1], [1+magnitude,-1], [1,1]]
elif direction == 5: targ_pts = [[-1,-1], [-1,1], [1,-1-magnitude], [1,1]]
elif direction == 6: targ_pts = [[-1,-1], [-1,1], [1,-1], [1+magnitude,1]]
elif direction == 7: targ_pts = [[-1,-1], [-1,1], [1,-1], [1,1+magnitude]]
coeffs = _find_coeffs(targ_pts, _orig_pts) if invert else _find_coeffs(_orig_pts, targ_pts)
return _apply_perspective(c, coeffs)
tfms = [tilt(direction=(0,3), magnitude=0.4)]
tfm_x,tfm_y = test_tfm(img, pnts, tfms, padding_mode='zeros', size=300)
tfm_x.show(y=tfm_y)
tfms = [symmetric_warp(magnitude=(-0.4,0.4))]
tfm_x,tfm_y = test_tfm(img, pnts, tfms, padding_mode='zeros')
tfm_x.show(y=tfm_y)
def get_transforms(do_flip:bool=True, flip_vert:bool=False, max_rotate:float=10., max_zoom:float=1.1,
                   max_lighting:float=0.2, max_warp:float=0.2, p_affine:float=0.75,
                   p_lighting:float=0.75, xtra_tfms:Optional[Collection[Transform]]=None)->Collection[Transform]:
"Utility func to easily create a list of flip, rotate, `zoom`, warp, lighting transforms."
res = [rand_crop()]
    if do_flip: res.append(dihedral_affine() if flip_vert else flip_affine(p=0.5))
if max_warp: res.append(symmetric_warp(magnitude=(-max_warp,max_warp), p=p_affine))
if max_rotate: res.append(rotate(degrees=(-max_rotate,max_rotate), p=p_affine))
if max_zoom>1: res.append(rand_zoom(scale=(1.,max_zoom), p=p_affine))
if max_lighting:
res.append(brightness(change=(0.5*(1-max_lighting), 0.5*(1+max_lighting)), p=p_lighting))
res.append(contrast(scale=(1-max_lighting, 1/(1-max_lighting)), p=p_lighting))
# train , valid
return (res + listify(xtra_tfms), [crop_pad()])
tfms = get_transforms()
_, axs = plt.subplots(2, 4, figsize=(10,5))
for i, ax in enumerate(axs.flatten()):
tfm_x, tfm_y = test_tfm(img, pnts, tfms[0], size=224)
tfm_x.show(ax=ax, y = tfm_y)