#hide
from utils import *
from fastai2.vision.all import *
path = untar_data(URLs.PASCAL_2007)
df = pd.read_csv(path/'train.csv')
df.head()
| | fname | labels | is_valid |
|---|---|---|---|
| 0 | 000005.jpg | chair | True |
| 1 | 000007.jpg | car | True |
| 2 | 000009.jpg | horse person | True |
| 3 | 000012.jpg | car | False |
| 4 | 000016.jpg | bicycle | True |
df.iloc[:,0]
df.iloc[0,:]
# Trailing ':'s are always optional (in numpy, PyTorch, pandas, etc.),
# so this is equivalent:
df.iloc[0]
fname       000005.jpg
labels           chair
is_valid          True
Name: 0, dtype: object
df['fname']
0 000005.jpg
1 000007.jpg
2 000009.jpg
3 000012.jpg
4 000016.jpg
...
5006 009954.jpg
5007 009955.jpg
5008 009958.jpg
5009 009959.jpg
5010 009961.jpg
Name: fname, Length: 5011, dtype: object
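# As a quick illustration of how pandas columns work (a sketch with made-up
# data, not part of the PASCAL dataset): a DataFrame can be built from a dict,
# and new columns can be derived from existing ones.
tmp_df = pd.DataFrame({'a': [1, 2], 'b': [3, 4]})
tmp_df['c'] = tmp_df['a'] + tmp_df['b']
tmp_df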
dblock = DataBlock()
dsets = dblock.datasets(df)
dsets.train[0]
(fname       008663.jpg
 labels      car person
 is_valid         False
 Name: 4346, dtype: object,
 fname       008663.jpg
 labels      car person
 is_valid         False
 Name: 4346, dtype: object)
dblock = DataBlock(get_x = lambda r: r['fname'], get_y = lambda r: r['labels'])
dsets = dblock.datasets(df)
dsets.train[0]
('005620.jpg', 'aeroplane')
def get_x(r): return r['fname']
def get_y(r): return r['labels']
dblock = DataBlock(get_x = get_x, get_y = get_y)
dsets = dblock.datasets(df)
dsets.train[0]
('002549.jpg', 'tvmonitor')
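# The named-function version matters if you plan to serialize the Learner
# later: lambdas can't be pickled. A minimal sketch (pickle is standard
# library; the lambda below is only for illustration):
import pickle
pickle.dumps(get_x)                      # works: top-level functions pickle fine
try: pickle.dumps(lambda r: r['fname'])
except (pickle.PicklingError, AttributeError) as e: print(type(e).__name__)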
#hide
Path.BASE_PATH = path
def get_x(r): return path/'train'/r['fname']
def get_y(r): return r['labels'].split(' ')
dblock = DataBlock(get_x = get_x, get_y = get_y)
dsets = dblock.datasets(df)
dsets.train[0]
(Path('train/002844.jpg'), ['train'])
dblock = DataBlock(blocks=(ImageBlock, MultiCategoryBlock),
                   get_x = get_x, get_y = get_y)
dsets = dblock.datasets(df)
dsets.train[0]
(PILImage mode=RGB size=500x375, TensorMultiCategory([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.]))
idxs = torch.where(dsets.train[0][1]==1.)[0]
dsets.train.vocab[idxs]
(#1) ['dog']
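# A sketch of what MultiCategoryBlock is doing here, using a tiny made-up
# vocab (not the real PASCAL vocab): each sample's labels become a vector of
# zeros with a 1 at the index of every category that is present.
vocab = ['car', 'dog', 'person']
labels = ['dog', 'person']
one_hot = torch.zeros(len(vocab))
one_hot[[vocab.index(l) for l in labels]] = 1.
one_hot, [vocab[i] for i in torch.where(one_hot == 1.)[0]]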
def splitter(df):
    train = df.index[~df['is_valid']].tolist()
    valid = df.index[df['is_valid']].tolist()
    return train,valid
dblock = DataBlock(blocks=(ImageBlock, MultiCategoryBlock),
                   splitter=splitter,
                   get_x=get_x,
                   get_y=get_y)
dsets = dblock.datasets(df)
dsets.train[0]
(PILImage mode=RGB size=500x333, TensorMultiCategory([0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]))
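# A quick sanity check (a sketch): the sizes of the two subsets should match
# the is_valid column we split on.
len(dsets.train), len(dsets.valid), (~df['is_valid']).sum(), df['is_valid'].sum()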
dblock = DataBlock(blocks=(ImageBlock, MultiCategoryBlock),
                   splitter=splitter,
                   get_x=get_x,
                   get_y=get_y,
                   item_tfms = RandomResizedCrop(128, min_scale=0.35))
dls = dblock.dataloaders(df)
dls.show_batch(nrows=1, ncols=3)
learn = cnn_learner(dls, resnet18)
x,y = dls.train.one_batch()
activs = learn.model(x)
activs.shape
torch.Size([64, 20])
activs[0]
tensor([-1.0028, 0.3400, -0.5906, 0.7806, 3.1160, -0.1994, 1.3180, 1.6361, -1.7553, 0.2217, 2.8052, 1.3229, 0.9369, -1.4760, -0.3204, -2.3116, -3.8615, -1.5931, 0.0745, -3.6006],
device='cuda:5', grad_fn=<SelectBackward>)
def binary_cross_entropy(inputs, targets):
    inputs = inputs.sigmoid()
    return -torch.where(targets==1, inputs, 1-inputs).log().mean()
loss_func = nn.BCEWithLogitsLoss()
loss = loss_func(activs, y)
loss
tensor(1.0082, device='cuda:5', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
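# As a sanity check (a sketch with random data), the hand-rolled
# binary_cross_entropy above should agree with PyTorch's built-in version to
# within floating-point tolerance.
acts_demo  = torch.randn(4, 20)
targs_demo = torch.randint(0, 2, (4, 20)).float()
manual  = binary_cross_entropy(acts_demo, targs_demo)
builtin = nn.BCEWithLogitsLoss()(acts_demo, targs_demo)
torch.allclose(manual, builtin)          # expected: True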
def say_hello(name, say_what="Hello"): return f"{say_what} {name}."
say_hello('Jeremy'),say_hello('Jeremy', 'Ahoy!')
('Hello Jeremy.', 'Ahoy! Jeremy.')
f = partial(say_hello, say_what="Bonjour")
f("Jeremy"),f("Sylvain")
('Bonjour Jeremy.', 'Bonjour Sylvain.')
learn = cnn_learner(dls, resnet50, metrics=partial(accuracy_multi, thresh=0.2))
learn.fine_tune(3, base_lr=3e-3, freeze_epochs=4)
| epoch | train_loss | valid_loss | accuracy_multi | time |
|---|---|---|---|---|
| 0 | 0.903610 | 0.659728 | 0.263068 | 00:07 |
| 1 | 0.724266 | 0.346332 | 0.525458 | 00:07 |
| 2 | 0.415597 | 0.125662 | 0.937590 | 00:07 |
| 3 | 0.254987 | 0.116880 | 0.945418 | 00:07 |
| epoch | train_loss | valid_loss | accuracy_multi | time |
|---|---|---|---|---|
| 0 | 0.123872 | 0.132634 | 0.940179 | 00:08 |
| 1 | 0.112387 | 0.113758 | 0.949343 | 00:08 |
| 2 | 0.092151 | 0.104368 | 0.951195 | 00:08 |
learn.metrics = partial(accuracy_multi, thresh=0.1)
learn.validate()
(#2) [0.10436797887086868,0.93057781457901]
learn.metrics = partial(accuracy_multi, thresh=0.99)
learn.validate()
(#2) [0.10436797887086868,0.9416930675506592]
preds,targs = learn.get_preds()
accuracy_multi(preds, targs, thresh=0.9, sigmoid=False)
TensorMultiCategory(0.9554)
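# accuracy_multi is conceptually just thresholding the (sigmoid of the)
# predictions and comparing elementwise with the targets; roughly this
# (a sketch, not fastai's exact source):
def accuracy_multi_sketch(inp, targ, thresh=0.5, sigmoid=True):
    if sigmoid: inp = inp.sigmoid()
    return ((inp > thresh) == targ.bool()).float().mean()
accuracy_multi_sketch(preds, targs, thresh=0.9, sigmoid=False)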
xs = torch.linspace(0.05,0.95,29)
accs = [accuracy_multi(preds, targs, thresh=i, sigmoid=False) for i in xs]
plt.plot(xs,accs);
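# Rather than reading the best threshold off the plot, it can be picked
# numerically from the same sweep (a sketch; accs holds one accuracy per
# candidate threshold):
best_idx = torch.stack([a.float() for a in accs]).argmax()
xs[best_idx], accs[best_idx]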
path = untar_data(URLs.BIWI_HEAD_POSE)
#hide
Path.BASE_PATH = path
path.ls()
(#50) [Path('13.obj'),Path('07.obj'),Path('06.obj'),Path('13'),Path('10'),Path('02'),Path('11'),Path('01'),Path('20.obj'),Path('17')...]
(path/'01').ls()
(#1000) [Path('01/frame_00281_pose.txt'),Path('01/frame_00078_pose.txt'),Path('01/frame_00349_rgb.jpg'),Path('01/frame_00304_pose.txt'),Path('01/frame_00207_pose.txt'),Path('01/frame_00116_rgb.jpg'),Path('01/frame_00084_rgb.jpg'),Path('01/frame_00070_rgb.jpg'),Path('01/frame_00125_pose.txt'),Path('01/frame_00324_rgb.jpg')...]
img_files = get_image_files(path)
def img2pose(x): return Path(f'{str(x)[:-7]}pose.txt')
img2pose(img_files[0])
Path('13/frame_00349_pose.txt')
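# A sketch of a quick sanity check: every image file should have a matching
# pose file next to it.
all(img2pose(f).exists() for f in img_files)   # expected: True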
im = PILImage.create(img_files[0])
im.shape
(480, 640)
im.to_thumb(160)
cal = np.genfromtxt(path/'01'/'rgb.cal', skip_footer=6)
def get_ctr(f):
    ctr = np.genfromtxt(img2pose(f), skip_header=3)
    c1 = ctr[0] * cal[0][0]/ctr[2] + cal[0][2]
    c2 = ctr[1] * cal[1][1]/ctr[2] + cal[1][2]
    return tensor([c1,c2])
get_ctr(img_files[0])
tensor([384.6370, 259.4787])
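# get_ctr applies the standard pinhole-camera projection: the 3D head centre
# (x, y, z) from the pose file is mapped to pixel coordinates using the focal
# lengths and principal point in the calibration file. A sketch with made-up
# numbers (hypothetical head centre, not from the dataset):
hx, hy, hz = 100., 50., 1000.
fx, fy, cx, cy = cal[0][0], cal[1][1], cal[0][2], cal[1][2]
(hx * fx / hz + cx, hy * fy / hz + cy)             # projected pixel location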
biwi = DataBlock(blocks=(ImageBlock, PointBlock),
                 get_items=get_image_files,
                 get_y=get_ctr,
                 splitter=FuncSplitter(lambda o: o.parent.name=='13'),
                 batch_tfms=[*aug_transforms(size=(240,320)), Normalize.from_stats(*imagenet_stats)])
dls = biwi.dataloaders(path)
dls.show_batch(max_n=9, figsize=(8,6))
xb,yb = dls.one_batch()
xb.shape,yb.shape
(torch.Size([64, 3, 240, 320]), torch.Size([64, 1, 2]))
yb[0]
tensor([[0.0111, 0.1810]], device='cuda:5')
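# fastai rescales point targets to the range (-1, 1) relative to the image
# size, which is why y_range=(-1,1) is passed to the learner below. A sketch
# of mapping the first target back to pixel coordinates, assuming (x, y)
# order scaled by (width, height):
h, w = xb.shape[-2:]
(yb[0].cpu() + 1) / 2 * tensor([w, h]).float()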
learn = cnn_learner(dls, resnet18, y_range=(-1,1))
def sigmoid_range(x, lo, hi): return torch.sigmoid(x) * (hi-lo) + lo
plot_function(partial(sigmoid_range,lo=-1,hi=1), min=-4, max=4)
dls.loss_func
FlattenedLoss of MSELoss()
learn.lr_find()
lr = 2e-2
learn.fit_one_cycle(5, lr)
| epoch | train_loss | valid_loss | time |
|---|---|---|---|
| 0 | 0.045840 | 0.012957 | 00:36 |
| 1 | 0.006369 | 0.001853 | 00:36 |
| 2 | 0.003000 | 0.000496 | 00:37 |
| 3 | 0.001963 | 0.000360 | 00:37 |
| 4 | 0.001584 | 0.000116 | 00:36 |
math.sqrt(0.0001)
0.01
learn.show_results(ds_idx=1, max_n=3, figsize=(6,8))