#!/usr/bin/env python
# coding: utf-8

# In[ ]:

#hide
from utils import *

# # Other computer vision problems

# ## Multi-label classification

# ### The data

# In[ ]:

from fastai2.vision.all import *
path = untar_data(URLs.PASCAL_2007)

# In[ ]:

df = pd.read_csv(path/'train.csv')
df.head()

# ### Sidebar: Pandas and DataFrames

# In[ ]:

df.iloc[:,0]
df.iloc[0,:]
# Trailing ‘:’s are always optional (in numpy, PyTorch, pandas, etc),
#   so this is equivalent:
df.iloc[0]

# In[ ]:

df['fname']

# ### End sidebar

# ### Constructing a data block

# In[ ]:

dblock = DataBlock()

# In[ ]:

dsets = dblock.datasets(df)

# In[ ]:

dsets.train[0]

# In[ ]:

# Same block, but with the independent/dependent variables picked out of each row.
dblock = DataBlock(
    get_x=lambda r: r['fname'],
    get_y=lambda r: r['labels'],
)
dsets = dblock.datasets(df)
dsets.train[0]

# In[ ]:

# Named-function equivalents of the lambdas used in the previous cell.
def get_x(r):
    """Return the `fname` field of row `r`."""
    return r['fname']


def get_y(r):
    """Return the `labels` field of row `r`."""
    return r['labels']


dblock = DataBlock(get_x=get_x, get_y=get_y)
dsets = dblock.datasets(df)
dsets.train[0]

# In[ ]:

#hide
Path.BASE_PATH = path

# In[ ]:

def get_x(r):
    """Return the location of row `r`'s file inside `path/'train'`."""
    # `path` is the module-level global, looked up each time this is called.
    return path/'train'/r['fname']


def get_y(r):
    """Return row `r`'s `labels` string split on single spaces, as a list."""
    return r['labels'].split(' ')


dblock = DataBlock(get_x=get_x, get_y=get_y)
dsets = dblock.datasets(df)
dsets.train[0]

# In[ ]:

dblock = DataBlock(
    blocks=(ImageBlock, MultiCategoryBlock),
    get_x=get_x,
    get_y=get_y,
)
dsets = dblock.datasets(df)
dsets.train[0]

# In[ ]:

# Positions where the encoded target of the first training item equals 1,
# looked up in the training vocab.
idxs = torch.where(dsets.train[0][1]==1.)[0]
dsets.train.vocab[idxs]

# In[ ]:

def splitter(df):
    """Split `df` by its `is_valid` column.

    Returns a `(train, valid)` pair of lists holding the index labels of the
    rows where `is_valid` is unset and set, respectively.
    """
    valid_mask = df['is_valid']
    return df.index[~valid_mask].tolist(), df.index[valid_mask].tolist()


dblock = DataBlock(
    blocks=(ImageBlock, MultiCategoryBlock),
    splitter=splitter,
    get_x=get_x,
    get_y=get_y,
)
dsets = dblock.datasets(df)
dsets.train[0]

# In[ ]:

dblock = DataBlock(
    blocks=(ImageBlock, MultiCategoryBlock),
    splitter=splitter,
    get_x=get_x,
    get_y=get_y,
    item_tfms=RandomResizedCrop(128, min_scale=0.35),
)
dls = dblock.dataloaders(df)

# In[ ]:

dls.show_batch(rows=1, cols=3)

# ### Binary cross entropy

# In[ ]:

learn = cnn_learner(dls, resnet18)

# In[ ]:

x,y = dls.train.one_batch()
activs = learn.model(x)
activs.shape

# In[ ]:

activs[0]

# In[ ]:

def binary_cross_entropy(inputs, targets):
    """Mean binary cross-entropy between raw activations and 0/1 targets.

    `inputs` are un-normalised activations (the sigmoid is applied here) and
    `targets` is a tensor broadcastable against them in which 1 marks a
    present label. For every element the probability given to the *correct*
    outcome is selected (`p` where the target is 1, `1-p` otherwise), and the
    negated mean of its log is returned, so a lower value is a better fit.

    This is an illustrative version; `nn.BCEWithLogitsLoss` computes the same
    quantity and is the one actually used below.
    """
    probs = inputs.sigmoid()
    # The leading minus turns the (non-positive) mean log-likelihood into a
    # loss to minimise.
    return -torch.where(targets==1, probs, 1-probs).log().mean()

# In[ ]:

loss_func = nn.BCEWithLogitsLoss()
loss = loss_func(activs, y)
loss

# In[ ]:

def say_hello(name, say_what="Hello"):
    """Return the greeting `"<say_what> <name>."`."""
    return f"{say_what} {name}."


say_hello('Jeremy'),say_hello('Jeremy', 'Ahoy!')

# In[ ]:

# `partial` pre-binds `say_what`, leaving a one-argument function.
f = partial(say_hello, say_what="Bonjour")
f("Jeremy"),f("Sylvain")

# In[ ]:

learn = cnn_learner(dls, resnet50, metrics=partial(accuracy_multi, thresh=0.2))
learn.fine_tune(3, base_lr=3e-3, freeze_epochs=4)

# In[ ]:

# Re-validate the trained model with a much lower metric threshold...
learn.metrics = partial(accuracy_multi, thresh=0.1)
learn.validate()

# In[ ]:

# ...and with a much higher one.
learn.metrics = partial(accuracy_multi, thresh=0.99)
learn.validate()

# In[ ]:

preds,targs = learn.get_preds()

# In[ ]:

# NOTE(review): `sigmoid=False` presumably because `get_preds` already
# returns activated predictions -- confirm against the fastai version in use.
accuracy_multi(preds, targs, thresh=0.9, sigmoid=False)

# In[ ]:

# Sweep 29 evenly spaced thresholds in [0.05, 0.95] and plot accuracy for each.
xs = torch.linspace(0.05,0.95,29)
accs = [accuracy_multi(preds, targs, thresh=i, sigmoid=False) for i in xs]
plt.plot(xs,accs);

# ## Regression

# ### Assemble the data

# In[ ]:

path = untar_data(URLs.BIWI_HEAD_POSE)

# In[ ]:

#hide
Path.BASE_PATH = path

# In[ ]:

path.ls()

# In[ ]:

(path/'01').ls()

# In[ ]:

img_files = get_image_files(path)


def img2pose(x):
    """Return the pose-file path that goes with image path `x`.

    The last 7 characters of `x` are dropped and `pose.txt` is appended.
    """
    return Path(f'{str(x)[:-7]}pose.txt')


img2pose(img_files[0])

# In[ ]:

im = PILImage.create(img_files[0])
im.shape

# In[ ]:

im.to_thumb(160)

# In[ ]:

# Calibration values read from directory `01`, ignoring the file's last 6 lines.
cal = np.genfromtxt(path/'01'/'rgb.cal', skip_footer=6)


def get_ctr(f):
    """Return a 2-element tensor computed from image `f`'s pose file and `cal`.

    The pose file is located with `img2pose`, and its first 3 lines are
    skipped before the values are read.
    """
    ctr = np.genfromtxt(img2pose(f), skip_header=3)
    # NOTE(review): this looks like a pinhole-camera projection of a 3D point
    # (scale factors on the diagonal of `cal`, offsets in its last column) --
    # confirm against the dataset's calibration format.
    c1 = ctr[0] * cal[0][0]/ctr[2] + cal[0][2]
    c2 = ctr[1] * cal[1][1]/ctr[2] + cal[1][2]
    return tensor([c1,c2])

# In[ ]:

get_ctr(img_files[0])

# In[ ]:

biwi = DataBlock(
    blocks=(ImageBlock, PointBlock),
    get_items=get_image_files,
    get_y=get_ctr,
    # Files whose parent directory is named '13' are the ones the splitter
    # function flags.
    splitter=FuncSplitter(lambda o: o.parent.name=='13'),
    batch_tfms=[*aug_transforms(size=(240,320)),
                Normalize.from_stats(*imagenet_stats)],
)

# In[ ]:

dls = biwi.dataloaders(path)
dls.show_batch(max_n=9, figsize=(8,6))

# In[ ]:

xb,yb = dls.one_batch()
xb.shape,yb.shape

# In[ ]:

yb[0]

# ### Training a model

# In[ ]:

learn = cnn_learner(dls, resnet18, y_range=(-1,1))

# In[ ]:

def sigmoid_range(x, lo, hi):
    """Map `x` into the open interval (`lo`, `hi`) with a rescaled sigmoid."""
    return torch.sigmoid(x) * (hi-lo) + lo

# In[ ]:

plot_function(partial(sigmoid_range,lo=-1,hi=1), min=-4, max=4)

# In[ ]:

dls.loss_func

# In[ ]:

learn.lr_find()

# In[ ]:

lr = 2e-2
learn.fit_one_cycle(5, lr)

# In[ ]:

math.sqrt(0.0001)

# In[ ]:

learn.show_results(ds_idx=1, max_n=3, figsize=(6,8))

# ## Conclusion

# ## Questionnaire

# ### Further research

# In[ ]: