#hide
from utils import *
from fastai2.vision.all import *
path = untar_data(URLs.PASCAL_2007)
df = pd.read_csv(path/'train.csv')
df.head()
| | fname | labels | is_valid |
|---|---|---|---|
| 0 | 000005.jpg | chair | True |
| 1 | 000007.jpg | car | True |
| 2 | 000009.jpg | horse person | True |
| 3 | 000012.jpg | car | False |
| 4 | 000016.jpg | bicycle | True |
df.iloc[:,0]
df.iloc[0,:]
# Trailing ':'s are always optional (in numpy, PyTorch, pandas, etc.),
# so this is equivalent:
df.iloc[0]
fname       000005.jpg
labels           chair
is_valid          True
Name: 0, dtype: object
df['fname']
0 000005.jpg
1 000007.jpg
2 000009.jpg
3 000012.jpg
4 000016.jpg
...
5006 009954.jpg
5007 009955.jpg
5008 009958.jpg
5009 009959.jpg
5010 009961.jpg
Name: fname, Length: 5011, dtype: object
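# As a quick illustration of how pandas columns work (a sketch with made-up
# data, not part of the PASCAL dataset): a DataFrame can be built from a dict,
# and new columns can be derived from existing ones.
tmp_df = pd.DataFrame({'a': [1, 2], 'b': [3, 4]})
tmp_df['c'] = tmp_df['a'] + tmp_df['b']
tmp_df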
dblock = DataBlock()
dsets = dblock.datasets(df)
dsets.train[0]
(fname       008663.jpg
 labels      car person
 is_valid         False
 Name: 4346, dtype: object,
 fname       008663.jpg
 labels      car person
 is_valid         False
 Name: 4346, dtype: object)
dblock = DataBlock(get_x = lambda r: r['fname'], get_y = lambda r: r['labels'])
dsets = dblock.datasets(df)
dsets.train[0]
('005620.jpg', 'aeroplane')
def get_x(r): return r['fname']
def get_y(r): return r['labels']
dblock = DataBlock(get_x = get_x, get_y = get_y)
dsets = dblock.datasets(df)
dsets.train[0]
('002549.jpg', 'tvmonitor')
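# The named-function version matters if you plan to serialize the Learner
# later: lambdas can't be pickled. A minimal sketch (pickle is standard
# library; the lambda below is only for illustration):
import pickle
pickle.dumps(get_x)                      # works: top-level functions pickle fine
try: pickle.dumps(lambda r: r['fname'])
except (pickle.PicklingError, AttributeError) as e: print(type(e).__name__)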
#hide
Path.BASE_PATH = path
def get_x(r): return path/'train'/r['fname']
def get_y(r): return r['labels'].split(' ')
dblock = DataBlock(get_x = get_x, get_y = get_y)
dsets = dblock.datasets(df)
dsets.train[0]
(Path('train/002844.jpg'), ['train'])
dblock = DataBlock(blocks=(ImageBlock, MultiCategoryBlock),
                   get_x = get_x, get_y = get_y)
dsets = dblock.datasets(df)
dsets.train[0]
(PILImage mode=RGB size=500x375, TensorMultiCategory([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.]))
idxs = torch.where(dsets.train[0][1]==1.)[0]
dsets.train.vocab[idxs]
(#1) ['dog']
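# A sketch of what MultiCategoryBlock is doing here, using a tiny made-up
# vocab (not the real PASCAL vocab): each sample's labels become a vector of
# zeros with a 1 at the index of every category that is present.
vocab = ['car', 'dog', 'person']
labels = ['dog', 'person']
one_hot = torch.zeros(len(vocab))
one_hot[[vocab.index(l) for l in labels]] = 1.
one_hot, [vocab[i] for i in torch.where(one_hot == 1.)[0]]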
def splitter(df):
    train = df.index[~df['is_valid']].tolist()
    valid = df.index[df['is_valid']].tolist()
    return train,valid
dblock = DataBlock(blocks=(ImageBlock, MultiCategoryBlock),
                   splitter=splitter,
                   get_x=get_x,
                   get_y=get_y)
dsets = dblock.datasets(df)
dsets.train[0]
(PILImage mode=RGB size=500x333, TensorMultiCategory([0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]))
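# A quick sanity check (a sketch): the sizes of the two subsets should match
# the is_valid column we split on.
len(dsets.train), len(dsets.valid), (~df['is_valid']).sum(), df['is_valid'].sum()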
dblock = DataBlock(blocks=(ImageBlock, MultiCategoryBlock),
                   splitter=splitter,
                   get_x=get_x,
                   get_y=get_y,
                   item_tfms = RandomResizedCrop(128, min_scale=0.35))
dls = dblock.dataloaders(df)
dls.show_batch(nrows=1, ncols=3)
learn = cnn_learner(dls, resnet18)
x,y = dls.train.one_batch()
activs = learn.model(x)
activs.shape
torch.Size([64, 20])
activs[0]
tensor([-1.0028, 0.3400, -0.5906, 0.7806, 3.1160, -0.1994, 1.3180, 1.6361, -1.7553, 0.2217, 2.8052, 1.3229, 0.9369, -1.4760, -0.3204, -2.3116, -3.8615, -1.5931, 0.0745, -3.6006],
device='cuda:5', grad_fn=<SelectBackward>)
def binary_cross_entropy(inputs, targets):
    inputs = inputs.sigmoid()
    return -torch.where(targets==1, inputs, 1-inputs).log().mean()
loss_func = nn.BCEWithLogitsLoss()
loss = loss_func(activs, y)
loss
tensor(1.0082, device='cuda:5', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
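# As a sanity check (a sketch with random data), the hand-rolled
# binary_cross_entropy above should agree with PyTorch's built-in version to
# within floating-point tolerance.
acts_demo  = torch.randn(4, 20)
targs_demo = torch.randint(0, 2, (4, 20)).float()
manual  = binary_cross_entropy(acts_demo, targs_demo)
builtin = nn.BCEWithLogitsLoss()(acts_demo, targs_demo)
torch.allclose(manual, builtin)          # expected: True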
def say_hello(name, say_what="Hello"): return f"{say_what} {name}."
say_hello('Jeremy'),say_hello('Jeremy', 'Ahoy!')
('Hello Jeremy.', 'Ahoy! Jeremy.')
f = partial(say_hello, say_what="Bonjour")
f("Jeremy"),f("Sylvain")
('Bonjour Jeremy.', 'Bonjour Sylvain.')
learn = cnn_learner(dls, resnet50, metrics=partial(accuracy_multi, thresh=0.2))
learn.fine_tune(3, base_lr=3e-3, freeze_epochs=4)
| epoch | train_loss | valid_loss | accuracy_multi | time |
|---|---|---|---|---|
| 0 | 0.903610 | 0.659728 | 0.263068 | 00:07 |
| 1 | 0.724266 | 0.346332 | 0.525458 | 00:07 |
| 2 | 0.415597 | 0.125662 | 0.937590 | 00:07 |
| 3 | 0.254987 | 0.116880 | 0.945418 | 00:07 |
| epoch | train_loss | valid_loss | accuracy_multi | time |
|---|---|---|---|---|
| 0 | 0.123872 | 0.132634 | 0.940179 | 00:08 |
| 1 | 0.112387 | 0.113758 | 0.949343 | 00:08 |
| 2 | 0.092151 | 0.104368 | 0.951195 | 00:08 |
learn.metrics = partial(accuracy_multi, thresh=0.1)
learn.validate()
(#2) [0.10436797887086868,0.93057781457901]
learn.metrics = partial(accuracy_multi, thresh=0.99)
learn.validate()
(#2) [0.10436797887086868,0.9416930675506592]
preds,targs = learn.get_preds()
accuracy_multi(preds, targs, thresh=0.9, sigmoid=False)
TensorMultiCategory(0.9554)
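# accuracy_multi is conceptually just thresholding the (sigmoid of the)
# predictions and comparing elementwise with the targets; roughly this
# (a sketch, not fastai's exact source):
def accuracy_multi_sketch(inp, targ, thresh=0.5, sigmoid=True):
    if sigmoid: inp = inp.sigmoid()
    return ((inp > thresh) == targ.bool()).float().mean()
accuracy_multi_sketch(preds, targs, thresh=0.9, sigmoid=False)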
xs = torch.linspace(0.05,0.95,29)
accs = [accuracy_multi(preds, targs, thresh=i, sigmoid=False) for i in xs]
plt.plot(xs,accs);
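# Rather than reading the best threshold off the plot, it can be picked
# numerically from the same sweep (a sketch; accs holds one accuracy per
# candidate threshold):
best_idx = torch.stack([a.float() for a in accs]).argmax()
xs[best_idx], accs[best_idx]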
path = untar_data(URLs.BIWI_HEAD_POSE)
#hide
Path.BASE_PATH = path
path.ls()
(#50) [Path('13.obj'),Path('07.obj'),Path('06.obj'),Path('13'),Path('10'),Path('02'),Path('11'),Path('01'),Path('20.obj'),Path('17')...]
(path/'01').ls()
(#1000) [Path('01/frame_00281_pose.txt'),Path('01/frame_00078_pose.txt'),Path('01/frame_00349_rgb.jpg'),Path('01/frame_00304_pose.txt'),Path('01/frame_00207_pose.txt'),Path('01/frame_00116_rgb.jpg'),Path('01/frame_00084_rgb.jpg'),Path('01/frame_00070_rgb.jpg'),Path('01/frame_00125_pose.txt'),Path('01/frame_00324_rgb.jpg')...]
img_files = get_image_files(path)
def img2pose(x): return Path(f'{str(x)[:-7]}pose.txt')
img2pose(img_files[0])
Path('13/frame_00349_pose.txt')
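# A sketch of a quick sanity check: every image file should have a matching
# pose file next to it.
all(img2pose(f).exists() for f in img_files)   # expected: True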
im = PILImage.create(img_files[0])
im.shape
(480, 640)
im.to_thumb(160)
cal = np.genfromtxt(path/'01'/'rgb.cal', skip_footer=6)
def get_ctr(f):
    ctr = np.genfromtxt(img2pose(f), skip_header=3)
    c1 = ctr[0] * cal[0][0]/ctr[2] + cal[0][2]
    c2 = ctr[1] * cal[1][1]/ctr[2] + cal[1][2]
    return tensor([c1,c2])
get_ctr(img_files[0])
tensor([384.6370, 259.4787])
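# get_ctr applies the standard pinhole-camera projection: the 3D head centre
# (x, y, z) from the pose file is mapped to pixel coordinates using the focal
# lengths and principal point in the calibration file. A sketch with made-up
# numbers (hypothetical head centre, not from the dataset):
hx, hy, hz = 100., 50., 1000.
fx, fy, cx, cy = cal[0][0], cal[1][1], cal[0][2], cal[1][2]
(hx * fx / hz + cx, hy * fy / hz + cy)             # projected pixel location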
biwi = DataBlock(blocks=(ImageBlock, PointBlock),
                 get_items=get_image_files,
                 get_y=get_ctr,
                 splitter=FuncSplitter(lambda o: o.parent.name=='13'),
                 batch_tfms=[*aug_transforms(size=(240,320)), Normalize.from_stats(*imagenet_stats)])
dls = biwi.dataloaders(path)
dls.show_batch(max_n=9, figsize=(8,6))
xb,yb = dls.one_batch()
xb.shape,yb.shape
(torch.Size([64, 3, 240, 320]), torch.Size([64, 1, 2]))
yb[0]
tensor([[0.0111, 0.1810]], device='cuda:5')
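# fastai rescales point targets to the range (-1, 1) relative to the image
# size, which is why y_range=(-1,1) is passed to the learner below. A sketch
# of mapping the first target back to pixel coordinates, assuming (x, y)
# order scaled by (width, height):
h, w = xb.shape[-2:]
(yb[0].cpu() + 1) / 2 * tensor([w, h]).float()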
learn = cnn_learner(dls, resnet18, y_range=(-1,1))
def sigmoid_range(x, lo, hi): return torch.sigmoid(x) * (hi-lo) + lo
plot_function(partial(sigmoid_range,lo=-1,hi=1), min=-4, max=4)
dls.loss_func
FlattenedLoss of MSELoss()
learn.lr_find()
lr = 2e-2
learn.fit_one_cycle(5, lr)
| epoch | train_loss | valid_loss | time |
|---|---|---|---|
| 0 | 0.045840 | 0.012957 | 00:36 |
| 1 | 0.006369 | 0.001853 | 00:36 |
| 2 | 0.003000 | 0.000496 | 00:37 |
| 3 | 0.001963 | 0.000360 | 00:37 |
| 4 | 0.001584 | 0.000116 | 00:36 |
math.sqrt(0.0001)
0.01
learn.show_results(ds_idx=1, max_n=3, figsize=(6,8))