#!/usr/bin/env python
# coding: utf-8

# ## Multi-label classification

# In[1]:


get_ipython().run_line_magic('reload_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')
get_ipython().run_line_magic('matplotlib', 'inline')


# In[2]:


from fastai.conv_learner import *


# In[3]:


PATH = 'data/planet/'


# In[4]:


# Data preparation steps if you are using Crestle:

os.makedirs('data/planet/models', exist_ok=True)
os.makedirs('/cache/planet/tmp', exist_ok=True)

get_ipython().system('ln -s /datasets/kaggle/planet-understanding-the-amazon-from-space/train-jpg {PATH}')
get_ipython().system('ln -s /datasets/kaggle/planet-understanding-the-amazon-from-space/test-jpg {PATH}')
get_ipython().system('ln -s /datasets/kaggle/planet-understanding-the-amazon-from-space/train_v2.csv {PATH}')
get_ipython().system('ln -s /cache/planet/tmp {PATH}')


# In[4]:


# A bare `ls {PATH}` is IPython automagic and not valid Python in a script,
# so it is routed through the shell like the commands above.
get_ipython().system('ls {PATH}')


# ## Multi-label versus single-label classification

# In[5]:


from fastai.plots import *


# In[6]:


def get_1st(path):
    """Return the first file, in sorted order, matching ``*.*`` under `path`.

    `glob` yields matches in arbitrary order, so they are sorted to make the
    selected file reproducible between runs.

    Raises:
        IndexError: if no file under `path` matches ``*.*``.
    """
    matches = sorted(glob(f'{path}/*.*'))
    if not matches:
        raise IndexError(f'no files matching *.* found in {path!r}')
    return matches[0]


# In[6]:


dc_path = "data/dogscats/valid/"
list_paths = [get_1st(f"{dc_path}cats"), get_1st(f"{dc_path}dogs")]
plots_from_files(list_paths, titles=["cat", "dog"], maintitle="Single-label classification")


# In single-label classification each sample belongs to one class. In the previous example, each image is either a *dog* or a *cat*.

# In[7]:


list_paths = [f"{PATH}train-jpg/train_0.jpg", f"{PATH}train-jpg/train_1.jpg"]
titles=["haze primary", "agriculture clear primary water"]
plots_from_files(list_paths, titles=titles, maintitle="Multi-label classification")


# In multi-label classification each sample can belong to one or more classes. In the previous example, the first image belongs to two classes: *haze* and *primary*. The second image belongs to four classes: *agriculture*, *clear*, *primary* and *water*.
# ## Multi-label models for Planet dataset

# NOTE: these cells rely on `PATH` and on the names brought in by
# `from fastai.conv_learner import *` in the setup cells earlier in the file.

# In[5]:


from planet import f2

metrics=[f2]
f_model = resnet34


# In[6]:


label_csv = f'{PATH}train_v2.csv'
# Number of lines minus one (presumably a header row -- confirm against the
# CSV). The context manager closes the file instead of leaking the handle.
with open(label_csv) as label_file:
    n = sum(1 for _ in label_file) - 1
val_idxs = get_cv_idxs(n)


# We use a different set of data augmentations for this dataset - we also allow vertical flips, since we don't expect vertical orientation of satellite images to change our classifications.

# In[7]:


def get_data(sz):
    """Build the Planet `ImageClassifierData` for image size `sz`.

    Transforms come from `tfms_from_model` with `transforms_top_down`
    augmentation and `max_zoom=1.05`. The data object is read from
    `label_csv`, using the module-level `val_idxs` for validation and
    'test-jpg' as the test set name.
    """
    tfms = tfms_from_model(f_model, sz, aug_tfms=transforms_top_down, max_zoom=1.05)
    return ImageClassifierData.from_csv(PATH, 'train-jpg', label_csv, tfms=tfms,
                    suffix='.jpg', val_idxs=val_idxs, test_name='test-jpg')


# In[9]:


data = get_data(256)


# In[35]:


x,y = next(iter(data.val_dl))


# In[36]:


y


# In[37]:


list(zip(data.classes, y[0]))


# In[43]:


# NOTE(review): the 1.4 factor presumably brightens the image for display -- confirm.
plt.imshow(data.val_ds.denorm(to_np(x))[0]*1.4);


# In[8]:


sz=64


# In[9]:


data = get_data(sz)


# In[10]:


# NOTE(review): presumably writes resized copies under the 'tmp' folder -- confirm against fastai.
data = data.resize(int(sz*1.3), 'tmp')


# In[11]:


learn = ConvLearner.pretrained(f_model, data, metrics=metrics)


# In[15]:


lrf=learn.lr_find()
learn.sched.plot()


# In[12]:


lr = 0.2


# In[13]:


learn.fit(lr, 3, cycle_len=1, cycle_mult=2)


# In[14]:


# Three learning rates (lr/9, lr/3, lr) used for the fits that follow `unfreeze()`.
lrs = np.array([lr/9,lr/3,lr])


# In[15]:


learn.unfreeze()
learn.fit(lrs, 3, cycle_len=1, cycle_mult=2)


# In[19]:


learn.save(f'{sz}')


# In[22]:


learn.sched.plot_loss()


# In[20]:


sz=128


# In[21]:


learn.set_data(get_data(sz))
learn.freeze()
learn.fit(lr, 3, cycle_len=1, cycle_mult=2)


# In[22]:


learn.unfreeze()
learn.fit(lrs, 3, cycle_len=1, cycle_mult=2)
learn.save(f'{sz}')


# In[23]:


sz=256


# In[24]:


learn.set_data(get_data(sz))
learn.freeze()
learn.fit(lr, 3, cycle_len=1, cycle_mult=2)


# In[25]:


learn.unfreeze()
learn.fit(lrs, 3, cycle_len=1, cycle_mult=2)
learn.save(f'{sz}')


# In[26]:


# Average the predictions returned by `learn.TTA()` over their first axis.
multi_preds, y = learn.TTA()
preds = np.mean(multi_preds, 0)


# In[27]:


f2(preds,y)


# ### End

# In[ ]: