#!/usr/bin/env python
# coding: utf-8

# In[1]:

from theano.sandbox import cuda
cuda.use('gpu0')


# In[2]:

from __future__ import division, print_function
get_ipython().run_line_magic('matplotlib', 'inline')
import utils; reload(utils)
from utils import *
from keras import backend as K


# ## Setup

# In[3]:

path = "data/dogscats/"
model_path = path + 'models/'
if not os.path.exists(model_path): os.mkdir(model_path)

batch_size = 64


# In[4]:

batches = get_batches(path+'train', shuffle=False, batch_size=batch_size)
val_batches = get_batches(path+'valid', shuffle=False, batch_size=batch_size)


# In[6]:

(val_classes, trn_classes, val_labels, trn_labels,
    val_filenames, filenames, test_filenames) = get_classes(path)


# In this notebook we're going to create an ensemble of models and use the average of their
# predictions. For each model in the ensemble, we're going to follow our usual fine-tuning steps:
#
# 1) Create a model that retrains just the last layer
# 2) Add this to a model containing all VGG layers except the last layer
# 3) Fine-tune just the dense layers of this model (pre-computing the output of the convolutional layers)
# 4) Add data augmentation, fine-tuning the dense layers without pre-computation
#
# So first, we need to create our VGG model and pre-compute the output of the conv layers:

# In[15]:

model = Vgg16().model
conv_layers, fc_layers = split_at(model, Convolution2D)


# In[16]:

conv_model = Sequential(conv_layers)


# In[17]:

val_features = conv_model.predict_generator(val_batches, val_batches.nb_sample)
trn_features = conv_model.predict_generator(batches, batches.nb_sample)


# In[33]:

save_array(model_path + 'train_convlayer_features.bc', trn_features)
save_array(model_path + 'valid_convlayer_features.bc', val_features)


# In the future we can just load these precomputed features:

# In[6]:

trn_features = load_array(model_path + 'train_convlayer_features.bc')
val_features = load_array(model_path + 'valid_convlayer_features.bc')


# We can also save some time by pre-computing the training and validation arrays with the image
# decoding and resizing already done:

# In[7]:

trn = get_data(path+'train')
val = get_data(path+'valid')


# In[8]:

save_array(model_path + 'train_data.bc', trn)
save_array(model_path + 'valid_data.bc', val)


# In the future we can just load these resized images:

# In[7]:

trn = load_array(model_path + 'train_data.bc')
val = load_array(model_path + 'valid_data.bc')


# Finally, we can precompute the output of all but the last dropout and dense layers, for creating
# the first stage of the model:

# In[19]:

model.pop()
model.pop()


# In[20]:

ll_val_feat = model.predict_generator(val_batches, val_batches.nb_sample)
ll_feat = model.predict_generator(batches, batches.nb_sample)


# In[21]:

save_array(model_path + 'train_ll_feat.bc', ll_feat)
save_array(model_path + 'valid_ll_feat.bc', ll_val_feat)


# In[8]:

ll_feat = load_array(model_path + 'train_ll_feat.bc')
ll_val_feat = load_array(model_path + 'valid_ll_feat.bc')


# ...and let's also grab the test data, for when we need to submit:

# In[16]:

test = get_data(path+'test')
save_array(model_path + 'test_data.bc', test)


# In[22]:

test = load_array(model_path + 'test_data.bc')
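# `save_array` and `load_array` come from the course's `utils.py`. If you need a standalone
# version, a minimal sketch along the same lines (the `.bc` directories suggest bcolz-backed
# storage, which is assumed here; the `_sketch` names below are ours, not part of the original
# notebook):

# In[ ]:

import bcolz

def save_array_sketch(fname, arr):
    # Write the array into an on-disk carray directory and flush it to disk.
    c = bcolz.carray(arr, rootdir=fname, mode='w')
    c.flush()

def load_array_sketch(fname):
    # Open the on-disk carray and read it back into memory as a numpy array.
    return bcolz.open(fname)[:]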
# ## Last layer

# The functions below automate creating a model that trains the last layer from scratch, and then
# adding those new layers on to the main model.

# In[9]:

def get_ll_layers():
    return [
        BatchNormalization(input_shape=(4096,)),
        Dropout(0.5),
        Dense(2, activation='softmax')
        ]


# In[46]:

def train_last_layer(i):
    ll_layers = get_ll_layers()
    ll_model = Sequential(ll_layers)
    ll_model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    K.set_value(ll_model.optimizer.lr, 1e-5)
    ll_model.fit(ll_feat, trn_labels, validation_data=(ll_val_feat, val_labels), nb_epoch=12)
    K.set_value(ll_model.optimizer.lr, 1e-7)
    ll_model.fit(ll_feat, trn_labels, validation_data=(ll_val_feat, val_labels), nb_epoch=1)
    ll_model.save_weights(model_path + 'll_bn' + i + '.h5')

    vgg = Vgg16()
    model = vgg.model
    model.pop(); model.pop(); model.pop()
    for layer in model.layers: layer.trainable = False
    model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

    ll_layers = get_ll_layers()
    for layer in ll_layers: model.add(layer)
    for l1, l2 in zip(ll_model.layers, model.layers[-3:]):
        l2.set_weights(l1.get_weights())
    model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    model.save_weights(model_path + 'bn' + i + '.h5')
    return model


# ## Dense model

# In[47]:

def get_conv_model(model):
    layers = model.layers
    last_conv_idx = [index for index, layer in enumerate(layers)
                     if type(layer) is Convolution2D][-1]

    conv_layers = layers[:last_conv_idx+1]
    conv_model = Sequential(conv_layers)
    fc_layers = layers[last_conv_idx+1:]
    return conv_model, fc_layers, last_conv_idx


# In[48]:

def get_fc_layers(p, in_shape):
    return [
        MaxPooling2D(input_shape=in_shape),
        Flatten(),
        Dense(4096, activation='relu'),
        BatchNormalization(),
        Dropout(p),
        Dense(4096, activation='relu'),
        BatchNormalization(),
        Dropout(p),
        Dense(2, activation='softmax')
        ]


# In[49]:

def train_dense_layers(i, model):
    conv_model, fc_layers, last_conv_idx = get_conv_model(model)
    conv_shape = conv_model.output_shape[1:]
    fc_model = Sequential(get_fc_layers(0.5, conv_shape))
    for l1, l2 in zip(fc_model.layers, fc_layers):
        weights = l2.get_weights()
        # The new block interleaves BatchNormalization layers that the source model's head
        # does not have, so only copy weights where the shapes actually line up.
        if [w.shape for w in weights] == [w.shape for w in l1.get_weights()]:
            l1.set_weights(weights)
    fc_model.compile(optimizer=Adam(1e-5), loss='categorical_crossentropy',
                     metrics=['accuracy'])
    fc_model.fit(trn_features, trn_labels, nb_epoch=2, batch_size=batch_size,
                 validation_data=(val_features, val_labels))

    gen = image.ImageDataGenerator(rotation_range=10, width_shift_range=0.05,
                                   zoom_range=0.05, channel_shift_range=10,
                                   height_shift_range=0.05, shear_range=0.05,
                                   horizontal_flip=True)
    batches = gen.flow(trn, trn_labels, batch_size=batch_size)
    val_batches = image.ImageDataGenerator().flow(val, val_labels,
                                                  shuffle=False, batch_size=batch_size)

    for layer in conv_model.layers: layer.trainable = False
    for layer in get_fc_layers(0.5, conv_shape): conv_model.add(layer)
    for l1, l2 in zip(conv_model.layers[last_conv_idx+1:], fc_model.layers):
        l1.set_weights(l2.get_weights())

    conv_model.compile(optimizer=Adam(1e-5), loss='categorical_crossentropy',
                       metrics=['accuracy'])
    conv_model.save_weights(model_path + 'no_dropout_bn' + i + '.h5')
    conv_model.fit_generator(batches, samples_per_epoch=batches.N, nb_epoch=1,
                             validation_data=val_batches, nb_val_samples=val_batches.N)

    for layer in conv_model.layers[16:]: layer.trainable = True
    conv_model.fit_generator(batches, samples_per_epoch=batches.N, nb_epoch=8,
                             validation_data=val_batches, nb_val_samples=val_batches.N)

    K.set_value(conv_model.optimizer.lr, 1e-7)
    conv_model.fit_generator(batches, samples_per_epoch=batches.N, nb_epoch=10,
                             validation_data=val_batches, nb_val_samples=val_batches.N)
    conv_model.save_weights(model_path + 'aug' + i + '.h5')
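# `train_dense_layers` first freezes every convolutional layer, then unfreezes everything from
# layer 16 onwards before the longer fine-tuning runs. A small helper (our addition, not part of
# the original notebook) can be handy for checking which layers will actually be updated:

# In[ ]:

def summarize_trainable(model):
    # Print each layer's index, type and whether it will be updated during training.
    for idx, layer in enumerate(model.layers):
        print(idx, type(layer).__name__, 'trainable' if layer.trainable else 'frozen')

# e.g. summarize_trainable(conv_model) just before a fit_generator call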
# ## Build ensemble

# In[50]:

for i in range(5):
    i = str(i)
    model = train_last_layer(i)
    train_dense_layers(i, model)


# ## Combine ensemble and test

# In[4]:

# The ensemble members were saved with the VGG conv layers followed by the batchnorm dense
# block, so rebuild that same architecture before loading their weights.
ens_model, _, _ = get_conv_model(Vgg16().model)
for layer in get_fc_layers(0.5, ens_model.output_shape[1:]): ens_model.add(layer)
for layer in ens_model.layers: layer.trainable = True


# In[52]:

def get_ens_pred(arr, fname):
    ens_pred = []
    for i in range(5):
        i = str(i)
        ens_model.load_weights('{}{}{}.h5'.format(model_path, fname, i))
        preds = ens_model.predict(arr, batch_size=batch_size)
        ens_pred.append(preds)
    return ens_pred


# In[55]:

val_pred2 = get_ens_pred(val, 'aug')


# In[56]:

val_avg_preds2 = np.stack(val_pred2).mean(axis=0)


# In[61]:

categorical_accuracy(val_labels, val_avg_preds2).eval()
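# The notebook stops at validation accuracy, but the test set was loaded above for a Kaggle
# submission. A minimal sketch of how the same ensemble averaging could produce one (our
# addition: the output file name, the clipping range, and the assumptions that the class folders
# are named 'cats'/'dogs', that test filenames look like 'unknown/1234.jpg', and that Kaggle
# expects an id,label CSV with label = probability of dog, are all ours):

# In[ ]:

test_pred = get_ens_pred(test, 'aug')
test_avg_preds = np.stack(test_pred).mean(axis=0)

# Clip the averaged probabilities away from 0 and 1 to limit the log-loss penalty
# on the few confidently wrong predictions.
dog_idx = batches.class_indices['dogs']
dog_probs = np.clip(test_avg_preds[:, dog_idx], 0.025, 0.975)

# Build an id,label table where id is the number embedded in each test filename.
ids = [int(f.split('/')[-1].split('.')[0]) for f in test_filenames]
subm = np.column_stack([ids, dog_probs])
np.savetxt(path + 'ensemble_subm.csv', subm, fmt='%d,%.5f', header='id,label', comments='')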