#!/usr/bin/env python
# coding: utf-8

# ## Model Layers

# This module contains many layer classes that we might be interested in using in our models. These layers complement the default [Pytorch layers](https://pytorch.org/docs/stable/nn.html) which we can also use as predefined layers.

# In[1]:


from fastai.vision import *
from fastai.gen_doc.nbdoc import *


# ## Custom fastai modules

# In[2]:


show_doc(AdaptiveConcatPool2d, title_level=3)


# In[3]:


from fastai.gen_doc.nbdoc import *
from fastai.layers import * 


# The output will be `2*sz`, or just 2 if `sz` is None.

# The [`AdaptiveConcatPool2d`](/layers.html#AdaptiveConcatPool2d) object uses adaptive average pooling and adaptive max pooling and concatenates them both. We use this because it provides the model with the information of both methods and improves performance. This technique is called `adaptive` because it allows us to decide on what output dimensions we want, instead of choosing the input's dimensions to fit a desired output size.
# 
# Let's try training with Adaptive Average Pooling first, then with Adaptive Max Pooling and finally with the concatenation of them both to see how they fare in performance.
# 
# We will first define a [`simple_cnn`](/layers.html#simple_cnn) using [Adaptive Max Pooling](https://pytorch.org/docs/stable/nn.html#torch.nn.AdaptiveMaxPool2d) by changing the source code a bit.

# In[ ]:


# Locate the MNIST sample dataset referenced by `URLs.MNIST_SAMPLE`
# (presumably downloaded and extracted on first use — confirm against fastai docs).
path = untar_data(URLs.MNIST_SAMPLE)
# Build the data bunch from the folder at `path`; `data` is reused by the
# training cells and the shape-check cells further down.
data = ImageDataBunch.from_folder(path)


# In[ ]:


def simple_cnn_max(actns:Collection[int], kernel_szs:Collection[int]=None,
               strides:Collection[int]=None) -> nn.Sequential:
    "Stack of `conv_layer`s set by `actns`, `kernel_szs` and `strides`, ending in adaptive max pooling and `Flatten`"
    # `ifnone` supplies the fallback lists (kernel size 3 / stride 2, one entry per
    # consecutive pair in `actns`) — presumably only when the argument is None.
    n_convs = len(actns) - 1
    kernel_szs = ifnone(kernel_szs, [3] * n_convs)
    strides = ifnone(strides, [2] * n_convs)
    # One conv layer per entry of `strides`, going from actns[idx] to actns[idx+1].
    convs = []
    for idx in range(len(strides)):
        convs.append(conv_layer(actns[idx], actns[idx + 1], kernel_szs[idx], stride=strides[idx]))
    # Max-pool every feature map down to 1x1, then flatten.
    head = nn.Sequential(nn.AdaptiveMaxPool2d(1), Flatten())
    return nn.Sequential(*convs, head)


# In[ ]:


# Max-pooling variant: four entries in `actns` give three conv layers (3 -> 16 -> 16 -> 2).
model = simple_cnn_max((3,16,16,2))
# Short training run (`fit(1)`), tracking accuracy as the metric.
learner = Learner(data, model, metrics=[accuracy])
learner.fit(1)


# Now let's try with [Adaptive Average Pooling](https://pytorch.org/docs/stable/nn.html#torch.nn.AdaptiveAvgPool2d).

# In[ ]:


def simple_cnn_avg(actns:Collection[int], kernel_szs:Collection[int]=None,
               strides:Collection[int]=None) -> nn.Sequential:
    "Stack of `conv_layer`s set by `actns`, `kernel_szs` and `strides`, ending in adaptive average pooling and `Flatten`"
    # `ifnone` supplies the fallback lists (kernel size 3 / stride 2, one entry per
    # consecutive pair in `actns`) — presumably only when the argument is None.
    n_convs = len(actns) - 1
    kernel_szs = ifnone(kernel_szs, [3] * n_convs)
    strides = ifnone(strides, [2] * n_convs)
    # One conv layer per entry of `strides`, going from actns[idx] to actns[idx+1].
    convs = []
    for idx in range(len(strides)):
        convs.append(conv_layer(actns[idx], actns[idx + 1], kernel_szs[idx], stride=strides[idx]))
    # Average-pool every feature map down to 1x1, then flatten.
    head = nn.Sequential(nn.AdaptiveAvgPool2d(1), Flatten())
    return nn.Sequential(*convs, head)


# In[ ]:


# Average-pooling variant with the same layer sizes as the max-pooling run (3 -> 16 -> 16 -> 2).
model = simple_cnn_avg((3,16,16,2))
# Short training run (`fit(1)`), tracking accuracy as the metric.
learner = Learner(data, model, metrics=[accuracy])
learner.fit(1)


# Finally we will try with the concatenation of them both [`AdaptiveConcatPool2d`](/layers.html#AdaptiveConcatPool2d). We will see that, in fact, it increases our accuracy and decreases our loss considerably!

# In[ ]:


def simple_cnn(actns:Collection[int], kernel_szs:Collection[int]=None,
               strides:Collection[int]=None) -> nn.Sequential:
    "Stack of `conv_layer`s set by `actns`, `kernel_szs` and `strides`, ending in `AdaptiveConcatPool2d` and `Flatten`"
    # `ifnone` supplies the fallback lists (kernel size 3 / stride 2, one entry per
    # consecutive pair in `actns`) — presumably only when the argument is None.
    n_convs = len(actns) - 1
    kernel_szs = ifnone(kernel_szs, [3] * n_convs)
    strides = ifnone(strides, [2] * n_convs)
    # One conv layer per entry of `strides`, going from actns[idx] to actns[idx+1].
    convs = []
    for idx in range(len(strides)):
        convs.append(conv_layer(actns[idx], actns[idx + 1], kernel_szs[idx], stride=strides[idx]))
    # Concatenated pooling followed by flattening.
    # NOTE(review): concatenating the two pooled results presumably yields twice
    # actns[-1] output features, unlike the max/avg variants — confirm.
    head = nn.Sequential(AdaptiveConcatPool2d(1), Flatten())
    return nn.Sequential(*convs, head)


# In[ ]:


# Concat-pooling variant with the same conv layer sizes as the two previous runs.
model = simple_cnn((3,16,16,2))
# Short training run (`fit(1)`), tracking accuracy as the metric.
learner = Learner(data, model, metrics=[accuracy])
learner.fit(1)


# In[4]:


show_doc(Lambda, title_level=3)


# This is very useful to use functions as layers in our networks inside a [Sequential](https://pytorch.org/docs/stable/nn.html#torch.nn.Sequential) object. So, for example, say we want to apply a [log_softmax loss](https://pytorch.org/docs/stable/nn.html#torch.nn.functional.log_softmax) and we need to change the shape of our output batches to be able to use this loss. We can add a layer that applies the necessary change in shape by calling:
# 
# `Lambda(lambda x: x.view(x.size(0),-1))`

# Let's see an example of how the shape of our output can change when we add this layer.

# In[ ]:


# Small CNN that stops right after adaptive average pooling (no reshaping layer),
# so its output keeps the trailing 1x1 spatial dimensions.
model = nn.Sequential(
    nn.Conv2d(3,  16, kernel_size=3, stride=2, padding=1), nn.ReLU(),
    nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1), nn.ReLU(),
    nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1), nn.ReLU(),
    nn.AdaptiveAvgPool2d(1),
)

# Move the model to the device the data bunch uses rather than hard-coding CUDA,
# so this cell also runs on CPU-only machines.
model.to(data.device)

# A single batch is enough to inspect the output shape.
for xb, yb in data.train_dl:
    out = model(xb)
    print(out.size())
    break


# In[ ]:


# Same CNN as before, plus a `Lambda` layer that reshapes the pooled output to
# (batch size, everything else) via `x.view(x.size(0), -1)`.
model = nn.Sequential(
    nn.Conv2d(3,  16, kernel_size=3, stride=2, padding=1), nn.ReLU(),
    nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1), nn.ReLU(),
    nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1), nn.ReLU(),
    nn.AdaptiveAvgPool2d(1),
    Lambda(lambda x: x.view(x.size(0),-1))
)

# Move the model to the device the data bunch uses rather than hard-coding CUDA,
# so this cell also runs on CPU-only machines.
model.to(data.device)

# A single batch is enough to inspect the output shape.
for xb, yb in data.train_dl:
    out = model(xb)
    print(out.size())
    break


# In[5]:


show_doc(Flatten)


# The function we built above is actually implemented in our library as [`Flatten`](/layers.html#Flatten). We can see that it returns the same size when we run it.

# In[ ]:


# Same CNN, using the library's `Flatten` layer in place of the hand-written
# `Lambda` reshape.
model = nn.Sequential(
    nn.Conv2d(3,  16, kernel_size=3, stride=2, padding=1), nn.ReLU(),
    nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1), nn.ReLU(),
    nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1), nn.ReLU(),
    nn.AdaptiveAvgPool2d(1),
    Flatten(),
)

# Move the model to the device the data bunch uses rather than hard-coding CUDA,
# so this cell also runs on CPU-only machines.
model.to(data.device)

# A single batch is enough to inspect the output shape.
for xb, yb in data.train_dl:
    out = model(xb)
    print(out.size())
    break


# In[6]:


show_doc(PoolFlatten)


# We can combine these two final layers ([AdaptiveAvgPool2d](https://pytorch.org/docs/stable/nn.html#torch.nn.AdaptiveAvgPool2d) and [`Flatten`](/layers.html#Flatten)) by using [`PoolFlatten`](/layers.html#PoolFlatten).

# In[ ]:


# Same CNN, with `PoolFlatten` standing in for the separate pooling and
# flattening layers used in the earlier cells.
model = nn.Sequential(
    nn.Conv2d(3,  16, kernel_size=3, stride=2, padding=1), nn.ReLU(),
    nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1), nn.ReLU(),
    nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1), nn.ReLU(),
    PoolFlatten()
)

# Move the model to the device the data bunch uses rather than hard-coding CUDA,
# so this cell also runs on CPU-only machines.
model.to(data.device)

# A single batch is enough to inspect the output shape.
for xb, yb in data.train_dl:
    out = model(xb)
    print(out.size())
    break


# Another use we give to the Lambda function is to resize batches with [`ResizeBatch`](/layers.html#ResizeBatch) when we have a layer that expects a different input than what comes from the previous one.

# In[7]:


show_doc(ResizeBatch)


# In[ ]:


# Indexing with `[None]` adds a leading axis, turning the 2x2 tensor into a
# batch of one item: shape (1, 2, 2).
a = torch.tensor([[1., -1.], [1., -1.]])[None]
print(a)


# In[ ]:


# NOTE: despite its name, `out` holds the `ResizeBatch` layer itself; the
# resized tensor is the result of calling it, `out(a)`.
out = ResizeBatch(4)
print(out(a))


# In[8]:


show_doc(Debugger, title_level=3)


# The debugger module allows us to peek inside a network while it's training and see in detail what is going on. We can see inputs, outputs and sizes at any point in the network.
# 
# For instance, if you run the following:
# 
# ``` python
# model = nn.Sequential(
#     nn.Conv2d(3,  16, kernel_size=3, stride=2, padding=1), nn.ReLU(),
#     Debugger(),
#     nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1), nn.ReLU(),
#     nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1), nn.ReLU(),
# )
# 
# model.cuda()
# 
# learner = Learner(data, model, metrics=[accuracy])
# learner.fit(5)
# ```
# ... you'll see something like this:
# 
# ```
# /home/ubuntu/fastai/fastai/layers.py(74)forward()
#      72     def forward(self,x:Tensor) -> Tensor:
#      73         set_trace()
# ---> 74         return x
#      75 
#      76 class StdUpsample(nn.Module):
# 
# ipdb>
# ```

# In[9]:


show_doc(PixelShuffle_ICNR, title_level=3)


# In[10]:


show_doc(MergeLayer, title_level=3)


# In[11]:


show_doc(PartialLayer, title_level=3)


# In[12]:


show_doc(SigmoidRange, title_level=3)


# In[13]:


show_doc(SequentialEx, title_level=3)


# In[14]:


show_doc(SelfAttention, title_level=3)


# In[15]:


show_doc(BatchNorm1dFlat, title_level=3)


# ## Loss functions

# In[16]:


show_doc(FlattenedLoss, title_level=3)


# Create an instance of `func` with `args` and `kwargs`. When passing an output and target, it
# - puts `axis` first in output and target with a transpose
# - casts the target to `float` if `floatify=True`
# - squeezes the `output` to two dimensions if `is_2d`, otherwise one dimension, squeezes the target to one dimension
# - applies the instance of `func`.

# In[17]:


show_doc(BCEFlat)


# In[18]:


show_doc(BCEWithLogitsFlat)


# In[19]:


show_doc(CrossEntropyFlat)


# In[20]:


show_doc(MSELossFlat)


# In[21]:


show_doc(NoopLoss)


# In[22]:


show_doc(WassersteinLoss)


# ## Helper functions to create modules

# In[23]:


show_doc(bn_drop_lin, doc_string=False)


# The [`bn_drop_lin`](/layers.html#bn_drop_lin) function returns a sequence of [batch normalization](https://arxiv.org/abs/1502.03167), [dropout](https://www.cs.toronto.edu/~hinton/absps/JMLRdropout.pdf) and a linear layer. This custom layer is usually used at the end of a model. 
# 
# `n_in` represents the size of the input, `n_out` the size of the output, `bn` whether we want batch norm or not, `p` how much dropout, and `actn` (optional parameter) adds an activation function at the end.

# In[24]:


show_doc(conv2d)


# In[25]:


show_doc(conv2d_trans)


# In[26]:


show_doc(conv_layer, doc_string=False)


# The [`conv_layer`](/layers.html#conv_layer) function returns a sequence of [nn.Conv2d](https://pytorch.org/docs/stable/nn.html#torch.nn.Conv2d), [BatchNorm](https://arxiv.org/abs/1502.03167) and a ReLU or [leaky ReLU](https://ai.stanford.edu/~amaas/papers/relu_hybrid_icml2013_final.pdf) activation function.
# 
# `n_in` represents the size of the input, `n_out` the size of the output, `ks` the kernel size, `stride` the stride with which we want to apply the convolutions. `bias` will decide if they have bias or not (if None, defaults to True unless using batchnorm). `norm_type` selects the type of normalization (or `None`). If `leaky` is None, the activation is a standard `ReLU`, otherwise it's a `LeakyReLU` of slope `leaky`. Finally if `transpose=True`, the convolution is replaced by a `ConvTranspose2d`.

# In[27]:


show_doc(embedding, doc_string=False)


# Create an [embedding layer](https://arxiv.org/abs/1711.09160) with input size `ni` and output size `nf`.

# In[28]:


show_doc(relu)


# In[29]:


show_doc(res_block)


# In[30]:


show_doc(sigmoid_range)


# In[31]:


show_doc(simple_cnn)


# ## Initialization of modules

# In[32]:


show_doc(batchnorm_2d)


# In[33]:


show_doc(icnr)


# In[34]:


show_doc(trunc_normal_)


# In[35]:


# NOTE(review): `show_doc(icnr)` is already called in cell In[33] of this section;
# this repeat looks redundant — confirm, then remove it or point it at the intended function.
show_doc(icnr)


# In[36]:


show_doc(NormType)


# ## Undocumented Methods - Methods moved below this line will intentionally be hidden

# In[37]:


show_doc(Debugger.forward)


# In[38]:


show_doc(Lambda.forward)


# In[39]:


show_doc(AdaptiveConcatPool2d.forward)


# In[40]:


show_doc(NoopLoss.forward)


# In[41]:


show_doc(PixelShuffle_ICNR.forward)


# In[42]:


show_doc(WassersteinLoss.forward)


# In[43]:


show_doc(MergeLayer.forward)


# In[44]:


show_doc(SigmoidRange.forward)


# In[45]:


# NOTE(review): `show_doc(MergeLayer.forward)` is already called in cell In[43];
# this repeat looks redundant — confirm, then remove it or point it at the intended method.
show_doc(MergeLayer.forward)


# In[46]:


show_doc(SelfAttention.forward)


# In[47]:


show_doc(SequentialEx.forward)


# In[48]:


show_doc(SequentialEx.append)


# In[49]:


show_doc(SequentialEx.extend)


# In[50]:


show_doc(SequentialEx.insert)


# In[51]:


show_doc(PartialLayer.forward)


# In[52]:


show_doc(BatchNorm1dFlat.forward)


# In[53]:


show_doc(Flatten.forward)


# ## New Methods - Please document or move to the undocumented section

# In[54]:


show_doc(View)


# 

# In[55]:


show_doc(ResizeBatch.forward)


# 

# In[56]:


show_doc(View.forward)


#