%install '.package(path: "$cwd/FastaiNotebook_02_fully_connected")' FastaiNotebook_02_fully_connected
Installing packages: .package(path: "/home/ubuntu/fastai_docs/dev_swift/FastaiNotebook_02_fully_connected") FastaiNotebook_02_fully_connected With SwiftPM flags: [] Working in: /tmp/tmp7mqq1mmn Fetching https://github.com/mxcl/Path.swift Fetching https://github.com/JustHTTP/Just Completed resolution in 1.83s Cloning https://github.com/mxcl/Path.swift Resolving https://github.com/mxcl/Path.swift at 0.16.2 Cloning https://github.com/JustHTTP/Just Resolving https://github.com/JustHTTP/Just at 0.7.1 Compile Swift Module 'Path' (9 sources) Compile Swift Module 'Just' (1 sources) Compile Swift Module 'FastaiNotebook_02_fully_connected' (4 sources) Compile Swift Module 'jupyterInstalledPackages' (1 sources) Linking ./.build/x86_64-unknown-linux/debug/libjupyterInstalledPackages.so Initializing Swift... Loading library... Installation complete!
import FastaiNotebook_02_fully_connected
// export
import Foundation
import TensorFlow
import Path
// Load MNIST via the project helper from notebook 02 (returns train/valid splits).
var (xTrain, yTrain, xValid, yValid) = loadMNIST(path: Path.home/".fastai"/"data"/"mnist_tst")
// Normalize BOTH splits with the *training* statistics (standard practice:
// the validation set must not leak into the normalization constants).
let (trainMean, trainStd) = (xTrain.mean(), xTrain.standardDeviation())
xTrain = normalize(xTrain, mean: trainMean, std: trainStd)
xValid = normalize(xValid, mean: trainMean, std: trainStd)
// Reshape flat vectors into NHWC image batches: 28x28 pixels, 1 channel.
xTrain = xTrain.reshaped(to: [xTrain.shape[0], 28, 28, 1])
xValid = xValid.reshaped(to: [xValid.shape[0], 28, 28, 1])
print(xTrain.shape, xValid.shape)
TensorShape(dimensions: [60000, 28, 28, 1]) TensorShape(dimensions: [10000, 28, 28, 1])
// Dataset sizes and model hyperparameters.
let images = xTrain.shape[0]
// The number of target classes is max *label* + 1. The original computed
// xValid.max() + 1, i.e. the largest normalized pixel value — not a class
// count. Labels, not images, determine the class count.
let classes = yTrain.max() + 1
let channels = 32
var layer1 = FAConv2D<Float>(filterShape: (5, 5, 1, channels)) //Conv2D(1, nh, 5)
// A small batch of 100 validation images for quick statistics checks.
let x = xValid[0..<100]
x.shape
▿ TensorShape
▿ dimensions : 4 elements
- 0 : 100
- 1 : 28
- 2 : 28
- 3 : 1
extension Tensor where Scalar: TensorFlowFloatingPoint {
    /// Convenience summary for inspection: the tensor's mean and standard
    /// deviation as a labeled tuple.
    func stats() -> (mean: Tensor, std: Tensor) {
        let mu = mean()
        let sigma = standardDeviation()
        return (mean: mu, std: sigma)
    }
}
// Default-initialized conv parameters: small filter std, zero bias (see output below).
(filter: layer1.filter.stats(), bias: layer1.bias.stats())
▿ 2 elements
▿ filter : 2 elements
- mean : 0.0002112739
- std : [[[[0.049460452]]]]
▿ bias : 2 elements
- mean : 0.0
- std : [0.0]
// Apply the default-initialized conv to the batch and inspect output stats.
let result = layer1.applied(to: x)
result.stats()
▿ 2 elements - mean : 0.00045435934 - std : [[[[0.27018127]]]]
// export
extension Tensor where Scalar: TensorFlowFloatingPoint {
    /// Kaiming/He *normal* initializer, assuming a leaky-ReLU nonlinearity.
    ///
    /// Draws from N(0, (gain / sqrt(fanIn))^2), where
    /// gain = sqrt(2 / (1 + negativeSlope^2)) and fanIn is the input-channel
    /// count times the spatial receptive-field size. `negativeSlope == 1`
    /// (the default) gives gain 1, i.e. a plain variance-preserving init.
    init(kaimingNormal shape: TensorShape, negativeSlope: Double = 1.0) {
        let leakGain = Scalar(sqrt(2.0 / (1.0 + pow(negativeSlope, 2))))
        // Every dimension except the last two (in/out channels) is spatial.
        let spatialDims = shape.count - 2
        let fieldSize = shape[0..<spatialDims].contiguousSize
        let inputFanIn = fieldSize * shape[shape.count - 2]
        let stddev = leakGain / sqrt(Scalar(inputFanIn))
        self.init(
            randomNormal: shape,
            stddev: stddev,
            generator: &PhiloxRandomNumberGenerator.global
        )
    }
}
// Re-initialize with kaiming-normal (slope 1, gain 1): output std moves to ~1 (see below).
layer1.filter = Tensor(kaimingNormal: layer1.filter.shape, negativeSlope: 1.0)
layer1.applied(to: x).stats()
▿ 2 elements - mean : -0.005352263 - std : [[[[1.0782409]]]]
// export
/// Leaky ReLU: positives pass through unchanged, negatives are scaled by
/// `negativeSlope`. The default slope of 0 makes this a standard ReLU.
func leakyRelu<T: TensorFlowFloatingPoint>(
    _ x: Tensor<T>,
    negativeSlope: Double = 0.0
) -> Tensor<T> {
    let positivePart = max(0, x)
    let negativePart = min(0, x)
    return positivePart + T(negativeSlope) * negativePart
}
// ReLU-appropriate init (slope 0 -> gain sqrt(2)), then pass through the ReLU.
layer1.filter = Tensor(kaimingNormal: layer1.filter.shape, negativeSlope: 0.0)
leakyRelu(layer1.applied(to: x)).stats()
▿ 2 elements - mean : 0.5224916 - std : [[[[0.9566001]]]]
// Fresh default-initialized layer, for comparison against the kaiming init above.
var layer1 = FAConv2D<Float>(filterShape: (5, 5, 1, channels)) //Conv2D(1, nh, 5)
leakyRelu(layer1.applied(to: x)).stats()
▿ 2 elements - mean : 0.086791426 - std : [[[[0.1612546]]]]
layer1.filter.shape
▿ TensorShape
▿ dimensions : 4 elements
- 0 : 5
- 1 : 5
- 2 : 1
- 3 : 32
// Recompute fan-in / fan-out by hand for the 5x5x1x32 filter.
// Spatial dims are everything but the last two (in/out channel counts).
let spatialDimCount = layer1.filter.rank - 2
let receptiveField = layer1.filter.shape[0..<spatialDimCount].contiguousSize
receptiveField
25
// Filter layout is [H, W, channelsIn, channelsOut].
let filtersIn = layer1.filter.shape[2]
let filtersOut = layer1.filter.shape[3]
print(filtersIn, filtersOut)
1 32
// fanIn/fanOut = channel count * receptive-field size (1*25 and 32*25 here).
let fanIn = filtersIn * receptiveField
let fanOut = filtersOut * receptiveField
print(fanIn, fanOut)
25 800
/// Kaiming initialization gain for a leaky ReLU with the given negative
/// slope: sqrt(2 / (1 + slope^2)). A slope of 0 (plain ReLU) yields
/// sqrt(2); a slope of 1 yields 1.
func gain(_ negativeSlope: Double) -> Double {
    let denominator = 1.0 + negativeSlope * negativeSlope
    return (2.0 / denominator).squareRoot()
}
// Gains for common slopes; note gain(sqrt(5)) ≈ 0.577, which equals 1/sqrt(3).
(gain(1.0), gain(0.0), gain(0.01), gain(0.1), gain(sqrt(5.0)))
▿ 5 elements - .0 : 1.0 - .1 : 1.4142135623730951 - .2 : 1.4141428569978354 - .3 : 1.4071950894605838 - .4 : 0.5773502691896257
// Empirical std of Uniform(-1, 1) over 10k samples...
(2 * Tensor<Float>(randomUniform: [10000]) - 1).standardDeviation()
[0.5760468]
// ...matches the analytic value 1/sqrt(3).
1.0 / sqrt(3.0)
0.5773502691896258
//export
extension Tensor where Scalar: TensorFlowFloatingPoint {
/// Kaiming/He *uniform* initializer, assuming a leaky-ReLU nonlinearity.
/// Samples Uniform(-bound, bound) with bound = sqrt(3) * gain / sqrt(fanIn);
/// the sqrt(3) factor makes its variance match the kaimingNormal
/// initializer, since the std of Uniform(-1, 1) is 1/sqrt(3).
init(kaimingUniform shape: TensorShape, negativeSlope: Double = 1.0) {
// Assumes Leaky ReLU nonlinearity
let gain = Scalar(sqrt(2.0 / (1.0 + pow(negativeSlope, 2))))
// Every dimension except the last two (in/out channels) is spatial.
let spatialDimCount = shape.count - 2
let receptiveField = shape[0..<spatialDimCount].contiguousSize
// fanIn = input-channel count * receptive-field size.
let fanIn = shape[shape.count - 2] * receptiveField
let bound = sqrt(Scalar(3.0)) * gain / sqrt(Scalar(fanIn))
// randomUniform is in [0, 1); rescale/shift it into (-bound, bound).
self = bound * (2 * Tensor(
randomUniform: shape,
generator: &PhiloxRandomNumberGenerator.global
) - 1)
}
}
// Kaiming-uniform with slope 0: post-ReLU std stays near 1 (see below).
layer1.filter = Tensor(kaimingUniform: layer1.filter.shape, negativeSlope: 0.0)
leakyRelu(layer1.applied(to: x)).stats()
▿ 2 elements - mean : 0.46286932 - std : [[[[0.89172804]]]]
// The notebook's titular sqrt(5) slope shrinks activations markedly (output below).
layer1.filter = Tensor(kaimingUniform: layer1.filter.shape, negativeSlope: sqrt(5.0))
leakyRelu(layer1.applied(to: x)).stats()
▿ 2 elements - mean : 0.196594 - std : [[[[0.38863888]]]]
// A small all-convolutional model: four stride-2 convs progressively halve
// the 28x28x1 input; conv4 has a single output filter, so flattening yields
// one scalar prediction per example.
public struct Model: Layer {
public var conv1 = FAConv2D<Float>(
filterShape: (5, 5, 1, 8), strides: (2, 2), padding: .same, activation: relu
)
public var conv2 = FAConv2D<Float>(
filterShape: (3, 3, 8, 16), strides: (2, 2), padding: .same, activation: relu
)
public var conv3 = FAConv2D<Float>(
filterShape: (3, 3, 16, 32), strides: (2, 2), padding: .same, activation: relu
)
// Final conv: no activation, .valid padding, 1 output filter.
public var conv4 = FAConv2D<Float>(
filterShape: (3, 3, 32, 1), strides: (2, 2), padding: .valid
)
public var flatten = Flatten<Float>()
@differentiable
public func applied(to input: Tensor<Float>, in context: Context) -> Tensor<Float> {
// Feed the input through each layer in order, threading the context.
return input.sequenced(
in: context,
through: conv1, conv2, conv3, conv4, flatten
)
}
}
// Float labels for the same 100-example batch, for the MSE loss below.
let y = Tensor<Float>(yValid[0..<100])
var model = Model()
let prediction = model.applied(to: x)
prediction.stats()
▿ 2 elements - mean : 0.16834545 - std : [[0.06720749]]
// Gradients of an MSE loss w.r.t. the model's parameters (training phase).
let gradients = gradient(at: model) { model in
meanSquaredError(predicted: model.applied(
to: x,
in: Context(learningPhase: .training)
), expected: y)
}
gradients.conv1.filter.stats()
▿ 2 elements - mean : -0.2158769 - std : [[[[0.36486608]]]]
// Re-initialize every conv filter with kaiming-uniform (default slope 1).
for keyPath in [\Model.conv1, \Model.conv2, \Model.conv3, \Model.conv4] {
model[keyPath: keyPath].filter = Tensor(kaimingUniform: model[keyPath: keyPath].filter.shape)
}
let prediction = model.applied(to: x)
prediction.stats()
▿ 2 elements - mean : 0.06297659 - std : [[0.17011806]]
// Recompute the gradients to compare their scale against the default init.
let gradients = gradient(at: model) { model in
meanSquaredError(predicted: model.applied(
to: x,
in: Context(learningPhase: .training)
), expected: y)
}
gradients.conv1.filter.stats()
▿ 2 elements - mean : -0.26000252 - std : [[[[0.26875323]]]]
// Write the cells marked "// export" out to a reusable .swift source file.
notebookToScript(fname: (Path.cwd / "02a_why_sqrt5.ipynb").string)