%install '.package(path: "$cwd/FastaiNotebook_02_fully_connected")' FastaiNotebook_02_fully_connected
Installing packages: .package(path: "/home/ubuntu/fastai_docs/dev_swift/FastaiNotebook_02_fully_connected") FastaiNotebook_02_fully_connected With SwiftPM flags: [] Working in: /tmp/tmp7mqq1mmn Fetching https://github.com/mxcl/Path.swift Fetching https://github.com/JustHTTP/Just Completed resolution in 1.83s Cloning https://github.com/mxcl/Path.swift Resolving https://github.com/mxcl/Path.swift at 0.16.2 Cloning https://github.com/JustHTTP/Just Resolving https://github.com/JustHTTP/Just at 0.7.1 Compile Swift Module 'Path' (9 sources) Compile Swift Module 'Just' (1 sources) Compile Swift Module 'FastaiNotebook_02_fully_connected' (4 sources) Compile Swift Module 'jupyterInstalledPackages' (1 sources) Linking ./.build/x86_64-unknown-linux/debug/libjupyterInstalledPackages.so Initializing Swift... Loading library... Installation complete!
import FastaiNotebook_02_fully_connected
// export
import Foundation
import TensorFlow
import Path
// Load MNIST via the project helper from notebook 02 (returns train/valid splits).
var (xTrain, yTrain, xValid, yValid) = loadMNIST(path: Path.home/".fastai"/"data"/"mnist_tst")
// Normalize BOTH splits with the *training* statistics (standard practice:
// the validation set must not leak into the normalization constants).
let (trainMean, trainStd) = (xTrain.mean(), xTrain.standardDeviation())
xTrain = normalize(xTrain, mean: trainMean, std: trainStd)
xValid = normalize(xValid, mean: trainMean, std: trainStd)
// Reshape flat vectors into NHWC image batches: 28x28 pixels, 1 channel.
xTrain = xTrain.reshaped(to: [xTrain.shape[0], 28, 28, 1])
xValid = xValid.reshaped(to: [xValid.shape[0], 28, 28, 1])
print(xTrain.shape, xValid.shape)
TensorShape(dimensions: [60000, 28, 28, 1]) TensorShape(dimensions: [10000, 28, 28, 1])
// Dataset sizes and model hyperparameters.
let images = xTrain.shape[0]
// The number of target classes is max *label* + 1. The original computed
// xValid.max() + 1, i.e. the largest normalized pixel value — not a class
// count. Labels, not images, determine the class count.
let classes = yTrain.max() + 1
let channels = 32
var layer1 = FAConv2D<Float>(filterShape: (5, 5, 1, channels)) //Conv2D(1, nh, 5)
// A small batch of 100 validation images for quick statistics checks.
let x = xValid[0..<100]
x.shape
▿ TensorShape
▿ dimensions : 4 elements
- 0 : 100
- 1 : 28
- 2 : 28
- 3 : 1
extension Tensor where Scalar: TensorFlowFloatingPoint {
    /// Convenience summary for inspection: the tensor's mean and standard
    /// deviation as a labeled tuple.
    func stats() -> (mean: Tensor, std: Tensor) {
        let mu = mean()
        let sigma = standardDeviation()
        return (mean: mu, std: sigma)
    }
}
// Default-initialized conv parameters: small filter std, zero bias (see output below).
(filter: layer1.filter.stats(), bias: layer1.bias.stats())
▿ 2 elements
▿ filter : 2 elements
- mean : 0.0002112739
- std : [[[[0.049460452]]]]
▿ bias : 2 elements
- mean : 0.0
- std : [0.0]
// Apply the default-initialized conv to the batch and inspect output stats.
let result = layer1.applied(to: x)
result.stats()
▿ 2 elements - mean : 0.00045435934 - std : [[[[0.27018127]]]]
// export
extension Tensor where Scalar: TensorFlowFloatingPoint {
    /// Kaiming/He *normal* initializer, assuming a leaky-ReLU nonlinearity.
    ///
    /// Draws from N(0, (gain / sqrt(fanIn))^2), where
    /// gain = sqrt(2 / (1 + negativeSlope^2)) and fanIn is the input-channel
    /// count times the spatial receptive-field size. `negativeSlope == 1`
    /// (the default) gives gain 1, i.e. a plain variance-preserving init.
    init(kaimingNormal shape: TensorShape, negativeSlope: Double = 1.0) {
        let leakGain = Scalar(sqrt(2.0 / (1.0 + pow(negativeSlope, 2))))
        // Every dimension except the last two (in/out channels) is spatial.
        let spatialDims = shape.count - 2
        let fieldSize = shape[0..<spatialDims].contiguousSize
        let inputFanIn = fieldSize * shape[shape.count - 2]
        let stddev = leakGain / sqrt(Scalar(inputFanIn))
        self.init(
            randomNormal: shape,
            stddev: stddev,
            generator: &PhiloxRandomNumberGenerator.global
        )
    }
}
// Re-initialize with kaiming-normal (slope 1, gain 1): output std moves to ~1 (see below).
layer1.filter = Tensor(kaimingNormal: layer1.filter.shape, negativeSlope: 1.0)
layer1.applied(to: x).stats()
▿ 2 elements - mean : -0.005352263 - std : [[[[1.0782409]]]]
// export
/// Leaky ReLU: positives pass through unchanged, negatives are scaled by
/// `negativeSlope`. The default slope of 0 makes this a standard ReLU.
func leakyRelu<T: TensorFlowFloatingPoint>(
    _ x: Tensor<T>,
    negativeSlope: Double = 0.0
) -> Tensor<T> {
    let positivePart = max(0, x)
    let negativePart = min(0, x)
    return positivePart + T(negativeSlope) * negativePart
}
// ReLU-appropriate init (slope 0 -> gain sqrt(2)), then pass through the ReLU.
layer1.filter = Tensor(kaimingNormal: layer1.filter.shape, negativeSlope: 0.0)
leakyRelu(layer1.applied(to: x)).stats()
▿ 2 elements - mean : 0.5224916 - std : [[[[0.9566001]]]]
// Fresh default-initialized layer, for comparison against the kaiming init above.
var layer1 = FAConv2D<Float>(filterShape: (5, 5, 1, channels)) //Conv2D(1, nh, 5)
leakyRelu(layer1.applied(to: x)).stats()
▿ 2 elements - mean : 0.086791426 - std : [[[[0.1612546]]]]
layer1.filter.shape
▿ TensorShape
▿ dimensions : 4 elements
- 0 : 5
- 1 : 5
- 2 : 1
- 3 : 32
// Recompute fan-in / fan-out by hand for the 5x5x1x32 filter.
// Spatial dims are everything but the last two (in/out channel counts).
let spatialDimCount = layer1.filter.rank - 2
let receptiveField = layer1.filter.shape[0..<spatialDimCount].contiguousSize
receptiveField
25
// Filter layout is [H, W, channelsIn, channelsOut].
let filtersIn = layer1.filter.shape[2]
let filtersOut = layer1.filter.shape[3]
print(filtersIn, filtersOut)
1 32
// fanIn/fanOut = channel count * receptive-field size (1*25 and 32*25 here).
let fanIn = filtersIn * receptiveField
let fanOut = filtersOut * receptiveField
print(fanIn, fanOut)
25 800
/// Kaiming initialization gain for a leaky ReLU with the given negative
/// slope: sqrt(2 / (1 + slope^2)). A slope of 0 (plain ReLU) yields
/// sqrt(2); a slope of 1 yields 1.
func gain(_ negativeSlope: Double) -> Double {
    let denominator = 1.0 + negativeSlope * negativeSlope
    return (2.0 / denominator).squareRoot()
}
// Gains for common slopes; note gain(sqrt(5)) ≈ 0.577, which equals 1/sqrt(3).
(gain(1.0), gain(0.0), gain(0.01), gain(0.1), gain(sqrt(5.0)))
▿ 5 elements - .0 : 1.0 - .1 : 1.4142135623730951 - .2 : 1.4141428569978354 - .3 : 1.4071950894605838 - .4 : 0.5773502691896257
// Empirical std of Uniform(-1, 1) over 10k samples...
(2 * Tensor<Float>(randomUniform: [10000]) - 1).standardDeviation()
[0.5760468]
// ...matches the analytic value 1/sqrt(3).
1.0 / sqrt(3.0)
0.5773502691896258
//export
extension Tensor where Scalar: TensorFlowFloatingPoint {
/// Kaiming/He *uniform* initializer, assuming a leaky-ReLU nonlinearity.
/// Samples Uniform(-bound, bound) with bound = sqrt(3) * gain / sqrt(fanIn);
/// the sqrt(3) factor makes its variance match the kaimingNormal
/// initializer, since the std of Uniform(-1, 1) is 1/sqrt(3).
init(kaimingUniform shape: TensorShape, negativeSlope: Double = 1.0) {
// Assumes Leaky ReLU nonlinearity
let gain = Scalar(sqrt(2.0 / (1.0 + pow(negativeSlope, 2))))
// Every dimension except the last two (in/out channels) is spatial.
let spatialDimCount = shape.count - 2
let receptiveField = shape[0..<spatialDimCount].contiguousSize
// fanIn = input-channel count * receptive-field size.
let fanIn = shape[shape.count - 2] * receptiveField
let bound = sqrt(Scalar(3.0)) * gain / sqrt(Scalar(fanIn))
// randomUniform is in [0, 1); rescale/shift it into (-bound, bound).
self = bound * (2 * Tensor(
randomUniform: shape,
generator: &PhiloxRandomNumberGenerator.global
) - 1)
}
}
// Kaiming-uniform with slope 0: post-ReLU std stays near 1 (see below).
layer1.filter = Tensor(kaimingUniform: layer1.filter.shape, negativeSlope: 0.0)
leakyRelu(layer1.applied(to: x)).stats()
▿ 2 elements - mean : 0.46286932 - std : [[[[0.89172804]]]]
// The notebook's titular sqrt(5) slope shrinks activations markedly (output below).
layer1.filter = Tensor(kaimingUniform: layer1.filter.shape, negativeSlope: sqrt(5.0))
leakyRelu(layer1.applied(to: x)).stats()
▿ 2 elements - mean : 0.196594 - std : [[[[0.38863888]]]]
// A small all-convolutional model: four stride-2 convs progressively halve
// the 28x28x1 input; conv4 has a single output filter, so flattening yields
// one scalar prediction per example.
public struct Model: Layer {
public var conv1 = FAConv2D<Float>(
filterShape: (5, 5, 1, 8), strides: (2, 2), padding: .same, activation: relu
)
public var conv2 = FAConv2D<Float>(
filterShape: (3, 3, 8, 16), strides: (2, 2), padding: .same, activation: relu
)
public var conv3 = FAConv2D<Float>(
filterShape: (3, 3, 16, 32), strides: (2, 2), padding: .same, activation: relu
)
// Final conv: no activation, .valid padding, 1 output filter.
public var conv4 = FAConv2D<Float>(
filterShape: (3, 3, 32, 1), strides: (2, 2), padding: .valid
)
public var flatten = Flatten<Float>()
@differentiable
public func applied(to input: Tensor<Float>, in context: Context) -> Tensor<Float> {
// Feed the input through each layer in order, threading the context.
return input.sequenced(
in: context,
through: conv1, conv2, conv3, conv4, flatten
)
}
}
// Float labels for the same 100-example batch, for the MSE loss below.
let y = Tensor<Float>(yValid[0..<100])
var model = Model()
let prediction = model.applied(to: x)
prediction.stats()
▿ 2 elements - mean : 0.16834545 - std : [[0.06720749]]
// Gradients of an MSE loss w.r.t. the model's parameters (training phase).
let gradients = gradient(at: model) { model in
meanSquaredError(predicted: model.applied(
to: x,
in: Context(learningPhase: .training)
), expected: y)
}
gradients.conv1.filter.stats()
▿ 2 elements - mean : -0.2158769 - std : [[[[0.36486608]]]]
// Re-initialize every conv filter with kaiming-uniform (default slope 1).
for keyPath in [\Model.conv1, \Model.conv2, \Model.conv3, \Model.conv4] {
model[keyPath: keyPath].filter = Tensor(kaimingUniform: model[keyPath: keyPath].filter.shape)
}
let prediction = model.applied(to: x)
prediction.stats()
▿ 2 elements - mean : 0.06297659 - std : [[0.17011806]]
// Recompute the gradients to compare their scale against the default init.
let gradients = gradient(at: model) { model in
meanSquaredError(predicted: model.applied(
to: x,
in: Context(learningPhase: .training)
), expected: y)
}
gradients.conv1.filter.stats()
▿ 2 elements - mean : -0.26000252 - std : [[[[0.26875323]]]]
// Write the cells marked "// export" out to a reusable .swift source file.
notebookToScript(fname: (Path.cwd / "02a_why_sqrt5.ipynb").string)