%install '.package(path: "$cwd/FastaiNotebook_03_minibatch_training")' FastaiNotebook_03_minibatch_training
Installing packages:
  .package(path: "/home/ubuntu/fastai_docs/dev_swift/FastaiNotebook_03_minibatch_training") FastaiNotebook_03_minibatch_training
With SwiftPM flags: []
Working in: /tmp/tmpaz6m2tdz
Fetching https://github.com/mxcl/Path.swift
Fetching https://github.com/JustHTTP/Just
Completed resolution in 2.39s
Cloning https://github.com/JustHTTP/Just
Resolving https://github.com/JustHTTP/Just at 0.7.1
Cloning https://github.com/mxcl/Path.swift
Resolving https://github.com/mxcl/Path.swift at 0.16.2
Compile Swift Module 'Just' (1 sources)
Compile Swift Module 'Path' (9 sources)
Compile Swift Module 'FastaiNotebook_03_minibatch_training' (6 sources)
Compile Swift Module 'jupyterInstalledPackages' (1 sources)
Linking ./.build/x86_64-unknown-linux/debug/libjupyterInstalledPackages.so
Initializing Swift...
Loading library...
Installation complete!
import FastaiNotebook_03_minibatch_training
// export
import Path
import TensorFlow
var (xTrain,yTrain,xValid,yValid) = loadMNIST(path: mnistPath, flat: true)
let (n,m) = (Int(xTrain.shape[0]),Int(xTrain.shape[1]))
let c = yTrain.max()+1
print(n,m,c)
60000 784 10
Those can't all be used to define a model though, since c is a Tensor rather than an Int...
let (n,m) = (60000,784)
let c = 10
let nHid = 50
// export
public struct BasicModel: Layer {
    public var layer1: Dense<Float>
    public var layer2: Dense<Float>
    
    public init(nIn: Int, nHid: Int, nOut: Int) {
        layer1 = Dense(inputSize: nIn, outputSize: nHid, activation: relu)
        layer2 = Dense(inputSize: nHid, outputSize: nOut)
    }
    
    @differentiable
    public func applied(to input: Tensor<Float>, in context: Context) -> Tensor<Float> {
        return input.sequenced(in: context, through: layer1, layer2)
    }
}
var model = BasicModel(nIn: m, nHid: nHid, nOut: c)
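As a quick sanity check, we can push the (flat) validation images through the freshly initialized model and confirm we get one activation per class. This is just an illustrative sketch using the applied(to:in:) API defined above; it isn't part of the exported code.
// Illustrative sanity check (not exported): one forward pass in inference mode.
let checkOutput = model.applied(to: xValid, in: Context(learningPhase: .inference))
print(checkOutput.shape)  // expect [10000, 10]: 10,000 validation images, c classes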
// export
public struct DataBunch<Element> where Element: TensorGroup {
    private var _train: Dataset<Element>
    private var _valid: Dataset<Element>
    
    public var shuffleTrain: Bool = true
    public var shuffleValid: Bool = false
    public var batchSize: Int = 64
    
    public var train: Dataset<Element> { return processDs(_train, shuffleTrain) }
    public var valid: Dataset<Element> { return processDs(_valid, shuffleValid) }
    
    private func processDs(_ ds: Dataset<Element>, _ shuffle: Bool) -> Dataset<Element> {
        if !shuffle { return ds.batched(Int64(batchSize)) }
        let count = Int64(ds.count(where: {_ in true}))
        return ds.batched(Int64(batchSize)).shuffled(sampleCount: count, randomSeed: Int64(random()))
    }
    
    public init(train: Dataset<Element>, valid: Dataset<Element>, batchSize: Int = 64) {
        (self._train, self._valid, self.batchSize) = (train, valid, batchSize)
    }
}
//export
public func mnistDataBunch(path: Path = mnistPath, flat: Bool = false, bs: Int = 64)
        -> DataBunch<DataBatch<Tensor<Float>, Tensor<Int32>>> {
    let (xTrain, yTrain, xValid, yValid) = loadMNIST(path: path, flat: flat)
    return DataBunch(train: Dataset(elements: DataBatch(xb: xTrain, yb: yTrain)),
                     valid: Dataset(elements: DataBatch(xb: xValid, yb: yValid)),
                     batchSize: bs)
}
let data = mnistDataBunch(flat: true)
Shuffle test: grab the first batch of data.train twice and check that the labels come in a different order each time.
var tst = data.train
var firstBatch: DataBatch<Tensor<Float>, Tensor<Int32>>? = nil
for batch in tst {
    firstBatch = batch
    break
}
firstBatch!.yb
[8, 8, 1, 7, 9, 8, 8, 1, 2, 0, 4, 0, 9, 6, 9, 7, 0, 7, 4, 5, 4, 0, 3, 6, 9, 1, 7, 1, 9, 2, 3, 5, 5, 0, 9, 7, 0, 0, 4, 4, 1, 4, 5, 1, 0, 8, 3, 6, 4, 0, 8, 0, 1, 0, 7, 3, 2, 7, 6, 1, 7, 6, 7, 4]
var tst = data.train
var firstBatch: DataBatch<Tensor<Float>, Tensor<Int32>>? = nil
for batch in tst {
    firstBatch = batch
    break
}
firstBatch!.yb
[8, 0, 9, 7, 2, 5, 2, 8, 8, 8, 7, 3, 3, 7, 3, 8, 8, 9, 8, 2, 6, 5, 2, 3, 8, 1, 9, 7, 9, 3, 1, 9, 6, 1, 9, 9, 5, 9, 6, 6, 7, 0, 4, 9, 4, 4, 6, 3, 0, 7, 4, 0, 0, 4, 0, 1, 6, 4, 5, 3, 3, 5, 3, 4]
// export
public enum LearnerAction: Error {
    case skipEpoch
    case skipBatch
    case stop
}
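These are ordinary Swift errors: callbacks will throw them, and the training loop will catch the ones it knows how to handle. A tiny illustrative sketch of that pattern (not part of the exported code):
// Illustrative only: a LearnerAction is thrown like any other error and
// pattern-matched in a catch clause.
do { throw LearnerAction.skipBatch }
catch LearnerAction.skipBatch { print("batch skipped") }
catch { print("unhandled action: \(error)") }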
Basic class
// export
/// A model learner, responsible for initializing and training a model on a given dataset.
public final class Learner<Label: TensorGroup,
                           Opt: TensorFlow.Optimizer & AnyObject>
    where Opt.Scalar: Differentiable,
          // Constrain model input to Tensor<Float>, to work around
          // https://forums.fast.ai/t/fix-ad-crash-in-learner/42970.
          Opt.Model.Input == Tensor<Float>
{
    // Common type aliases.
    public typealias Input = Model.Input
    public typealias Data = DataBunch<DataBatch<Input, Label>>
    public typealias Loss = Tensor<Float>
    public typealias Optimizer = Opt
    public typealias Model = Optimizer.Model
    public typealias Variables = Model.AllDifferentiableVariables
    public typealias EventHandler = (Learner) throws -> Void
    
    /// A wrapper class to hold the loss function, to work around
    /// https://forums.fast.ai/t/fix-ad-crash-in-learner/42970.
    public final class LossFunction {
        public typealias F = @differentiable (Model.Output, @nondiff Label) -> Loss
        public var f: F
        init(_ f: @escaping F) { self.f = f }
    }
    
    /// The dataset on which the model will be trained.
    public var data: Data
    /// The optimizer used for updating model parameters along gradient vectors.
    public var optimizer: Optimizer
    /// The function that computes a loss value when given a prediction and a label.
    public var lossFunction: LossFunction
    /// The model being trained.
    public var model: Model
    
    // Is there a better way to initialize these so they don't have to be Optionals?
    public var currentInput: Input? = nil
    public var currentTarget: Label? = nil
    public var currentOutput: Model.Output? = nil
    
    /// The total number of epochs.
    public private(set) var epochCount: Int = .zero
    /// The current epoch.
    public private(set) var currentEpoch: Int = .zero
    /// The current gradient.
    public private(set) var currentGradient: Model.CotangentVector = .zero
    /// The current loss.
    public private(set) var currentLoss: Loss = .zero
    /// Whether we are in training mode or not.
    public private(set) var inTrain: Bool = false
    /// The current epoch + iteration, a float between 0.0 and epochCount.
    public private(set) var pctEpochs: Float = 0.0
    /// The current iteration.
    public private(set) var currentIter: Int = 0
    /// The number of iterations in the current dataset.
    public private(set) var iterCount: Int = 0
    
    open class Delegate {
        open var order: Int { return 0 }
        public init() {}
        
        /// A closure which will be called upon the start of model training.
        open func trainingWillStart(learner: Learner) throws {}
        /// A closure which will be called upon the completion of model training.
        open func trainingDidFinish(learner: Learner) throws {}
        /// A closure which will be called upon the start of an epoch.
        open func epochWillStart(learner: Learner) throws {}
        /// A closure which will be called upon the completion of an epoch.
        open func epochDidFinish(learner: Learner) throws {}
        /// A closure which will be called upon the start of model validation.
        open func validationWillStart(learner: Learner) throws {}
        /// A closure which will be called upon the start of training on a batch.
        open func batchWillStart(learner: Learner) throws {}
        /// A closure which will be called upon the completion of training on a batch.
        open func batchDidFinish(learner: Learner) throws {}
        /// A closure which will be called when a new gradient has been computed.
        open func didProduceNewGradient(learner: Learner) throws {}
        /// A closure which will be called upon the completion of an optimizer update.
        open func optimizerDidUpdate(learner: Learner) throws {}
        
        /// TODO: learnerDidProduceNewOutput and learnerDidProduceNewLoss need to
        /// be differentiable once we can have the loss function inside the Learner.
    }
    
    public var delegates: [Delegate] = [] {
        didSet { delegates.sort { $0.order < $1.order } }
    }
    
    /// The context used for layer applications.
    public private(set) var context = Context(learningPhase: .training)
    
    /// Creates a learner.
    ///
    /// - Parameters:
    ///   - data: The dataset which will be trained on.
    ///   - lossFunction: The loss function.
    ///   - optimizer: The optimizer used for updating model parameters along
    ///     gradient vectors.
    ///   - modelInitializer: The closure that produces a model to be trained.
    public init(data: Data,
                lossFunction: @escaping LossFunction.F,
                optimizer: Optimizer,
                initializingWith modelInitializer: () -> Model) {
        self.data = data
        self.optimizer = optimizer
        self.lossFunction = LossFunction(lossFunction)
        self.model = modelInitializer()
    }
}
Then let's write the parts of the training loop:
// export
extension Learner {
    /// Evaluates the model on the given batch: computes the output and the loss,
    /// without computing gradients.
    ///
    /// - Parameter batch: The batch of input data and labels to be evaluated on.
    private func evaluate(onBatch batch: DataBatch<Input, Label>) throws {
        currentOutput = model.applied(to: currentInput!, in: context)
        currentLoss = lossFunction.f(currentOutput!, currentTarget!)
    }
    
    /// Trains the model on the given batch.
    ///
    /// - Parameter batch: The batch of input data and labels to be trained on.
    private func train(onBatch batch: DataBatch<Input, Label>) throws {
        let (xb, yb) = (currentInput!, currentTarget!)
        (currentLoss, currentGradient) = model.valueWithGradient { model -> Loss in
            let y = model.applied(to: xb, in: context)
            currentOutput = y
            return lossFunction.f(y, yb)
        }
        try delegates.forEach { try $0.didProduceNewGradient(learner: self) }
        optimizer.update(&model.allDifferentiableVariables, along: self.currentGradient)
    }
    
    /// Performs a training (or validation) epoch on a Dataset.
    private func train(onDataset ds: Dataset<DataBatch<Input, Label>>) throws {
        iterCount = ds.count(where: {_ in true})
        for batch in ds {
            (currentInput, currentTarget) = (batch.xb, batch.yb)
            try delegates.forEach { try $0.batchWillStart(learner: self) }
            do { if inTrain { try train(onBatch: batch) } else { try evaluate(onBatch: batch) } }
            catch LearnerAction.skipBatch {}
            try delegates.forEach { try $0.batchDidFinish(learner: self) }
        }
    }
}
And the whole fit function.
// export
extension Learner {
    /// Starts fitting.
    /// - Parameter epochCount: The number of epochs that will be run.
    public func fit(_ epochCount: Int) throws {
        self.epochCount = epochCount
        do {
            try delegates.forEach { try $0.trainingWillStart(learner: self) }
            for i in 0..<epochCount {
                self.currentEpoch = i
                do {
                    try delegates.forEach { try $0.epochWillStart(learner: self) }
                    do { try train(onDataset: data.train) }
                    try delegates.forEach { try $0.validationWillStart(learner: self) }
                    do { try train(onDataset: data.valid) }
                } catch LearnerAction.skipEpoch {}
                try delegates.forEach { try $0.epochDidFinish(learner: self) }
            }
        } catch LearnerAction.stop {}
        try delegates.forEach { try $0.trainingDidFinish(learner: self) }
    }
}
let opt = SGD<BasicModel, Float>(learningRate: 1e-2)
func modelInit() -> BasicModel {return BasicModel(nIn: m, nHid: nHid, nOut: c)}
let learner = Learner(data: data, lossFunction: softmaxCrossEntropy, optimizer: opt, initializingWith: modelInit)
try learner.fit(2)
Callback classes are defined as extensions of the Learner.
// export
extension Learner {
    public class TrainEvalDelegate: Delegate {
        public override func trainingWillStart(learner: Learner) {
            learner.pctEpochs = 0.0
        }
        
        public override func epochWillStart(learner: Learner) {
            learner.pctEpochs = Float(learner.currentEpoch)
            learner.context = Context(learningPhase: .training)
            learner.inTrain = true
            learner.currentIter = 0
        }
        
        public override func batchDidFinish(learner: Learner) {
            learner.currentIter += 1
            if learner.inTrain {
                learner.pctEpochs += 1.0 / Float(learner.iterCount)
            }
        }
        
        public override func validationWillStart(learner: Learner) {
            learner.context = Context(learningPhase: .inference)
            learner.inTrain = false
            learner.currentIter = 0
        }
    }
    
    public func makeTrainEvalDelegate() -> TrainEvalDelegate { return TrainEvalDelegate() }
}
let learner = Learner(data: data, lossFunction: softmaxCrossEntropy, optimizer: opt, initializingWith: modelInit)
learner.delegates = [learner.makeTrainEvalDelegate()]
try learner.fit(2)
// export
// TODO: make metrics more generic (probably for after the course)
extension Learner {
    public class AvgMetric: Delegate {
        public let metrics: [(Tensor<Float>, Tensor<Int32>) -> Tensor<Float>]
        var total: Int = 0
        var partials: [Tensor<Float>] = []
        
        public init(metrics: [(Tensor<Float>, Tensor<Int32>) -> Tensor<Float>]) { self.metrics = metrics }
        
        public override func epochWillStart(learner: Learner) {
            total = 0
            partials = Array(repeating: Tensor(0), count: metrics.count + 1)
        }
        
        public override func batchDidFinish(learner: Learner) {
            if !learner.inTrain {
                if let target = learner.currentTarget as? Tensor<Int32> {
                    let bs = target.shape[0]
                    total += Int(bs)
                    partials[0] += Float(bs) * learner.currentLoss
                    for i in 1...metrics.count {
                        partials[i] += Float(bs) * metrics[i-1]((learner.currentOutput as! Tensor<Float>), target)
                    }
                }
            }
        }
        
        public override func epochDidFinish(learner: Learner) {
            for i in 0...metrics.count { partials[i] = partials[i] / Float(total) }
            print("Epoch \(learner.currentEpoch): \(partials)")
        }
    }
    
    public func makeAvgMetric(metrics: [(Tensor<Float>, Tensor<Int32>) -> Tensor<Float>]) -> AvgMetric {
        return AvgMetric(metrics: metrics)
    }
}
let learner = Learner(data: data, lossFunction: softmaxCrossEntropy, optimizer: opt, initializingWith: modelInit)
learner.delegates = [learner.makeTrainEvalDelegate(), learner.makeAvgMetric(metrics: [accuracy])]
try learner.fit(2)
Epoch 0: [0.48076352, 0.8779]
Epoch 1: [0.36723757, 0.8996]
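Any function with the signature (Tensor<Float>, Tensor<Int32>) -> Tensor<Float> can be passed in the metrics array. For instance, here is a hypothetical (non-exported) error-rate metric, sketched on top of the accuracy function exported from notebook 03:
// Hypothetical extra metric: error rate, defined from `accuracy` (notebook 03).
func errorRate(_ output: Tensor<Float>, _ target: Tensor<Int32>) -> Tensor<Float> {
    return 1.0 - accuracy(output, target)
}
// It could then be reported next to accuracy:
// learner.makeAvgMetric(metrics: [accuracy, errorRate])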
// export
extension Learner {
    public class Normalize: Delegate {
        public let mean, std: Tensor<Float>
        public init(mean: Tensor<Float>, std: Tensor<Float>) {
            (self.mean, self.std) = (mean, std)
        }
        
        public override func batchWillStart(learner: Learner) {
            learner.currentInput = (learner.currentInput! - mean) / std
        }
    }
    
    public func makeNormalize(mean: Tensor<Float>, std: Tensor<Float>) -> Normalize {
        return Normalize(mean: mean, std: std)
    }
}
(xTrain.mean(), xTrain.standardDeviation())
▿ 2 elements
  - .0 : 0.13066047
  - .1 : [[0.3081079]]
// export
public let mnistStats = (mean: Tensor<Float>(0.13066047), std: Tensor<Float>(0.3081079))
let learner = Learner(data: data, lossFunction: softmaxCrossEntropy, optimizer: opt, initializingWith: modelInit)
learner.delegates = [learner.makeTrainEvalDelegate(), learner.makeAvgMetric(metrics: [accuracy]),
                     learner.makeNormalize(mean: mnistStats.mean, std: mnistStats.std)]
try learner.fit(2)
Epoch 0: [0.30538177, 0.9112]
Epoch 1: [0.2491324, 0.9276]
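All the callbacks above follow the same pattern, so new behaviour is easy to add. As a final hypothetical sketch (not exported), here is a delegate that uses LearnerAction.stop to halt training after a fixed number of batches:
// Hypothetical delegate: stop training after `maxBatches` batches by throwing
// LearnerAction.stop, which fit(_:) catches before calling trainingDidFinish.
extension Learner {
    public class StopAfterNBatches: Delegate {
        let maxBatches: Int
        var batchesSeen = 0
        public init(maxBatches: Int) { self.maxBatches = maxBatches }
        
        public override func batchDidFinish(learner: Learner) throws {
            batchesSeen += 1
            if batchesSeen >= maxBatches { throw LearnerAction.stop }
        }
    }
    
    public func makeStopAfterNBatches(maxBatches: Int) -> StopAfterNBatches {
        return StopAfterNBatches(maxBatches: maxBatches)
    }
}
// Usage sketch:
// learner.delegates.append(learner.makeStopAfterNBatches(maxBatches: 10))
// try learner.fit(1)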
notebookToScript(fname: (Path.cwd / "04_callbacks.ipynb").string)