%install '.package(path: "$cwd/FastaiNotebook_03_minibatch_training")' FastaiNotebook_03_minibatch_training
Installing packages:
.package(path: "/home/ubuntu/fastai_docs/dev_swift/FastaiNotebook_03_minibatch_training") FastaiNotebook_03_minibatch_training
With SwiftPM flags: []
Working in: /tmp/tmpgb29yv6i/swift-install
Fetching https://github.com/mxcl/Path.swift
Fetching https://github.com/JustHTTP/Just
Completed resolution in 2.55s
Cloning https://github.com/mxcl/Path.swift
Resolving https://github.com/mxcl/Path.swift at 0.16.2
Cloning https://github.com/JustHTTP/Just
Resolving https://github.com/JustHTTP/Just at 0.7.1
Compile Swift Module 'Path' (9 sources)
Compile Swift Module 'Just' (1 sources)
Compile Swift Module 'FastaiNotebook_03_minibatch_training' (7 sources)
Compile Swift Module 'jupyterInstalledPackages' (1 sources)
Linking ./.build/x86_64-unknown-linux/debug/libjupyterInstalledPackages.so
Initializing Swift...
Installation complete!
import FastaiNotebook_03_minibatch_training
// export
import Path
import TensorFlow
var (xTrain,yTrain,xValid,yValid) = loadMNIST(path: mnistPath, flat: true)
let (n,m) = (xTrain.shape[0],xTrain.shape[1])
let c = yTrain.max().scalarized()+1
print(n,m,c)
let nHid = 50
60000 784 10
// export
public struct BasicModel: Layer {
public var layer1: Dense<Float>
public var layer2: Dense<Float>
public init(nIn: Int, nHid: Int, nOut: Int){
layer1 = Dense(inputSize: nIn, outputSize: nHid, activation: relu)
layer2 = Dense(inputSize: nHid, outputSize: nOut)
}
@differentiable
public func applied(to input: Tensor<Float>) -> Tensor<Float> {
return input.sequenced(through: layer1, layer2)
}
}
var model = BasicModel(nIn: m, nHid: nHid, nOut: Int(c))
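As a quick sanity check (not in the original notebook), we can run a small dummy batch through the model; assuming the same applied(to:) API used above, the output should have one row per example and one column per class.
// Hypothetical sanity check: a batch of 4 zero-filled inputs should come out with shape [4, 10].
let dummyInput = Tensor<Float>(zeros: [4, m])
print(model.applied(to: dummyInput).shape)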
//export
public struct FADataset<Element> where Element: TensorGroup{
public var innerDs: Dataset<Element>
public var shuffle: Bool = false
public var bs: Int = 64
public var dsCount: Int
public var count: Int {
return dsCount%bs == 0 ? dsCount/bs : dsCount/bs+1
}
public var ds: Dataset<Element> {
if !shuffle { return innerDs.batched(Int64(bs))}
let seed = Int64.random(in: Int64.min..<Int64.max)
return innerDs.shuffled(sampleCount: Int64(dsCount), randomSeed: seed).batched(Int64(bs))
}
public init(_ ds: Dataset<Element>, len: Int, shuffle: Bool = false, bs: Int = 64){
(self.innerDs,self.dsCount,self.shuffle,self.bs) = (ds, len, shuffle, bs)
}
}
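Note that ds is a computed property: each time it is accessed on a shuffled FADataset, a fresh random seed is drawn, so every epoch sees the batches in a different order. A minimal, hypothetical illustration reusing the MNIST tensors loaded above:
// Two accesses to .ds reshuffle independently, so the first batch usually comes out in a different order.
let shuffledDs = FADataset(Dataset(elements: DataBatch(xb: xTrain, yb: yTrain)), len: n, shuffle: true)
for batch in shuffledDs.ds { print(batch.yb); break }
for batch in shuffledDs.ds { print(batch.yb); break }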
// export
public struct DataBunch<Element> where Element: TensorGroup{
public var train: FADataset<Element>
public var valid: FADataset<Element>
public init(train: Dataset<Element>, valid: Dataset<Element>, trainLen: Int, validLen: Int, bs: Int = 64) {
self.train = FADataset(train, len: trainLen, shuffle: true, bs: bs)
self.valid = FADataset(valid, len: validLen, shuffle: false, bs: 2*bs)
}
}
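The validation set gets twice the training batch size: evaluation runs no backward pass, so there are no gradients or intermediate activations to keep around, and larger batches fit in the same memory.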
//export
public func mnistDataBunch(path: Path = mnistPath, flat: Bool = false, bs: Int = 64
) -> DataBunch<DataBatch<TF, TI>>{
let (xTrain,yTrain,xValid,yValid) = loadMNIST(path: path, flat: flat)
return DataBunch(train: Dataset(elements:DataBatch(xb:xTrain, yb:yTrain)),
valid: Dataset(elements:DataBatch(xb:xValid, yb:yValid)),
trainLen: xTrain.shape[0],
validLen: xValid.shape[0],
bs: bs)
}
let data = mnistDataBunch(flat: true)
data.train.count
938
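With 60,000 training examples and a batch size of 64, the last batch is partial: 60000 / 64 = 937.5, so count rounds up to 938 batches.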
Shuffle test: time how long it takes to pull the first batch out of a freshly shuffled dataset.
time(repeating: 10) {
var tst = data.train.ds
var firstBatch: DataBatch<TF,TI>? = nil
for batch in tst{
firstBatch = batch
break
}
firstBatch!.yb
}
232.35490609999997 ms
var tst = data.train.ds
var firstBatch: DataBatch<TF, TI>? = nil
for batch in tst{
firstBatch = batch
break
}
firstBatch!.yb
[0, 2, 0, 9, 5, 1, 9, 2, 3, 9, 3, 1, 6, 6, 9, 5, 5, 5, 7, 9, 7, 0, 4, 1, 5, 9, 9, 2, 7, 6, 3, 7, 2, 1, 0, 2, 1, 5, 9, 3, 7, 4, 4, 3, 8, 7, 6, 3, 1, 1, 1, 2, 1, 0, 1, 9, 7, 1, 8, 9, 2, 0, 3, 4]
// export
public enum LearnerAction: Error {
case skipEpoch
case skipBatch
case stop
}
Basic Learner class
// export
/// A model learner, responsible for initializing and training a model on a given dataset.
public final class Learner<Label: TensorGroup,
Opt: TensorFlow.Optimizer & AnyObject>
where Opt.Scalar: Differentiable,
// Constrain model input to Tensor<Float>, to work around
// https://forums.fast.ai/t/fix-ad-crash-in-learner/42970.
Opt.Model.Input == Tensor<Float>
{
// Common type aliases.
public typealias Input = Model.Input
public typealias Data = DataBunch<DataBatch<Input, Label>>
public typealias Loss = Tensor<Float>
public typealias Optimizer = Opt
public typealias Model = Optimizer.Model
public typealias Variables = Model.AllDifferentiableVariables
public typealias EventHandler = (Learner) throws -> Void
/// A wrapper class to hold the loss function, to work around
/// https://forums.fast.ai/t/fix-ad-crash-in-learner/42970.
public final class LossFunction {
public typealias F = @differentiable (Model.Output, @nondiff Label) -> Loss
public var f: F
init(_ f: @escaping F) { self.f = f }
}
/// The dataset on which the model will be trained.
public var data: Data
/// The optimizer used for updating model parameters along gradient vectors.
public var optimizer: Optimizer
/// The function that computes a loss value when given a prediction and a label.
public var lossFunction: LossFunction
/// The model being trained.
public var model: Model
// Is there a better way to initialize these so that they don't have to be Optionals?
public var currentInput: Input? = nil
public var currentTarget: Label? = nil
public var currentOutput: Model.Output? = nil
/// The number of total epochs.
public private(set) var epochCount: Int = .zero
/// The current epoch.
public private(set) var currentEpoch: Int = .zero
/// The current gradient.
public private(set) var currentGradient: Model.CotangentVector = .zero
/// The current loss.
public private(set) var currentLoss: Loss = .zero
/// Whether the learner is currently in training mode.
public private(set) var inTrain: Bool = false
/// Training progress as epochs completed plus the fraction of the current epoch, a float between 0.0 and epochCount.
public private(set) var pctEpochs: Float = 0.0
/// The current iteration within the dataset.
public private(set) var currentIter: Int = 0
/// The number of iterations (batches) in the current dataset.
public private(set) var iterCount: Int = 0
open class Delegate {
open var order: Int { return 0 }
public init () {}
/// A closure which will be called upon the start of model training.
open func trainingWillStart(learner: Learner) throws {}
/// A closure which will be called upon the completion of model training.
open func trainingDidFinish(learner: Learner) throws {}
/// A closure which will be called upon the start of an epoch.
open func epochWillStart(learner: Learner) throws {}
/// A closure which will be called upon the completion of an epoch.
open func epochDidFinish(learner: Learner) throws {}
/// A closure which will be called upon the start of model validation.
open func validationWillStart(learner: Learner) throws {}
/// A closure which will be called upon the start of training on a batch.
open func batchWillStart(learner: Learner) throws {}
/// A closure which will be called upon the completion of training on a batch.
open func batchDidFinish(learner: Learner) throws {}
/// A closure which will be called when a new gradient has been computed.
open func didProduceNewGradient(learner: Learner) throws {}
/// A closure which will be called upon the completion of an optimizer update.
open func optimizerDidUpdate(learner: Learner) throws {}
/// TODO: learnerDidProduceNewOutput and learnerDidProduceNewLoss need to
/// be differentiable once we can have the loss function inside the Learner.
}
public var delegates: [Delegate] = [] {
didSet { delegates.sort { $0.order < $1.order } }
}
/// Creates a learner.
///
/// - Parameters:
/// - data: The databunch used for training and validation.
/// - lossFunction: The loss function.
/// - optimizer: The optimizer used for updating model parameters.
/// - modelInitializer: The closure that produces the model to be trained.
public init(data: Data,
lossFunction: @escaping LossFunction.F,
optimizer: Optimizer,
initializingWith modelInitializer: () -> Model) {
self.data = data
self.optimizer = optimizer
self.lossFunction = LossFunction(lossFunction)
self.model = modelInitializer()
}
}
Then let's write the parts of the training loop:
// export
extension Learner {
/// Evaluates the model on the given batch (forward pass and loss only, no parameter update).
///
/// - Parameter batch: The batch of input data and labels to evaluate on.
private func evaluate(onBatch batch: DataBatch<Input, Label>) throws {
currentOutput = model.applied(to: currentInput!)
currentLoss = lossFunction.f(currentOutput!, currentTarget!)
}
/// Trains the model on the given batch.
///
/// - Parameter batch: The batch of input data and labels to be trained on.
private func train(onBatch batch: DataBatch<Input, Label>) throws {
let (xb,yb) = (currentInput!,currentTarget!)
(currentLoss, currentGradient) = model.valueWithGradient { model -> Loss in
let y = model.applied(to: xb)
currentOutput = y
return lossFunction.f(y, yb)
}
try delegates.forEach { try $0.didProduceNewGradient(learner: self) }
optimizer.update(&model.allDifferentiableVariables, along: self.currentGradient)
}
/// Performs a training epoch on a Dataset.
private func train(onDataset ds: FADataset<DataBatch<Input, Label>>) throws {
iterCount = ds.count
for batch in ds.ds {
(currentInput, currentTarget) = (batch.xb, batch.yb)
try delegates.forEach { try $0.batchWillStart(learner: self) }
do { if inTrain { try train(onBatch: batch) } else { try evaluate(onBatch: batch) }}
catch LearnerAction.skipBatch {}
try delegates.forEach { try $0.batchDidFinish(learner: self) }
}
}
}
And the whole fit function.
// export
extension Learner {
/// Starts fitting.
/// - Parameter epochCount: The number of epochs that will be run.
public func fit(_ epochCount: Int) throws {
self.epochCount = epochCount
do {
try delegates.forEach { try $0.trainingWillStart(learner: self) }
for i in 0..<epochCount {
self.currentEpoch = i
do {
try delegates.forEach { try $0.epochWillStart(learner: self) }
do { try train(onDataset: data.train) }
try delegates.forEach { try $0.validationWillStart(learner: self) }
do { try train(onDataset: data.valid) }
} catch LearnerAction.skipEpoch {}
try delegates.forEach { try $0.epochDidFinish(learner: self) }
}
} catch LearnerAction.stop {}
try delegates.forEach { try $0.trainingDidFinish(learner: self) }
}
}
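Note how the three LearnerAction cases are caught at different levels: skipBatch is swallowed inside train(onDataset:), skipEpoch by the do/catch wrapping a single epoch, and stop by the outermost do/catch, so a delegate can bail out of exactly the scope it wants.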
let opt = SGD<BasicModel, Float>(learningRate: 1e-2)
func modelInit() -> BasicModel {return BasicModel(nIn: m, nHid: nHid, nOut: Int(c))}
let learner = Learner(data: data, lossFunction: softmaxCrossEntropy, optimizer: opt, initializingWith: modelInit)
try learner.fit(2)
Extension with convenience methods to add delegates:
// export
public extension Learner {
func addDelegate(_ delegate: Learner.Delegate) {
delegates.append(delegate)
}
func addDelegates(_ delegates: [Learner.Delegate]) {
self.delegates += delegates
}
}
Callback classes are defined as extensions of the Learner.
// export
extension Learner {
public class TrainEvalDelegate: Delegate {
public override func trainingWillStart(learner: Learner) {
learner.pctEpochs = 0.0
}
public override func epochWillStart(learner: Learner) {
Context.local.learningPhase = .training
learner.pctEpochs = Float(learner.currentEpoch)
learner.inTrain = true
learner.currentIter = 0
}
public override func batchDidFinish(learner: Learner) {
learner.currentIter += 1
if learner.inTrain{ learner.pctEpochs += 1.0 / Float(learner.iterCount) }
}
public override func validationWillStart(learner: Learner) {
Context.local.learningPhase = .inference
learner.inTrain = false
learner.currentIter = 0
}
}
public func makeTrainEvalDelegate() -> TrainEvalDelegate { return TrainEvalDelegate() }
}
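As an example of how the LearnerAction errors tie into the loop, here is a hypothetical delegate (not part of the exported code) that aborts the whole run after a fixed number of training batches by throwing LearnerAction.stop:
// Hypothetical, for illustration only: stop training after maxIter batches.
extension Learner {
    public class StopAfterDelegate: Delegate {
        public let maxIter: Int
        public init(maxIter: Int) { self.maxIter = maxIter }
        public override func batchDidFinish(learner: Learner) throws {
            if learner.inTrain && learner.currentIter >= maxIter { throw LearnerAction.stop }
        }
    }
    public func makeStopAfterDelegate(maxIter: Int) -> StopAfterDelegate {
        return StopAfterDelegate(maxIter: maxIter)
    }
}
It would be registered like any other callback, for example learner.addDelegate(learner.makeStopAfterDelegate(maxIter: 100)).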
let learner = Learner(data: data, lossFunction: softmaxCrossEntropy, optimizer: opt, initializingWith: modelInit)
learner.delegates = [learner.makeTrainEvalDelegate()]
try learner.fit(2)
// export
// TODO: make metrics more generic (probably for after the course)
extension Learner {
public class AvgMetric: Delegate {
public let metrics: [(TF,TI) -> TF]
var total: Int = 0
var partials: [TF] = []
public init(metrics: [(TF, TI) -> TF]){ self.metrics = metrics}
public override func epochWillStart(learner: Learner) {
total = 0
partials = Array(repeating: Tensor(0), count: metrics.count + 1)
}
public override func batchDidFinish(learner: Learner) {
if !learner.inTrain{
if let target = learner.currentTarget as? TI{
let bs = target.shape[0]
total += bs
partials[0] += Float(bs) * learner.currentLoss
for i in 1...metrics.count{
partials[i] += Float(bs) * metrics[i-1]((learner.currentOutput as! TF), target)
}
}
}
}
public override func epochDidFinish(learner: Learner) {
for i in 0...metrics.count {partials[i] = partials[i] / Float(total)}
print("Epoch \(learner.currentEpoch): \(partials)")
}
}
public func makeAvgMetric(metrics: [(TF,TI) -> TF]) -> AvgMetric{
return AvgMetric(metrics: metrics)
}
}
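The accuracy metric used below is exported from FastaiNotebook_03_minibatch_training. For reference, a minimal sketch with the expected (TF, TI) -> TF signature (hypothetical name, not the exported definition) could look like this:
// Sketch: fraction of predictions whose argmax matches the integer target.
func accuracySketch(_ output: TF, _ target: TI) -> TF {
    return TF(output.argmax(squeezingAxis: 1) .== target).mean()
}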
let learner = Learner(data: data, lossFunction: softmaxCrossEntropy, optimizer: opt, initializingWith: modelInit)
learner.delegates = [learner.makeTrainEvalDelegate(), learner.makeAvgMetric(metrics: [accuracy])]
try learner.fit(2)
Epoch 0: [0.49971578, 0.8738]
Epoch 1: [0.38159794, 0.8953]
// export
extension Learner {
public class Normalize: Delegate {
public let mean, std: TF
public init(mean: TF, std: TF){
(self.mean,self.std) = (mean,std)
}
public override func batchWillStart(learner: Learner) {
learner.currentInput = (learner.currentInput! - mean) / std
}
}
public func makeNormalize(mean: TF, std: TF) -> Normalize{
return Normalize(mean: mean, std: std)
}
}
(xTrain.mean(), xTrain.standardDeviation())
▿ 2 elements
  - .0 : 0.13066047
  - .1 : 0.3081079
// export
public let mnistStats = (mean: TF(0.13066047), std: TF(0.3081079))
let learner = Learner(data: data, lossFunction: softmaxCrossEntropy, optimizer: opt, initializingWith: modelInit)
learner.delegates = [learner.makeTrainEvalDelegate(), learner.makeAvgMetric(metrics: [accuracy]),
learner.makeNormalize(mean: mnistStats.mean, std: mnistStats.std)]
try learner.fit(2)
Epoch 0: [0.2994869, 0.9128]
Epoch 1: [0.2483886, 0.9272]
notebookToScript(fname: (Path.cwd / "04_callbacks.ipynb").string)