%install '.package(path: "$cwd/FastaiNotebook_03_minibatch_training")' FastaiNotebook_03_minibatch_training
Installing packages:
  .package(path: "/home/ubuntu/fastai_docs/dev_swift/FastaiNotebook_03_minibatch_training") FastaiNotebook_03_minibatch_training
With SwiftPM flags: []
Working in: /tmp/tmpaz6m2tdz
Fetching https://github.com/mxcl/Path.swift
Fetching https://github.com/JustHTTP/Just
Completed resolution in 2.39s
Cloning https://github.com/JustHTTP/Just
Resolving https://github.com/JustHTTP/Just at 0.7.1
Cloning https://github.com/mxcl/Path.swift
Resolving https://github.com/mxcl/Path.swift at 0.16.2
Compile Swift Module 'Just' (1 sources)
Compile Swift Module 'Path' (9 sources)
Compile Swift Module 'FastaiNotebook_03_minibatch_training' (6 sources)
Compile Swift Module 'jupyterInstalledPackages' (1 sources)
Linking ./.build/x86_64-unknown-linux/debug/libjupyterInstalledPackages.so
Initializing Swift...
Loading library...
Installation complete!
import FastaiNotebook_03_minibatch_training
// export
import Path
import TensorFlow
var (xTrain,yTrain,xValid,yValid) = loadMNIST(path: mnistPath, flat: true)
let (n,m) = (Int(xTrain.shape[0]),Int(xTrain.shape[1]))
let c = yTrain.max()+1
print(n,m,c)
60000 784 10
Those can't all be used to define a model though, since c is a Tensor rather than an Int...
let (n,m) = (60000,784)
let c = 10
let nHid = 50
// export
public struct BasicModel: Layer {
    public var layer1: Dense<Float>
    public var layer2: Dense<Float>
    
    public init(nIn: Int, nHid: Int, nOut: Int) {
        layer1 = Dense(inputSize: nIn, outputSize: nHid, activation: relu)
        layer2 = Dense(inputSize: nHid, outputSize: nOut)
    }
    
    @differentiable
    public func applied(to input: Tensor<Float>, in context: Context) -> Tensor<Float> {
        return input.sequenced(in: context, through: layer1, layer2)
    }
}
var model = BasicModel(nIn: m, nHid: nHid, nOut: c)
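As a quick sanity check, we can push the (flat) validation images through the freshly initialized model and confirm we get one activation per class. This is just an illustrative sketch using the applied(to:in:) API defined above; it isn't part of the exported code.
// Illustrative sanity check (not exported): one forward pass in inference mode.
let checkOutput = model.applied(to: xValid, in: Context(learningPhase: .inference))
print(checkOutput.shape)  // expect [10000, 10]: 10,000 validation images, c classes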
// export
public struct DataBunch<Element> where Element: TensorGroup {
    private var _train: Dataset<Element>
    private var _valid: Dataset<Element>
    
    public var shuffleTrain: Bool = true
    public var shuffleValid: Bool = false
    public var batchSize: Int = 64
    
    public var train: Dataset<Element> { return processDs(_train, shuffleTrain) }
    public var valid: Dataset<Element> { return processDs(_valid, shuffleValid) }
    
    private func processDs(_ ds: Dataset<Element>, _ shuffle: Bool) -> Dataset<Element> {
        if !shuffle { return ds.batched(Int64(batchSize)) }
        let count = Int64(ds.count(where: {_ in true}))
        return ds.batched(Int64(batchSize)).shuffled(sampleCount: count, randomSeed: Int64(random()))
    }
    
    public init(train: Dataset<Element>, valid: Dataset<Element>, batchSize: Int = 64) {
        (self._train, self._valid, self.batchSize) = (train, valid, batchSize)
    }
}
//export
public func mnistDataBunch(path: Path = mnistPath, flat: Bool = false, bs: Int = 64)
        -> DataBunch<DataBatch<Tensor<Float>, Tensor<Int32>>> {
    let (xTrain, yTrain, xValid, yValid) = loadMNIST(path: path, flat: flat)
    return DataBunch(train: Dataset(elements: DataBatch(xb: xTrain, yb: yTrain)),
                     valid: Dataset(elements: DataBatch(xb: xValid, yb: yValid)),
                     batchSize: bs)
}
let data = mnistDataBunch(flat: true)
Shuffle test: grab the first batch of data.train twice and check that the labels come in a different order each time.
var tst = data.train
var firstBatch: DataBatch<Tensor<Float>, Tensor<Int32>>? = nil
for batch in tst {
    firstBatch = batch
    break
}
firstBatch!.yb
[8, 8, 1, 7, 9, 8, 8, 1, 2, 0, 4, 0, 9, 6, 9, 7, 0, 7, 4, 5, 4, 0, 3, 6, 9, 1, 7, 1, 9, 2, 3, 5, 5, 0, 9, 7, 0, 0, 4, 4, 1, 4, 5, 1, 0, 8, 3, 6, 4, 0, 8, 0, 1, 0, 7, 3, 2, 7, 6, 1, 7, 6, 7, 4]
var tst = data.train
var firstBatch: DataBatch<Tensor<Float>, Tensor<Int32>>? = nil
for batch in tst {
    firstBatch = batch
    break
}
firstBatch!.yb
[8, 0, 9, 7, 2, 5, 2, 8, 8, 8, 7, 3, 3, 7, 3, 8, 8, 9, 8, 2, 6, 5, 2, 3, 8, 1, 9, 7, 9, 3, 1, 9, 6, 1, 9, 9, 5, 9, 6, 6, 7, 0, 4, 9, 4, 4, 6, 3, 0, 7, 4, 0, 0, 4, 0, 1, 6, 4, 5, 3, 3, 5, 3, 4]
// export
public enum LearnerAction: Error {
    case skipEpoch
    case skipBatch
    case stop
}
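These are ordinary Swift errors: callbacks will throw them, and the training loop will catch the ones it knows how to handle. A tiny illustrative sketch of that pattern (not part of the exported code):
// Illustrative only: a LearnerAction is thrown like any other error and
// pattern-matched in a catch clause.
do { throw LearnerAction.skipBatch }
catch LearnerAction.skipBatch { print("batch skipped") }
catch { print("unhandled action: \(error)") }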
Basic class
// export
/// A model learner, responsible for initializing and training a model on a given dataset.
public final class Learner<Label: TensorGroup,
                           Opt: TensorFlow.Optimizer & AnyObject>
    where Opt.Scalar: Differentiable,
          // Constrain model input to Tensor<Float>, to work around
          // https://forums.fast.ai/t/fix-ad-crash-in-learner/42970.
          Opt.Model.Input == Tensor<Float>
{
    // Common type aliases.
    public typealias Input = Model.Input
    public typealias Data = DataBunch<DataBatch<Input, Label>>
    public typealias Loss = Tensor<Float>
    public typealias Optimizer = Opt
    public typealias Model = Optimizer.Model
    public typealias Variables = Model.AllDifferentiableVariables
    public typealias EventHandler = (Learner) throws -> Void
    
    /// A wrapper class to hold the loss function, to work around
    /// https://forums.fast.ai/t/fix-ad-crash-in-learner/42970.
    public final class LossFunction {
        public typealias F = @differentiable (Model.Output, @nondiff Label) -> Loss
        public var f: F
        init(_ f: @escaping F) { self.f = f }
    }
    
    /// The dataset on which the model will be trained.
    public var data: Data
    /// The optimizer used for updating model parameters along gradient vectors.
    public var optimizer: Optimizer
    /// The function that computes a loss value when given a prediction and a label.
    public var lossFunction: LossFunction
    /// The model being trained.
    public var model: Model
    
    // Is there a better way to initialize these so they don't have to be Optionals?
    public var currentInput: Input? = nil
    public var currentTarget: Label? = nil
    public var currentOutput: Model.Output? = nil
    
    /// The total number of epochs.
    public private(set) var epochCount: Int = .zero
    /// The current epoch.
    public private(set) var currentEpoch: Int = .zero
    /// The current gradient.
    public private(set) var currentGradient: Model.CotangentVector = .zero
    /// The current loss.
    public private(set) var currentLoss: Loss = .zero
    /// Whether we are in training mode or not.
    public private(set) var inTrain: Bool = false
    /// The current epoch + iteration, a float between 0.0 and epochCount.
    public private(set) var pctEpochs: Float = 0.0
    /// The current iteration.
    public private(set) var currentIter: Int = 0
    /// The number of iterations in the current dataset.
    public private(set) var iterCount: Int = 0
    
    open class Delegate {
        open var order: Int { return 0 }
        public init() {}
        
        /// A closure which will be called upon the start of model training.
        open func trainingWillStart(learner: Learner) throws {}
        /// A closure which will be called upon the completion of model training.
        open func trainingDidFinish(learner: Learner) throws {}
        /// A closure which will be called upon the start of an epoch.
        open func epochWillStart(learner: Learner) throws {}
        /// A closure which will be called upon the completion of an epoch.
        open func epochDidFinish(learner: Learner) throws {}
        /// A closure which will be called upon the start of model validation.
        open func validationWillStart(learner: Learner) throws {}
        /// A closure which will be called upon the start of training on a batch.
        open func batchWillStart(learner: Learner) throws {}
        /// A closure which will be called upon the completion of training on a batch.
        open func batchDidFinish(learner: Learner) throws {}
        /// A closure which will be called when a new gradient has been computed.
        open func didProduceNewGradient(learner: Learner) throws {}
        /// A closure which will be called upon the completion of an optimizer update.
        open func optimizerDidUpdate(learner: Learner) throws {}
        
        /// TODO: learnerDidProduceNewOutput and learnerDidProduceNewLoss need to
        /// be differentiable once we can have the loss function inside the Learner.
    }
    
    public var delegates: [Delegate] = [] {
        didSet { delegates.sort { $0.order < $1.order } }
    }
    
    /// The context used for layer applications.
    public private(set) var context = Context(learningPhase: .training)
    
    /// Creates a learner.
    ///
    /// - Parameters:
    ///   - data: The dataset which will be trained on.
    ///   - lossFunction: The loss function.
    ///   - optimizer: The optimizer used for updating model parameters along
    ///     gradient vectors.
    ///   - modelInitializer: The closure that produces a model to be trained.
    public init(data: Data,
                lossFunction: @escaping LossFunction.F,
                optimizer: Optimizer,
                initializingWith modelInitializer: () -> Model) {
        self.data = data
        self.optimizer = optimizer
        self.lossFunction = LossFunction(lossFunction)
        self.model = modelInitializer()
    }
}
Then let's write the parts of the training loop:
// export
extension Learner {
    /// Evaluates the model on the given batch: computes the output and the loss,
    /// without computing gradients.
    ///
    /// - Parameter batch: The batch of input data and labels to be evaluated on.
    private func evaluate(onBatch batch: DataBatch<Input, Label>) throws {
        currentOutput = model.applied(to: currentInput!, in: context)
        currentLoss = lossFunction.f(currentOutput!, currentTarget!)
    }
    
    /// Trains the model on the given batch.
    ///
    /// - Parameter batch: The batch of input data and labels to be trained on.
    private func train(onBatch batch: DataBatch<Input, Label>) throws {
        let (xb, yb) = (currentInput!, currentTarget!)
        (currentLoss, currentGradient) = model.valueWithGradient { model -> Loss in
            let y = model.applied(to: xb, in: context)
            currentOutput = y
            return lossFunction.f(y, yb)
        }
        try delegates.forEach { try $0.didProduceNewGradient(learner: self) }
        optimizer.update(&model.allDifferentiableVariables, along: self.currentGradient)
    }
    
    /// Performs a training (or validation) epoch on a Dataset.
    private func train(onDataset ds: Dataset<DataBatch<Input, Label>>) throws {
        iterCount = ds.count(where: {_ in true})
        for batch in ds {
            (currentInput, currentTarget) = (batch.xb, batch.yb)
            try delegates.forEach { try $0.batchWillStart(learner: self) }
            do { if inTrain { try train(onBatch: batch) } else { try evaluate(onBatch: batch) } }
            catch LearnerAction.skipBatch {}
            try delegates.forEach { try $0.batchDidFinish(learner: self) }
        }
    }
}
And the whole fit function.
// export
extension Learner {
    /// Starts fitting.
    /// - Parameter epochCount: The number of epochs that will be run.
    public func fit(_ epochCount: Int) throws {
        self.epochCount = epochCount
        do {
            try delegates.forEach { try $0.trainingWillStart(learner: self) }
            for i in 0..<epochCount {
                self.currentEpoch = i
                do {
                    try delegates.forEach { try $0.epochWillStart(learner: self) }
                    do { try train(onDataset: data.train) }
                    try delegates.forEach { try $0.validationWillStart(learner: self) }
                    do { try train(onDataset: data.valid) }
                } catch LearnerAction.skipEpoch {}
                try delegates.forEach { try $0.epochDidFinish(learner: self) }
            }
        } catch LearnerAction.stop {}
        try delegates.forEach { try $0.trainingDidFinish(learner: self) }
    }
}
let opt = SGD<BasicModel, Float>(learningRate: 1e-2)
func modelInit() -> BasicModel {return BasicModel(nIn: m, nHid: nHid, nOut: c)}
let learner = Learner(data: data, lossFunction: softmaxCrossEntropy, optimizer: opt, initializingWith: modelInit)
try learner.fit(2)
Callback classes are defined as extensions of the Learner.
// export
extension Learner {
    public class TrainEvalDelegate: Delegate {
        public override func trainingWillStart(learner: Learner) {
            learner.pctEpochs = 0.0
        }
        
        public override func epochWillStart(learner: Learner) {
            learner.pctEpochs = Float(learner.currentEpoch)
            learner.context = Context(learningPhase: .training)
            learner.inTrain = true
            learner.currentIter = 0
        }
        
        public override func batchDidFinish(learner: Learner) {
            learner.currentIter += 1
            if learner.inTrain {
                learner.pctEpochs += 1.0 / Float(learner.iterCount)
            }
        }
        
        public override func validationWillStart(learner: Learner) {
            learner.context = Context(learningPhase: .inference)
            learner.inTrain = false
            learner.currentIter = 0
        }
    }
    
    public func makeTrainEvalDelegate() -> TrainEvalDelegate { return TrainEvalDelegate() }
}
let learner = Learner(data: data, lossFunction: softmaxCrossEntropy, optimizer: opt, initializingWith: modelInit)
learner.delegates = [learner.makeTrainEvalDelegate()]
try learner.fit(2)
// export
// TODO: make metrics more generic (probably for after the course)
extension Learner {
    public class AvgMetric: Delegate {
        public let metrics: [(Tensor<Float>, Tensor<Int32>) -> Tensor<Float>]
        var total: Int = 0
        var partials: [Tensor<Float>] = []
        
        public init(metrics: [(Tensor<Float>, Tensor<Int32>) -> Tensor<Float>]) { self.metrics = metrics }
        
        public override func epochWillStart(learner: Learner) {
            total = 0
            partials = Array(repeating: Tensor(0), count: metrics.count + 1)
        }
        
        public override func batchDidFinish(learner: Learner) {
            if !learner.inTrain {
                if let target = learner.currentTarget as? Tensor<Int32> {
                    let bs = target.shape[0]
                    total += Int(bs)
                    partials[0] += Float(bs) * learner.currentLoss
                    for i in 1...metrics.count {
                        partials[i] += Float(bs) * metrics[i-1]((learner.currentOutput as! Tensor<Float>), target)
                    }
                }
            }
        }
        
        public override func epochDidFinish(learner: Learner) {
            for i in 0...metrics.count { partials[i] = partials[i] / Float(total) }
            print("Epoch \(learner.currentEpoch): \(partials)")
        }
    }
    
    public func makeAvgMetric(metrics: [(Tensor<Float>, Tensor<Int32>) -> Tensor<Float>]) -> AvgMetric {
        return AvgMetric(metrics: metrics)
    }
}
let learner = Learner(data: data, lossFunction: softmaxCrossEntropy, optimizer: opt, initializingWith: modelInit)
learner.delegates = [learner.makeTrainEvalDelegate(), learner.makeAvgMetric(metrics: [accuracy])]
try learner.fit(2)
Epoch 0: [0.48076352, 0.8779]
Epoch 1: [0.36723757, 0.8996]
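Any function with the signature (Tensor<Float>, Tensor<Int32>) -> Tensor<Float> can be passed in the metrics array. For instance, here is a hypothetical (non-exported) error-rate metric, sketched on top of the accuracy function exported from notebook 03:
// Hypothetical extra metric: error rate, defined from `accuracy` (notebook 03).
func errorRate(_ output: Tensor<Float>, _ target: Tensor<Int32>) -> Tensor<Float> {
    return 1.0 - accuracy(output, target)
}
// It could then be reported next to accuracy:
// learner.makeAvgMetric(metrics: [accuracy, errorRate])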
// export
extension Learner {
    public class Normalize: Delegate {
        public let mean, std: Tensor<Float>
        public init(mean: Tensor<Float>, std: Tensor<Float>) {
            (self.mean, self.std) = (mean, std)
        }
        
        public override func batchWillStart(learner: Learner) {
            learner.currentInput = (learner.currentInput! - mean) / std
        }
    }
    
    public func makeNormalize(mean: Tensor<Float>, std: Tensor<Float>) -> Normalize {
        return Normalize(mean: mean, std: std)
    }
}
(xTrain.mean(), xTrain.standardDeviation())
▿ 2 elements
  - .0 : 0.13066047
  - .1 : [[0.3081079]]
// export
public let mnistStats = (mean: Tensor<Float>(0.13066047), std: Tensor<Float>(0.3081079))
let learner = Learner(data: data, lossFunction: softmaxCrossEntropy, optimizer: opt, initializingWith: modelInit)
learner.delegates = [learner.makeTrainEvalDelegate(), learner.makeAvgMetric(metrics: [accuracy]),
                     learner.makeNormalize(mean: mnistStats.mean, std: mnistStats.std)]
try learner.fit(2)
Epoch 0: [0.30538177, 0.9112]
Epoch 1: [0.2491324, 0.9276]
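All the callbacks above follow the same pattern, so new behaviour is easy to add. As a final hypothetical sketch (not exported), here is a delegate that uses LearnerAction.stop to halt training after a fixed number of batches:
// Hypothetical delegate: stop training after `maxBatches` batches by throwing
// LearnerAction.stop, which fit(_:) catches before calling trainingDidFinish.
extension Learner {
    public class StopAfterNBatches: Delegate {
        let maxBatches: Int
        var batchesSeen = 0
        public init(maxBatches: Int) { self.maxBatches = maxBatches }
        
        public override func batchDidFinish(learner: Learner) throws {
            batchesSeen += 1
            if batchesSeen >= maxBatches { throw LearnerAction.stop }
        }
    }
    
    public func makeStopAfterNBatches(maxBatches: Int) -> StopAfterNBatches {
        return StopAfterNBatches(maxBatches: maxBatches)
    }
}
// Usage sketch:
// learner.delegates.append(learner.makeStopAfterNBatches(maxBatches: 10))
// try learner.fit(1)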
notebookToScript(fname: (Path.cwd / "04_callbacks.ipynb").string)