%install '.package(path: "$cwd/FastaiNotebook_05b_early_stopping")' FastaiNotebook_05b_early_stopping

import FastaiNotebook_05b_early_stopping

%include "EnableIPythonDisplay.swift"
IPythonDisplay.shell.enable_matplotlib("inline")

// export
import Path
import TensorFlow
import Python

let plt = Python.import("matplotlib.pyplot")

let data = mnistDataBunch(flat: false, bs: 512)

let firstBatch = data.train.first(where: { _ in true })!
let batchShape = firstBatch.xb.shape
let batchSize = batchShape.dimensions[0]
let exampleSideSize = batchShape.dimensions[1]
assert(exampleSideSize == batchShape.dimensions[2])
print("Batch size: \(batchSize)")
print("Example side size: \(exampleSideSize)")

let classCount = firstBatch.yb.shape.dimensions[0]
print("Class count: \(classCount)")

firstBatch.xb.shape

// export
extension Learner {
    public class AddChannel: Delegate {
        public override func batchWillStart(learner: Learner) {
            learner.currentInput = learner.currentInput!.expandingShape(at: -1)
        }
    }

    public func makeAddChannel() -> AddChannel { return AddChannel() }
}

//export
public func conv(_ cIn: Int, _ cOut: Int, ks: Int = 3, stride: Int = 2) -> FAConv2D<Float> {
    return FAConv2D<Float>(filterShape: (ks, ks, cIn, cOut),
                           strides: (stride, stride),
                           padding: .same,
                           activation: relu)
}

public struct CnnModel: Layer {
    public var convs: [FAConv2D<Float>]
    public var pool = FAAdaptiveAvgPool2D<Float>()
    public var flatten = Flatten<Float>()
    public var linear: FADense<Float>

    public init(channelIn: Int, nOut: Int, filters: [Int]) {
        convs = []
        let allFilters = [channelIn] + filters
        for i in 0..<filters.count {
            convs.append(conv(allFilters[i], allFilters[i + 1]))
        }
        linear = FADense<Float>(inputSize: filters.last!, outputSize: nOut)
    }

    @differentiable
    public func applied(to input: TF) -> TF {
        return input.sequenced(through: convs, pool, flatten, linear)
    }
}

let model = CnnModel(channelIn: 1, nOut: 10, filters: [8, 16, 32, 32])

// Test that data goes through the model as expected.
let predictions = model.applied(to: firstBatch.xb.expandingShape(at: -1))
print(predictions.shape)
print(predictions[0])

let opt = SimpleSGD(learningRate: 0.4)
func modelInit() -> CnnModel { return CnnModel(channelIn: 1, nOut: 10, filters: [8, 16, 32, 32]) }
let learner = Learner(data: data, lossFunction: softmaxCrossEntropy, optimizer: opt, initializingWith: modelInit)
let recorder = learner.makeDefaultDelegates(metrics: [accuracy])
learner.addDelegates([learner.makeNormalize(mean: mnistStats.mean, std: mnistStats.std),
                      learner.makeAddChannel()])

// This happens on the GPU (if you have one and it's configured correctly).
// I tried this on a GCE 8vCPU 30GB + Tesla P100:
// - time: ~4.3s
// - nvidia-smi shows ~10% GPU-Util while this is running
time { try! learner.fit(1) }

// This happens on the CPU.
// I tried this on a GCE 8vCPU 30GB + Tesla P100:
// - time: ~6.3s
// - nvidia-smi shows 0% GPU-Util while this is running
time {
    withDevice(.cpu) { try! learner.fit(1) }
}

class ActivationStatistics: LayerDelegate<Tensor<Float>> {
    var activationMeans: [Float] = []
    var activationStds: [Float] = []
    override func didProduceActivation(_ activation: Tensor<Float>) {
        activationMeans.append(activation.mean().scalar!)
        activationStds.append(activation.standardDeviation().reshaped(to: []).scalar!)
    }
}

extension KeyPathIterable {
    mutating func initializeLayerDelegates(with initializer: () -> LayerDelegate<Tensor<Float>>) {
        for kp in recursivelyAllWritableKeyPaths(to: LayerDelegate<Tensor<Float>>.self) {
            self[keyPath: kp] = initializer()
        }
    }

    func layerDelegates<D: LayerDelegate<Tensor<Float>>>(havingType: D.Type) -> [D] {
        var result: [D] = []
        for kp in recursivelyAllWritableKeyPaths(to: LayerDelegate<Tensor<Float>>.self) {
            guard let d = self[keyPath: kp] as? D else { continue }
            result.append(d)
        }
        return result
    }
}
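// To see what `recursivelyAllWritableKeyPaths(to:)` is doing for us above, here is a
// minimal, self-contained sketch, independent of the Learner machinery. `TensorPair`
// is a hypothetical struct used only for this illustration: KeyPathIterable hands back
// a writable key path for every stored property of the requested type, recursing
// through nested properties. This is exactly how initializeLayerDelegates can reach
// every layer's delegate without knowing the model's structure in advance.
struct TensorPair: KeyPathIterable {
    var a = Tensor<Float>(zeros: [2, 2])
    var b = Tensor<Float>(ones: [2, 2])
}

var pair = TensorPair()
// Visits both `a` and `b`, since each is a writable Tensor<Float> property.
for kp in pair.recursivelyAllWritableKeyPaths(to: Tensor<Float>.self) {
    pair[keyPath: kp] = pair[keyPath: kp] + 1
}
print(pair.a)  // all ones
print(pair.b)  // all twos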
let learner = Learner(data: data, lossFunction: softmaxCrossEntropy, optimizer: opt, initializingWith: modelInit)
let recorder = learner.makeDefaultDelegates(metrics: [accuracy])
learner.addDelegates([learner.makeNormalize(mean: mnistStats.mean, std: mnistStats.std),
                      learner.makeAddChannel()])
learner.model.initializeLayerDelegates(with: { ActivationStatistics() })

// This LayerDelegate stuff slows it down to ~6s/epoch.
time { try! learner.fit(2) }

let activationStatistics = learner.model.layerDelegates(havingType: ActivationStatistics.self)

for stats in activationStatistics { plt.plot(stats.activationMeans) }
plt.legend(Array(1...activationStatistics.count))
plt.show()

for stats in activationStatistics { plt.plot(stats.activationStds) }
plt.legend(Array(1...activationStatistics.count))
plt.show()

notebookToScript(fname: (Path.cwd / "06_cuda.ipynb").string)
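// The `time` helper used throughout comes from an earlier exported notebook. As a
// reference, here is a minimal sketch of such a helper, assuming only Dispatch is
// available; `timeSketch` is a hypothetical name chosen to avoid clashing with the
// real helper, and the real implementation may differ (e.g. running the body several
// times and averaging).
import Dispatch

public func timeSketch(_ body: () throws -> Void) rethrows {
    let start = DispatchTime.now()
    try body()
    // uptimeNanoseconds is a UInt64 monotonic clock reading; convert to milliseconds.
    let elapsedMs = Double(DispatchTime.now().uptimeNanoseconds - start.uptimeNanoseconds) / 1_000_000
    print("elapsed: \(elapsedMs) ms")
}

// Usage mirrors the cells above: timeSketch { try! learner.fit(1) }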