%install '.package(path: "$cwd/FastaiNotebook_08a_heterogeneous_dictionary")' FastaiNotebook_08a_heterogeneous_dictionary

import FastaiNotebook_08a_heterogeneous_dictionary

%include "EnableIPythonDisplay.swift"
IPythonDisplay.shell.enable_matplotlib("inline")

// export
import Path
import TensorFlow

// Load Imagenette with the data block API.
let path = downloadImagenette()
let il = ItemList(fromFolder: path, extensions: ["jpeg", "jpg"])
let sd = SplitData(il, fromFunc: { grandParentSplitter(fName: $0, valid: "val") })
var (procItem, procLabel) = (NoopProcessor<Path>(), CategoryProcessor())
let sld = SplitLabeledData(sd, fromFunc: parentLabeler, procItem: &procItem, procLabel: &procLabel)
var rawData = sld.toDataBunch(itemToTensor: pathsToTensor, labelToTensor: intsToTensor)
let data = transformData(rawData, tfmItem: { openAndResize(fname: $0, size: 128) })

// Switch to flattened MNIST for the optimizer experiments below.
let data = mnistDataBunch(flat: true)
let (n, m) = (60000, 784)
let c = 10
let nHid = 50
func modelInit() -> BasicModel { return BasicModel(nIn: m, nHid: nHid, nOut: c) }

//export
open class StatDelegate {
    open var name: String { return "" }
    var defaultConfig: HeterogeneousDictionary { return HeterogeneousDictionary() }
    func update(
        state: inout [String: Tensor<Float>],
        for param: Tensor<Float>,
        along direction: Tensor<Float>,
        config: inout HeterogeneousDictionary
    ) {}
}

//export
open class StepDelegate {
    var defaultConfig: HeterogeneousDictionary { return HeterogeneousDictionary() }
    func update(
        param: inout Tensor<Float>,
        along direction: inout Tensor<Float>,
        state: [String: Tensor<Float>],
        config: inout HeterogeneousDictionary
    ) {}
}

//export
class StatefulOptimizer<Model: Layer>: Optimizer
    where Model.AllDifferentiableVariables == Model.CotangentVector {
    var configs: [HeterogeneousDictionary]
    var learningRate: Float {
        get { return configs.last![LearningRate()] }
        set { for i in configs.indices { self.configs[i][LearningRate()] = newValue } }
    }
    var learningRates: [Float] {
        get {
            var res: [Float] = []
            for config in configs { res.append(config[LearningRate()]) }
            return res
        }
        set { for i in configs.indices { self.configs[i][LearningRate()] = newValue[i] } }
    }
    var splits: (Int) -> Int
    var states: [String: Model.AllDifferentiableVariables]
    var statDelegates: [StatDelegate]
    var stepDelegates: [StepDelegate]

    init(
        stepDelegates: [StepDelegate],
        statDelegates: [StatDelegate],
        configs: [HeterogeneousDictionary],
        splits: @escaping (Int) -> Int
    ) {
        self.configs = Array(repeating: HeterogeneousDictionary(), count: configs.count)
        states = [:]
        // Start from the delegates' default configs, then overwrite with the user-provided ones.
        for stepDelegate in stepDelegates {
            for i in self.configs.indices {
                self.configs[i].merge(stepDelegate.defaultConfig) { (_, new) in new }
            }
        }
        for statDelegate in statDelegates {
            for i in self.configs.indices {
                self.configs[i].merge(statDelegate.defaultConfig) { (_, new) in new }
            }
            states[statDelegate.name] = Model.AllDifferentiableVariables.zero
        }
        for i in 0..<configs.count {
            self.configs[i].merge(configs[i]) { (_, new) in new }
        }
        self.splits = splits
        self.statDelegates = statDelegates
        self.stepDelegates = stepDelegates
    }

    func update(_ model: inout Model.AllDifferentiableVariables, along direction: Model.CotangentVector) {
        for (i, kp) in model.recursivelyAllWritableKeyPaths(to: Tensor<Float>.self).enumerated() {
            var grad = direction[keyPath: kp]
            var state = states.mapValues() { $0[keyPath: kp] }
            var config = configs[splits(i)]
            for statDelegate in statDelegates {
                statDelegate.update(state: &state, for: model[keyPath: kp], along: grad, config: &config)
            }
            for n in states.keys { states[n]![keyPath: kp] = state[n]! }
            for stepDelegate in stepDelegates {
                stepDelegate.update(param: &model[keyPath: kp], along: &grad, state: state, config: &config)
            }
        }
    }
}
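Later cells construct the optimizer with a single `config:` argument. That convenience initializer is not among the exported cells above; a minimal sketch of it, assuming it simply wraps one config and sends every parameter tensor to a single group, would be:

// Assumed convenience initializer (reconstructed, not from the exported cells):
// one config, and every parameter tensor routed to group 0.
extension StatefulOptimizer {
    convenience init(
        stepDelegates: [StepDelegate],
        statDelegates: [StatDelegate],
        config: HeterogeneousDictionary
    ) {
        self.init(
            stepDelegates: stepDelegates,
            statDelegates: statDelegates,
            configs: [config],
            splits: { _ in 0 }
        )
    }
}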
//export
class SGDStep: StepDelegate {
    override func update(
        param: inout Tensor<Float>,
        along direction: inout Tensor<Float>,
        state: [String: Tensor<Float>],
        config: inout HeterogeneousDictionary
    ) {
        param -= direction * config[LearningRate()]
    }
}

//export
public struct WeightDecayKey: HetDictKey, Equatable {
    public static var defaultValue: Float = 0.0
}

class WeightDecay: StepDelegate {
    override func update(
        param: inout Tensor<Float>,
        along direction: inout Tensor<Float>,
        state: [String: Tensor<Float>],
        config: inout HeterogeneousDictionary
    ) {
        param *= 1 - config[LearningRate()] * config[WeightDecayKey()]
    }
}

//export
class L2Regularization: StepDelegate {
    override func update(
        param: inout Tensor<Float>,
        along direction: inout Tensor<Float>,
        state: [String: Tensor<Float>],
        config: inout HeterogeneousDictionary
    ) {
        direction += config[WeightDecayKey()] * param
    }
}

//export
public struct Momentum: HetDictKey, Equatable {
    public static var defaultValue: Float = 0.9
}

public struct MomentumDampening: HetDictKey, Equatable {
    public static var defaultValue: Float = 0.9
}

class AverageGrad: StatDelegate {
    let dampened: Bool
    init(dampened: Bool = false) { self.dampened = dampened }
    override var name: String { return "averageGrad" }
    override func update(
        state: inout [String: Tensor<Float>],
        for param: Tensor<Float>,
        along direction: Tensor<Float>,
        config: inout HeterogeneousDictionary
    ) {
        state["averageGrad"]! *= config[Momentum()]
        config[MomentumDampening()] = 1.0 - (dampened ? config[Momentum()] : 0.0)
        state["averageGrad"]! += config[MomentumDampening()] * direction
    }
}

func split_func(_ a: Int) -> Int { return a < 2 ? 0 : 1 }

var configs = [
    HeterogeneousDictionary(LearningRate(), 0.0),
    HeterogeneousDictionary(LearningRate(), 0.01)
]
let opt = StatefulOptimizer<BasicModel>(
    stepDelegates: [SGDStep()], statDelegates: [], configs: configs, splits: split_func)
let learner = Learner(data: data, lossFunction: softmaxCrossEntropy, optimizer: opt, initializingWith: modelInit)
let recorder = learner.makeDefaultDelegates(metrics: [accuracy])
learner.delegates.append(learner.makeNormalize(mean: mnistStats.mean, std: mnistStats.std))

// With a learning rate of 0.0 for the first group, the first two parameter
// tensors should stay frozen; compare the prints before and after training.
let params = learner.model.allDifferentiableVariables
for kp in params.recursivelyAllWritableKeyPaths(to: TF.self) {
    print(params[keyPath: kp][0])
}

learner.fit(2)

let params = learner.model.allDifferentiableVariables
for kp in params.recursivelyAllWritableKeyPaths(to: TF.self) {
    print(params[keyPath: kp][0])
}

//export
class MomentumStep: StepDelegate {
    override func update(
        param: inout Tensor<Float>,
        along direction: inout Tensor<Float>,
        state: [String: Tensor<Float>],
        config: inout HeterogeneousDictionary
    ) {
        param -= config[LearningRate()] * state["averageGrad"]!
    }
}
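As a quick sanity check of the statistic logic (an illustrative cell, not from the original notebook; it assumes the HeterogeneousDictionary subscript falls back to a key's defaultValue when the key is absent), one can drive AverageGrad by hand on a scalar tensor:

// Hand-driving AverageGrad once: with momentum 0.9 and dampened == false,
// the dampening becomes 1.0, so state = 0.9 * 0.0 + 1.0 * 1.0 = 1.0.
var statCheck: [String: Tensor<Float>] = ["averageGrad": Tensor(0.0)]
var checkConfig = HeterogeneousDictionary(LearningRate(), 0.01)
AverageGrad().update(
    state: &statCheck, for: Tensor(0.0), along: Tensor(1.0), config: &checkConfig)
print(statCheck["averageGrad"]!) // 1.0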
let opt = StatefulOptimizer<BasicModel>(
    stepDelegates: [MomentumStep()], statDelegates: [AverageGrad()],
    config: HeterogeneousDictionary(LearningRate(), 0.01))
let learner = Learner(data: data, lossFunction: softmaxCrossEntropy, optimizer: opt, initializingWith: modelInit)
let recorder = learner.makeDefaultDelegates(metrics: [accuracy])
learner.delegates.append(learner.makeNormalize(mean: mnistStats.mean, std: mnistStats.std))
learner.fit(2)

//export
public struct SquareMomentum: HetDictKey, Equatable {
    public static var defaultValue: Float = 0.99
}

public struct SquareMomentumDampening: HetDictKey, Equatable {
    public static var defaultValue: Float = 0.99
}

class AverageSquaredGrad: StatDelegate {
    let dampened: Bool
    init(dampened: Bool = false) { self.dampened = dampened }
    override var name: String { return "averageSquaredGrad" }
    override func update(
        state: inout [String: Tensor<Float>],
        for param: Tensor<Float>,
        along direction: Tensor<Float>,
        config: inout HeterogeneousDictionary
    ) {
        state["averageSquaredGrad"]! *= config[SquareMomentum()]
        config[SquareMomentumDampening()] = 1.0 - (dampened ? config[SquareMomentum()] : 0.0)
        state["averageSquaredGrad"]! += config[SquareMomentumDampening()] * direction.squared()
    }
}

//export
class StepCount: StatDelegate {
    override var name: String { return "step" }
    override func update(
        state: inout [String: Tensor<Float>],
        for param: Tensor<Float>,
        along direction: Tensor<Float>,
        config: inout HeterogeneousDictionary
    ) {
        state["step"]! += 1.0
    }
}

//export
func debias<Scalar: TensorFlowFloatingPoint>(
    momentum: Scalar, dampening: Scalar, step: Tensor<Scalar>
) -> Tensor<Scalar> {
    return dampening * (1 - pow(momentum, step)) / (1 - momentum)
}

//export
public struct Epsilon: HetDictKey, Equatable {
    public static var defaultValue: Float = 1e-5
}

class AdamStep: StepDelegate {
    override func update(
        param: inout Tensor<Float>,
        along direction: inout Tensor<Float>,
        state: [String: Tensor<Float>],
        config: inout HeterogeneousDictionary
    ) {
        let debiasedLearningRate = config[LearningRate()] / debias(
            momentum: config[Momentum()],
            dampening: config[MomentumDampening()],
            step: state["step"]!
        )
        let debiasedRMSGrad = sqrt(state["averageSquaredGrad"]! / debias(
            momentum: config[SquareMomentum()],
            dampening: config[SquareMomentumDampening()],
            step: state["step"]!
        )) + config[Epsilon()]
        param -= debiasedLearningRate * state["averageGrad"]! / debiasedRMSGrad
    }
}

let opt = StatefulOptimizer<BasicModel>(
    stepDelegates: [AdamStep()],
    statDelegates: [AverageGrad(), AverageSquaredGrad(), StepCount()],
    config: HeterogeneousDictionary(LearningRate(), 0.01))
let learner = Learner(data: data, lossFunction: softmaxCrossEntropy, optimizer: opt, initializingWith: modelInit)
let recorder = learner.makeDefaultDelegates(metrics: [accuracy])
learner.delegates.append(learner.makeNormalize(mean: mnistStats.mean, std: mnistStats.std))
learner.fit(2)
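A quick numeric check of `debias` (an illustrative cell, not part of the original notebook): with momentum 0.9 and dampening 1.0, the correction is (1 - 0.9^t) / 0.1, which is 1 at step 1 and approaches 10 as t grows:

// debias(0.9, 1.0, t) = (1 - 0.9^t) / 0.1
print(debias(momentum: 0.9, dampening: 1.0, step: Tensor<Float>(1)))   // 1.0
print(debias(momentum: 0.9, dampening: 1.0, step: Tensor<Float>(100))) // ~10.0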
class LambStep: StepDelegate {
    override var defaultConfig: HeterogeneousDictionary {
        return HeterogeneousDictionary(Epsilon(), 1e-6, WeightDecayKey(), 0.0)
    }
    override func update(
        param: inout Tensor<Float>,
        along direction: inout Tensor<Float>,
        state: [String: Tensor<Float>],
        config: inout HeterogeneousDictionary
    ) {
        let debiasedAverageGrad = state["averageGrad"]! / debias(
            momentum: config[Momentum()],
            dampening: config[MomentumDampening()],
            step: state["step"]!
        )
        let debiasedRMSGrad = sqrt(state["averageSquaredGrad"]! / debias(
            momentum: config[SquareMomentum()],
            dampening: config[SquareMomentumDampening()],
            step: state["step"]!
        ) + config[Epsilon()])
        let step = debiasedAverageGrad / debiasedRMSGrad + config[WeightDecayKey()] * param
        // Trust-ratio scaling: rescale the layerwise step by ||param|| / ||step||, capped at 10.
        let r1 = sqrt((param * param).mean())
        let r2 = sqrt((step * step).mean())
        let factor = min(r1 / r2, Float(10.0))
        param -= config[LearningRate()] * factor * step
    }
}

notebookToScript(fname: (Path.cwd / "09_optimizer.ipynb").string)
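`LambStep` is defined but never run above. A training cell in the same pattern as the Adam run (an assumed usage, mirroring the cells above) would look like this:

// Assumed LAMB training cell, mirroring the Adam setup; LambStep reads the
// same statistics (averageGrad, averageSquaredGrad, step).
let opt = StatefulOptimizer<BasicModel>(
    stepDelegates: [LambStep()],
    statDelegates: [AverageGrad(), AverageSquaredGrad(), StepCount()],
    config: HeterogeneousDictionary(LearningRate(), 0.01))
let learner = Learner(data: data, lossFunction: softmaxCrossEntropy, optimizer: opt, initializingWith: modelInit)
let recorder = learner.makeDefaultDelegates(metrics: [accuracy])
learner.delegates.append(learner.makeNormalize(mean: mnistStats.mean, std: mnistStats.std))
learner.fit(2)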