%install '.package(path: "$cwd/FastaiNotebook_08a_heterogeneous_dictionary")' FastaiNotebook_08a_heterogeneous_dictionary
Installing packages:
	.package(path: "/home/ubuntu/fastai_docs/dev_swift/FastaiNotebook_08a_heterogeneous_dictionary")
		FastaiNotebook_08a_heterogeneous_dictionary
With SwiftPM flags: []
Working in: /tmp/tmperd7354d/swift-install
Fetching https://github.com/mxcl/Path.swift
Fetching https://github.com/JustHTTP/Just
Completed resolution in 4.11s
Cloning https://github.com/mxcl/Path.swift
Resolving https://github.com/mxcl/Path.swift at 0.16.2
Cloning https://github.com/JustHTTP/Just
Resolving https://github.com/JustHTTP/Just at 0.7.1
Compile Swift Module 'Just' (1 sources)
Compile Swift Module 'Path' (9 sources)
Compile Swift Module 'FastaiNotebook_08a_heterogeneous_dictionary' (14 sources)
Compile Swift Module 'jupyterInstalledPackages' (1 sources)
Linking ./.build/x86_64-unknown-linux/debug/libjupyterInstalledPackages.so
Initializing Swift...
Installation complete!
import FastaiNotebook_08a_heterogeneous_dictionary
%include "EnableIPythonDisplay.swift"
IPythonDisplay.shell.enable_matplotlib("inline")
('inline', 'module://ipykernel.pylab.backend_inline')
// export
import Path
import TensorFlow
let path = downloadImagenette()
let il = ItemList(fromFolder: path, extensions: ["jpeg", "jpg"])
let sd = SplitData(il, fromFunc: { grandParentSplitter(fName: $0, valid: "val") })
var (procItem, procLabel) = (NoopProcessor<Path>(), CategoryProcessor())
let sld = SplitLabeledData(sd, fromFunc: parentLabeler, procItem: &procItem, procLabel: &procLabel)
var rawData = sld.toDataBunch(itemToTensor: pathsToTensor, labelToTensor: intsToTensor)
let data = transformData(rawData, tfmItem: { openAndResize(fname: $0, size: 128) })
// The optimizer experiments below train on MNIST; this `data` shadows the Imagenette bunch above.
let data = mnistDataBunch(flat: true)
let (n,m) = (60000,784)
let c = 10
let nHid = 50
func modelInit() -> BasicModel {return BasicModel(nIn: m, nHid: nHid, nOut: c)}
//export
/// Maintains a running statistic of the gradients (e.g. their moving average),
/// stored in the optimizer's `states` dictionary under `name`.
open class StatDelegate<Scalar: TensorFlowFloatingPoint> {
  open var name: String { return "" }
  var defaultConfig: HeterogeneousDictionary { return HeterogeneousDictionary() }
  func update(
    state: inout [String: Tensor<Scalar>],
    for param: Tensor<Scalar>,
    along direction: Tensor<Scalar>,
    config: inout HeterogeneousDictionary
  ) { }
}
//export
/// Performs one piece of the parameter update, reading the statistics the
/// stat delegates maintain and the hyperparameters in `config`.
open class StepDelegate<Scalar: TensorFlowFloatingPoint> {
  var defaultConfig: HeterogeneousDictionary { return HeterogeneousDictionary() }
  func update(
    param: inout Tensor<Scalar>,
    along direction: inout Tensor<Scalar>,
    state: [String: Tensor<Scalar>],
    config: inout HeterogeneousDictionary
  ) { }
}
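The split into stats and steps is what makes the optimizer composable: a `StatDelegate` writes running values into the optimizer's state, and a `StepDelegate` reads them to mutate the parameter. As a minimal sketch (hypothetical, not part of the exported library), a gradient-clipping step slots in ahead of any other step delegate:

// Hypothetical example: clamp each gradient component to [-1, 1] before
// later step delegates see it.
class ClipGrad: StepDelegate<Float> {
  override func update(
    param: inout Tensor<Float>,
    along direction: inout Tensor<Float>,
    state: [String: Tensor<Float>],
    config: inout HeterogeneousDictionary
  ) {
    direction = max(min(direction, 1.0), -1.0)
  }
}

Because `StatefulOptimizer` (below) applies step delegates in array order, passing `stepDelegates: [ClipGrad(), SGDStep()]` would clip before stepping.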
//export
class StatefulOptimizer<Model: Layer, Scalar: TensorFlowFloatingPoint>: Optimizer
    where Model.AllDifferentiableVariables == Model.CotangentVector {
  var configs: [HeterogeneousDictionary]
  var learningRate: Float {
    get { return configs.last![LearningRate()] }
    set {
      for i in configs.indices { configs[i][LearningRate()] = newValue }
    }
  }
  var learningRates: [Float] {
    get {
      var res: [Float] = []
      for config in configs { res.append(config[LearningRate()]) }
      return res
    }
    set {
      for i in configs.indices { configs[i][LearningRate()] = newValue[i] }
    }
  }
  // Maps a parameter's index to the index of the config group it belongs to.
  var splits: (Int) -> Int
  var states: [String: Model.AllDifferentiableVariables]
  var statDelegates: [StatDelegate<Scalar>]
  var stepDelegates: [StepDelegate<Scalar>]

  init(
    stepDelegates: [StepDelegate<Scalar>],
    statDelegates: [StatDelegate<Scalar>],
    configs: [HeterogeneousDictionary],
    splits: @escaping (Int) -> Int
  ) {
    // Build each config group from the delegates' defaults, then overlay the
    // user-supplied values so explicit settings win.
    self.configs = Array(repeating: HeterogeneousDictionary(), count: configs.count)
    states = [:]
    for stepDelegate in stepDelegates {
      for i in self.configs.indices { self.configs[i].merge(stepDelegate.defaultConfig) { (_, new) in new } }
    }
    for statDelegate in statDelegates {
      for i in self.configs.indices { self.configs[i].merge(statDelegate.defaultConfig) { (_, new) in new } }
      states[statDelegate.name] = Model.AllDifferentiableVariables.zero
    }
    for i in 0..<configs.count {
      self.configs[i].merge(configs[i]) { (_, new) in new }
    }
    self.stepDelegates = stepDelegates
    self.statDelegates = statDelegates
    self.splits = splits
  }

  func update(
    _ model: inout Model.AllDifferentiableVariables,
    along direction: Model.CotangentVector
  ) {
    // Walk every trainable tensor; `i` lets `splits` route each parameter
    // to its config group.
    for (i, kp) in model.recursivelyAllWritableKeyPaths(to: Tensor<Scalar>.self).enumerated() {
      var grad = direction[keyPath: kp]
      // Slice out this parameter's piece of every running statistic.
      var state = states.mapValues { $0[keyPath: kp] }
      var config = configs[splits(i)]
      // Stats run first and refresh the running values...
      for statDelegate in statDelegates {
        statDelegate.update(
          state: &state,
          for: model[keyPath: kp],
          along: grad,
          config: &config
        )
      }
      // ...which are written back before the steps consume them.
      for n in states.keys { states[n]![keyPath: kp] = state[n]! }
      for stepDelegate in stepDelegates {
        stepDelegate.update(
          param: &model[keyPath: kp],
          along: &grad,
          state: state,
          config: &config
        )
      }
    }
  }
}
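Note the flow inside `update`: for each parameter, the stat delegates run first and their refreshed values are written back into `states`; only then do the step delegates consume them. `config` is also passed `inout`, which is how a stat such as `AverageGrad` below can publish a derived value (its dampening) for later steps like `AdamStep` to read.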
//export
class SGDStep: StepDelegate<Float> {
  override func update(
    param: inout Tensor<Float>,
    along direction: inout Tensor<Float>,
    state: [String: Tensor<Float>],
    config: inout HeterogeneousDictionary
  ) {
    // Vanilla SGD: param ← param − lr · grad
    param -= direction * config[LearningRate()]
  }
}
//export
public struct WeightDecayKey: HetDictKey, Equatable {
  public static var defaultValue: Float = 0.0
}

// Decoupled weight decay: shrinks the parameter directly, independent of the gradient.
class WeightDecay: StepDelegate<Float> {
  override func update(
    param: inout Tensor<Float>,
    along direction: inout Tensor<Float>,
    state: [String: Tensor<Float>],
    config: inout HeterogeneousDictionary
  ) {
    param *= 1 - config[LearningRate()] * config[WeightDecayKey()]
  }
}
//export
// L2 regularization: adds wd · param to the gradient, so later delegates
// (momentum, Adam, ...) see the penalty as part of the gradient.
class L2Regularization: StepDelegate<Float> {
  override func update(
    param: inout Tensor<Float>,
    along direction: inout Tensor<Float>,
    state: [String: Tensor<Float>],
    config: inout HeterogeneousDictionary
  ) {
    direction += config[WeightDecayKey()] * param
  }
}
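The two penalties act in different places: `WeightDecay` rescales the parameter directly (decoupled weight decay, as in AdamW), while `L2Regularization` folds the penalty into the gradient, so any momentum or adaptive statistics computed afterwards accumulate it too:

$$\text{WeightDecay: } \theta \leftarrow \theta\,(1 - \eta\lambda) \qquad\qquad \text{L2: } g \leftarrow g + \lambda\theta$$

With plain SGD the two coincide; they diverge as soon as a stat delegate sits between the gradient and the step.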
//export
public struct Momentum: HetDictKey, Equatable {
  public static var defaultValue: Float = 0.9
}
public struct MomentumDampening: HetDictKey, Equatable {
  public static var defaultValue: Float = 0.9
}

class AverageGrad: StatDelegate<Float> {
  let dampened: Bool
  init(dampened: Bool = false) { self.dampened = dampened }
  override var name: String { return "averageGrad" }
  override func update(
    state: inout [String: Tensor<Float>],
    for param: Tensor<Float>,
    along direction: Tensor<Float>,
    config: inout HeterogeneousDictionary
  ) {
    // Decay the running average, then add the (optionally dampened) new gradient.
    state["averageGrad"]! *= config[Momentum()]
    config[MomentumDampening()] = 1.0 - (dampened ? config[Momentum()] : 0.0)
    state["averageGrad"]! += config[MomentumDampening()] * direction
  }
}
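Written out, with momentum $\beta$ the running average follows

$$v_t = \beta\,v_{t-1} + d\,g_t, \qquad d = \begin{cases} 1-\beta & \text{if dampened} \\ 1 & \text{otherwise,} \end{cases}$$

and the dampening $d$ is stored in the config so that later delegates (and `debias` below) can reuse it.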
// Two config groups: the first two parameter tensors are frozen (lr = 0),
// the rest train at lr = 0.01.
func splitFunc(_ a: Int) -> Int { return a < 2 ? 0 : 1 }
var configs = [HeterogeneousDictionary(LearningRate(), 0.0), HeterogeneousDictionary(LearningRate(), 0.01)]
let opt = StatefulOptimizer<BasicModel, Float>(stepDelegates: [SGDStep()], statDelegates: [],
                                               configs: configs, splits: splitFunc)
let learner = Learner(data: data, lossFunction: softmaxCrossEntropy, optimizer: opt, initializingWith: modelInit)
let recorder = learner.makeDefaultDelegates(metrics: [accuracy])
learner.delegates.append(learner.makeNormalize(mean: mnistStats.mean, std: mnistStats.std))
let params = learner.model.allDifferentiableVariables
for kp in params.recursivelyAllWritableKeyPaths(to: TF.self) {
print(params[keyPath: kp][0])
}
[ 0.0148044545, -0.018168775, 0.08359044, 0.032670785, -0.059204474, 0.0033588395,
-0.040620767, -0.04872285, 0.038860243, -0.076237716, 0.032958053, -0.025008192,
-0.045404717, -0.055635635, 0.061349068, 0.023430856, 0.06070709, -0.04115163,
-0.07197424, -0.061004944, 0.05786184, 0.07555689, -0.03056003, -0.0058024437,
0.074971415, -0.011336141, -0.017656116, -0.034722082, -0.04728878, 0.07238687,
-0.07602549, -0.057238247, 0.030227048, -0.0347825, -0.038982436, -0.055281255,
0.07062517, -0.038831223, 0.025091609, -0.0579995, -0.044969853, 0.06816071,
-0.06059001, 0.002419782, -0.046240397, 0.0035123578, 0.036387447, -0.07582915,
-0.04411659, 0.05419134]
0.0
[ -0.25084066, 0.21469887, -0.13831557, 0.31209216, -0.045463495, -0.2620387,
-0.1689401, 0.118945606, 0.17115703, -0.07897408]
0.0
learner.fit(2)
Epoch 0: [1.2309564, 0.6487]
Epoch 1: [0.99529856, 0.7125]
let params = learner.model.allDifferentiableVariables
for kp in params.recursivelyAllWritableKeyPaths(to: TF.self) {
print(params[keyPath: kp][0])
}
[ 0.0148044545, -0.018168775, 0.08359044, 0.032670785, -0.059204474, 0.0033588395,
-0.040620767, -0.04872285, 0.038860243, -0.076237716, 0.032958053, -0.025008192,
-0.045404717, -0.055635635, 0.061349068, 0.023430856, 0.06070709, -0.04115163,
-0.07197424, -0.061004944, 0.05786184, 0.07555689, -0.03056003, -0.0058024437,
0.074971415, -0.011336141, -0.017656116, -0.034722082, -0.04728878, 0.07238687,
-0.07602549, -0.057238247, 0.030227048, -0.0347825, -0.038982436, -0.055281255,
0.07062517, -0.038831223, 0.025091609, -0.0579995, -0.044969853, 0.06816071,
-0.06059001, 0.002419782, -0.046240397, 0.0035123578, 0.036387447, -0.07582915,
-0.04411659, 0.05419134]
0.0
[ 0.05842939, 0.19405742, 0.22415991, 0.613552, -0.52648324, -0.042704947,
-0.020534433, -0.24653265, 0.10602849, -0.4876513]
-0.048730064
//export
class MomentumStep: StepDelegate<Float> {
  override func update(
    param: inout Tensor<Float>,
    along direction: inout Tensor<Float>,
    state: [String: Tensor<Float>],
    config: inout HeterogeneousDictionary
  ) {
    // Step along the running average of the gradients instead of the raw gradient.
    param -= config[LearningRate()] * state["averageGrad"]!
  }
}
let opt = StatefulOptimizer<BasicModel, Float>(stepDelegates: [MomentumStep()], statDelegates: [AverageGrad()],
                                               configs: [HeterogeneousDictionary(LearningRate(), 0.01)],
                                               splits: { _ in 0 })  // a single config group for all parameters
let learner = Learner(data: data, lossFunction: softmaxCrossEntropy, optimizer: opt, initializingWith: modelInit)
let recorder = learner.makeDefaultDelegates(metrics: [accuracy])
learner.delegates.append(learner.makeNormalize(mean: mnistStats.mean, std: mnistStats.std))
learner.fit(2)
Epoch 0: [0.17444672, 0.9463]
Epoch 1: [0.13051678, 0.9596]
//export
public struct SquareMomentum: HetDictKey, Equatable {
  public static var defaultValue: Float = 0.99
}
public struct SquareMomentumDampening: HetDictKey, Equatable {
  public static var defaultValue: Float = 0.99
}

class AverageSquaredGrad: StatDelegate<Float> {
  let dampened: Bool
  init(dampened: Bool = false) { self.dampened = dampened }
  override var name: String { return "averageSquaredGrad" }
  override func update(
    state: inout [String: Tensor<Float>],
    for param: Tensor<Float>,
    along direction: Tensor<Float>,
    config: inout HeterogeneousDictionary
  ) {
    // Same recurrence as AverageGrad, applied to the squared gradients.
    state["averageSquaredGrad"]! *= config[SquareMomentum()]
    config[SquareMomentumDampening()] = 1.0 - (dampened ? config[SquareMomentum()] : 0.0)
    state["averageSquaredGrad"]! += config[SquareMomentumDampening()] * direction.squared()
  }
}
}
//export
class StepCount: StatDelegate<Float> {
  override var name: String { return "step" }
  override func update(
    state: inout [String: Tensor<Float>],
    for param: Tensor<Float>,
    along direction: Tensor<Float>,
    config: inout HeterogeneousDictionary
  ) {
    state["step"]! += 1.0
  }
}
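`AverageSquaredGrad` applies the same recurrence to the squared gradients, and `StepCount` simply counts updates; together with `AverageGrad` these are exactly the statistics Adam needs.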
//export
func debias<Scalar: TensorFlowFloatingPoint>(
  momentum: Scalar,
  dampening: Scalar,
  step: Tensor<Scalar>
) -> Tensor<Scalar> {
  return dampening * (1 - pow(momentum, step)) / (1 - momentum)
}
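This is Adam's bias-correction factor: starting from $v_0 = 0$, the recurrence above gives $v_t = d \sum_{k=1}^{t} \beta^{t-k} g_k$, whose coefficients sum to

$$d \sum_{k=1}^{t} \beta^{t-k} = d\,\frac{1-\beta^{t}}{1-\beta},$$

so dividing a running statistic by `debias` turns it into an unbiased weighted average of the gradients, in both the dampened ($d = 1-\beta$) and undampened ($d = 1$) cases.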
//export
public struct Epsilon: HetDictKey, Equatable {
  public static var defaultValue: Float = 1e-5
}

class AdamStep: StepDelegate<Float> {
  override func update(
    param: inout Tensor<Float>,
    along direction: inout Tensor<Float>,
    state: [String: Tensor<Float>],
    config: inout HeterogeneousDictionary
  ) {
    // Bias-correct the learning rate for the first moment...
    let debiasedLearningRate = config[LearningRate()] / debias(
      momentum: config[Momentum()],
      dampening: config[MomentumDampening()],
      step: state["step"]!
    )
    // ...and the RMS of the gradients for the second moment.
    let debiasedRMSGrad = sqrt(state["averageSquaredGrad"]! / debias(
      momentum: config[SquareMomentum()],
      dampening: config[SquareMomentumDampening()],
      step: state["step"]!
    )) + config[Epsilon()]
    param -= debiasedLearningRate * state["averageGrad"]! / debiasedRMSGrad
  }
}
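Writing $\hat m$ and $\hat v$ for the debiased first and second moments, the code above is the familiar Adam update:

$$\theta \leftarrow \theta - \eta\,\frac{\hat m}{\sqrt{\hat v} + \epsilon}$$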
let opt = StatefulOptimizer<BasicModel, Float>(
  stepDelegates: [AdamStep()],
  statDelegates: [AverageGrad(), AverageSquaredGrad(), StepCount()],
  configs: [HeterogeneousDictionary(LearningRate(), 0.01)],
  splits: { _ in 0 })
let learner = Learner(data: data, lossFunction: softmaxCrossEntropy, optimizer: opt, initializingWith: modelInit)
let recorder = learner.makeDefaultDelegates(metrics: [accuracy])
learner.delegates.append(learner.makeNormalize(mean: mnistStats.mean, std: mnistStats.std))
learner.fit(2)
Epoch 0: [0.22817639, 0.9364]
Epoch 1: [0.20807356, 0.9413]
class LambStep: StepDelegate<Float> {
  override var defaultConfig: HeterogeneousDictionary {
    return HeterogeneousDictionary(Epsilon(), 1e-6, WeightDecayKey(), 0.0)
  }
  override func update(
    param: inout Tensor<Float>,
    along direction: inout Tensor<Float>,
    state: [String: Tensor<Float>],
    config: inout HeterogeneousDictionary
  ) {
    let debiasedAverageGrad = state["averageGrad"]! / debias(
      momentum: config[Momentum()],
      dampening: config[MomentumDampening()],
      step: state["step"]!
    )
    let debiasedRMSGrad = sqrt(state["averageSquaredGrad"]! / debias(
      momentum: config[SquareMomentum()],
      dampening: config[SquareMomentumDampening()],
      step: state["step"]!
    ) + config[Epsilon()])
    // Adam-style step plus weight decay...
    let step = debiasedAverageGrad / debiasedRMSGrad + config[WeightDecayKey()] * param
    // ...rescaled by the layerwise trust ratio rms(param) / rms(step), capped at 10.
    let r1 = sqrt((param * param).mean())
    let r2 = sqrt((step * step).mean())
    let factor = min(r1 / r2, Float(10.0))
    param -= config[LearningRate()] * factor * step
  }
}
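This is LAMB (You et al., 2019): an Adam-style step $s = \hat m / \sqrt{\hat v + \epsilon} + \lambda\theta$ is rescaled per parameter tensor by a trust ratio so that the update's magnitude tracks the weights' magnitude:

$$\theta \leftarrow \theta - \eta\,\min\!\left(\frac{\operatorname{rms}(\theta)}{\operatorname{rms}(s)},\,10\right) s$$

Note that here $\epsilon$ sits inside the square root, unlike in `AdamStep`.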
notebookToScript(fname: (Path.cwd / "09_optimizer.ipynb").string)