#@title ##### Licensed under the Apache License, Version 2.0 (the "License"); { display-mode: "form" }
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import sys
import time

import numpy as np
import matplotlib.pyplot as plt

import tensorflow.compat.v2 as tf
tf.enable_v2_behavior()

import tensorflow_datasets as tfds
import tensorflow_probability as tfp

from discussion import nn as tfp_nn

# Globally enable XLA.
# tf.config.optimizer.set_jit(True)

try:
  physical_devices = tf.config.list_physical_devices('GPU')
  tf.config.experimental.set_memory_growth(physical_devices[0], True)
except:
  # Invalid device or cannot modify virtual devices once initialized.
  pass

tfb = tfp.bijectors
tfd = tfp.distributions

[train_dataset, eval_dataset], datasets_info = tfds.load(
    name='mnist',
    split=['train', 'test'],
    with_info=True,
    as_supervised=True,
    shuffle_files=True)


def _preprocess(image, label):
  image = tf.cast(image, dtype=tf.float32) / 255.
  label = tf.cast(label, dtype=tf.int32)
  return image, label


train_size = datasets_info.splits['train'].num_examples
batch_size = 32

train_dataset = tfp_nn.util.tune_dataset(
    train_dataset,
    batch_size=batch_size,
    shuffle_size=int(train_size / 7),
    preprocess_fn=_preprocess)

eval_dataset = tfp_nn.util.tune_dataset(
    eval_dataset,
    repeat_count=None,
    preprocess_fn=_preprocess)

x, y = next(iter(eval_dataset.batch(10)))
tfp_nn.util.display_imgs(x, y);

max_pool = tf.keras.layers.MaxPooling2D(  # Has no tf.Variables.
    pool_size=(2, 2),
    strides=(2, 2),
    padding='SAME',
    data_format='channels_last')

bnn = tfp_nn.Sequential([
    tfp_nn.ConvolutionVariationalFlipout(
        input_size=1,
        output_size=8,
        filter_shape=5,
        padding='SAME',
        penalty_weight=1. / train_size,
        name='conv1'),
    tf.nn.leaky_relu,
    max_pool,  # [28, 28, 8] -> [14, 14, 8]
    tfp_nn.ConvolutionVariationalFlipout(
        input_size=8,
        output_size=16,
        filter_shape=5,
        padding='SAME',
        penalty_weight=1. / train_size,
        name='conv2'),
    tf.nn.leaky_relu,
    max_pool,  # [14, 14, 16] -> [7, 7, 16]
    tfp_nn.util.flatten_rightmost,
    tfp_nn.AffineVariationalReparameterizationLocal(
        input_size=7 * 7 * 16,
        output_size=10,
        penalty_weight=1. / train_size,
        name='affine1'),
    lambda x: tfd.Categorical(logits=x, dtype=tf.int32),
], name='BNN')

print(bnn.summary())
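# Quick sanity check (an illustrative addition, not part of the original
# example). Each `penalty_weight=1. / train_size` above rescales that layer's
# KL(q || prior) penalty to a per-example quantity, so the training loss
# below (minibatch-mean NLL plus the summed penalties in `extra_loss`) is a
# negative per-example ELBO. One forward pass through the untrained BNN
# should yield a tfd.Categorical over the 10 digit classes:
x_check, y_check = next(iter(eval_dataset.batch(3)))
dist_check = bnn(x_check)
print('Forward pass yields:', dist_check)
print('Sampled class predictions:', dist_check.sample().numpy())
print('Accumulated KL penalty:', bnn.extra_loss)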
train_iter = iter(train_dataset)
eval_iter = iter(eval_dataset.batch(2000).repeat())


def loss():
  x, y = next(train_iter)
  nll = -tf.reduce_mean(bnn(x).log_prob(y), axis=-1)
  kl = bnn.extra_loss
  return nll + kl, (nll, kl)


opt = tf.optimizers.Adam(learning_rate=1e-2)

fit = tfp_nn.util.make_fit_op(
    loss,
    opt,
    bnn.trainable_variables,
    grad_summary_fn=lambda gs: tf.nest.map_structure(tf.norm, gs))


@tf.function(autograph=False)
def eval():
  with tf.xla.experimental.jit_scope(compile_ops=True):
    x, y = next(eval_iter)
    yhat = bnn(x)
    nll = -tf.reduce_mean(yhat.log_prob(y))
    kl = bnn.extra_loss
    loss = nll + kl
    acc = tf.reduce_mean(tf.cast(tf.equal(y, yhat.mode()), tf.float32),
                         axis=-1)
    return loss, acc, nll, kl


num_train_steps = 20e3  # @param { isTemplate: true}
num_train_steps = int(num_train_steps)  # Enforce correct type when overridden.

dur_sec = dur_num = 0
for i in range(num_train_steps):
  start = time.time()
  trn_loss, (trn_nll, trn_kl), g = fit()
  stop = time.time()
  dur_sec += stop - start
  dur_num += 1
  if i % 100 == 0 or i == num_train_steps - 1:
    tst_loss, tst_acc, tst_nll, tst_kl = eval()
    f, x = zip(*[
        ('it:{:5}', opt.iterations),
        ('ms/it:{:6.4f}', dur_sec / max(1., dur_num) * 1000.),
        ('tst_acc:{:6.4f}', tst_acc),
        ('trn_loss:{:6.4f}', trn_loss),
        ('tst_loss:{:6.4f}', tst_loss),
        ('tst_nll:{:6.4f}', tst_nll),
        ('tst_kl:{:6.4f}', tst_kl),
        ('sum_norm_grad:{:6.4f}', sum(g)),
    ])
    print(' '.join(f).format(*[getattr(x_, 'numpy', lambda: x_)()
                               for x_ in x]))
    sys.stdout.flush()
    dur_sec = dur_num = 0
  # if i % 1000 == 0 or i == num_train_steps - 1:
  #   bnn.save('/tmp/bnn.npz')

# Run inference multiple times...
num_inferences = 10  # @param { isTemplate: true}


@tf.function(autograph=False)
def predicted_log_prob(x):
  with tf.xla.experimental.jit_scope(compile_ops=True):
    return tf.math.log_softmax(bnn(x).logits, axis=-1)


eval_iter = iter(eval_dataset.batch(2000).repeat(int(num_inferences)))
before_avg_predicted_log_probs = tf.reshape(
    tf.stack([predicted_log_prob(x) for x, _ in eval_iter], axis=0),
    shape=[int(num_inferences),
           datasets_info.splits['test'].num_examples,
           -1])
bnn_predicted_log_probs = tfp.math.reduce_logmeanexp(
    before_avg_predicted_log_probs, axis=0)

decision = tf.argmax(bnn_predicted_log_probs, axis=-1, output_type=tf.int32)
confidence = tf.reduce_max(bnn_predicted_log_probs, axis=-1)

threshold = 0.95
decided_idx = tf.where(confidence > np.log(threshold))
ordered = tf.argsort(confidence)

n = datasets_info.splits['test'].num_examples
x_final, y_final = next(iter(eval_dataset.batch(n)))

print('Number of examples undecided: {}'.format(n - tf.size(decided_idx)))
accuracy = tf.reduce_mean(
    tf.cast(tf.equal(tf.gather(y_final, decided_idx),
                     tf.gather(decision, decided_idx)),
            tf.float32))
print('Accuracy after excluding undecided ones: {}'.format(accuracy))

tfp_nn.util.display_imgs(
    tf.gather(x_final, ordered[0:50]),
    tf.gather(y_final, ordered[0:50]));

from sklearn import metrics

bnn_auc = np.array([
    metrics.roc_auc_score(tf.equal(y_final, i), bnn_predicted_log_probs[:, i])
    for i in range(10)])
print('Per class AUC:\n{}'.format(bnn_auc[:, np.newaxis]))
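# `reduce_logmeanexp` above marginalizes over the weight posterior in log
# space: log p(y|x) ~= log (1/S) sum_s p(y|x, w_s), where the S =
# num_inferences samples come from re-running the stochastic forward pass.
# An illustrative equivalence check (not part of the original example),
# using logmeanexp(a) = logsumexp(a) - log(S):
manual_log_probs = (
    tf.reduce_logsumexp(before_avg_predicted_log_probs, axis=0)
    - np.log(float(num_inferences)))
print('max |manual - logmeanexp|:',
      tf.reduce_max(tf.abs(manual_log_probs - bnn_predicted_log_probs)).numpy())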
max_pool = tf.keras.layers.MaxPooling2D(  # Has no tf.Variables.
    pool_size=(2, 2),
    strides=(2, 2),
    padding='SAME',
    data_format='channels_last')

dnn = tfp_nn.Sequential([
    tfp_nn.Convolution(
        input_size=1,
        output_size=8,
        filter_shape=5,
        padding='SAME',
        name='conv1'),
    tf.nn.leaky_relu,
    max_pool,  # [28, 28, 8] -> [14, 14, 8]
    tfp_nn.Convolution(
        input_size=8,
        output_size=16,
        filter_shape=5,
        padding='SAME',
        name='conv2'),
    tf.nn.leaky_relu,
    max_pool,  # [14, 14, 16] -> [7, 7, 16]
    tfp_nn.util.flatten_rightmost,
    tfp_nn.Affine(
        input_size=7 * 7 * 16,
        output_size=10,
        name='affine1'),
    lambda x: tfd.Categorical(logits=x, dtype=tf.int32),
], name='DNN')

print(dnn.summary())

train_iter = iter(train_dataset)
eval_iter = iter(eval_dataset.batch(2000).repeat())


def loss():
  x, y = next(train_iter)
  return -tf.reduce_mean(dnn(x).log_prob(y), axis=-1), None


opt = tf.optimizers.Adam(learning_rate=1e-2)

fit = tfp_nn.util.make_fit_op(
    loss,
    opt,
    dnn.trainable_variables,
    grad_summary_fn=lambda gs: tf.nest.map_structure(tf.norm, gs))


@tf.function(autograph=False)
def eval():
  with tf.xla.experimental.jit_scope(compile_ops=True):
    x, y = next(eval_iter)
    yhat = dnn(x)
    nll = -tf.reduce_mean(yhat.log_prob(y), axis=-1)
    acc = tf.reduce_mean(tf.cast(tf.equal(y, yhat.mode()), tf.float32),
                         axis=-1)
    return nll, acc


num_train_steps = 20e3  # @param { isTemplate: true}
num_train_steps = int(num_train_steps)  # Enforce correct type when overridden.

dur_sec = dur_num = 0
for i in range(num_train_steps):
  start = time.time()
  trn_loss, _, g = fit()
  stop = time.time()
  dur_sec += stop - start
  dur_num += 1
  if i % 100 == 0 or i == num_train_steps - 1:
    tst_loss, tst_acc = eval()
    f, x = zip(*[
        ('it:{:5}', opt.iterations),
        ('ms/it:{:6.4f}', dur_sec / max(1., dur_num) * 1000.),
        ('tst_acc:{:6.4f}', tst_acc),
        ('trn_loss:{:6.4f}', trn_loss),
        ('tst_loss:{:6.4f}', tst_loss),
        ('sum_norm_grad:{:6.4f}', sum(g)),
    ])
    print(' '.join(f).format(*[getattr(x_, 'numpy', lambda: x_)()
                               for x_ in x]))
    sys.stdout.flush()
    dur_sec = dur_num = 0
  # if i % 1000 == 0 or i == num_train_steps - 1:
  #   dnn.save('/tmp/dnn.npz')


@tf.function(autograph=False)
def dnn_predicted_log_prob(x):
  with tf.xla.experimental.jit_scope(compile_ops=True):
    return tf.math.log_softmax(dnn(x).logits, axis=-1)


eval_iter = iter(eval_dataset.batch(2000))
dnn_predicted_log_probs = tf.reshape(
    tf.stack([dnn_predicted_log_prob(x) for x, _ in eval_iter], axis=0),
    shape=[datasets_info.splits['test'].num_examples, -1])

decision = tf.argmax(dnn_predicted_log_probs, axis=-1, output_type=tf.int32)
confidence = tf.reduce_max(dnn_predicted_log_probs, axis=-1)

threshold = 0.95
decided_idx = tf.where(confidence > np.log(threshold))
ordered = tf.argsort(confidence)

n = datasets_info.splits['test'].num_examples
x_final, y_final = next(iter(eval_dataset.batch(n)))

print('Number of examples undecided: {}'.format(n - tf.size(decided_idx)))
accuracy = tf.reduce_mean(
    tf.cast(tf.equal(tf.gather(y_final, decided_idx),
                     tf.gather(decision, decided_idx)),
            tf.float32))
print('Accuracy after excluding undecided ones: {}'.format(accuracy))

tfp_nn.util.display_imgs(
    tf.gather(x_final, ordered[0:50]),
    tf.gather(y_final, ordered[0:50]));

from sklearn import metrics

dnn_auc = np.array([
    metrics.roc_auc_score(tf.equal(y_final, i), dnn_predicted_log_probs[:, i])
    for i in range(10)])
print('Per class AUC:\n{}'.format(dnn_auc[:, np.newaxis]))
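# Side-by-side comparison of the per-class AUCs computed above for the two
# models (an illustrative addition; exact values depend on the training runs).
print('Mean AUC, BNN: {:.4f}, DNN: {:.4f}'.format(
    bnn_auc.mean(), dnn_auc.mean()))
for digit, (b_auc, d_auc) in enumerate(zip(bnn_auc, dnn_auc)):
  print('class {}: BNN {:.4f}  DNN {:.4f}'.format(digit, b_auc, d_auc))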