@piyueh
Last active April 28, 2025
Revisions

  1. piyueh revised this gist Jun 8, 2020. 1 changed file with 10 additions and 0 deletions.

     tf_keras_tfp_lbfgs.py (10 additions, 0 deletions)

     @@ -95,6 +95,9 @@ def f(params_1d):
              f.iter.assign_add(1)
              tf.print("Iter:", f.iter, "loss:", loss_value)

     +        # store loss value so we can retrieve later
     +        tf.py_function(f.history.append, inp=[loss_value], Tout=[])
     +
              return loss_value, grads

          # store these information as members so we can use them outside the scope

     @@ -103,6 +106,7 @@ def f(params_1d):
          f.part = part
          f.shapes = shapes
          f.assign_new_model_parameters = assign_new_model_parameters
     +    f.history = []

          return f

     @@ -158,3 +162,9 @@ def plot_helper(inputs, outputs, title, fname):
          plot_helper(inps, pred_outs, "Predicted solution", "pred_soln.png")
          plot_helper(inps, err, "Absolute error", "abs_err.png")
          pyplot.show()
     +
     +    # print out history
     +    print("\n"+"="*80)
     +    print("History")
     +    print("="*80)
     +    print(*func.history, sep='\n')
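
     Why tf.py_function here: since f is compiled with @tf.function, a bare
     f.history.append(loss_value) would execute only once, at trace time, while
     tf.py_function defers the Python call so the append happens on every actual
     evaluation. A minimal standalone sketch of the same pattern (the names
     history and step are illustrative, not from the gist):

         import tensorflow as tf

         history = []  # plain Python list living outside the graph

         @tf.function
         def step(x):
             loss = tf.reduce_sum(x ** 2)
             # tf.py_function runs the Python callable at graph-execution time;
             # Tout=[] because list.append returns nothing we need to capture
             tf.py_function(history.append, inp=[loss], Tout=[])
             return loss

         for v in (1.0, 2.0, 3.0):
             step(tf.constant([v]))

         print(len(history))  # 3, one recorded loss per call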
  2. piyueh revised this gist Nov 5, 2019. 1 changed file with 19 additions and 25 deletions.

     tf_keras_tfp_lbfgs.py (19 additions, 25 deletions)

     @@ -48,28 +48,26 @@ def function_factory(model, loss, train_x, train_y):

          for i, shape in enumerate(shapes):
              n = numpy.product(shape)
     -        idx.append(list(numpy.arange(count, count+n).reshape(shape)))
     +        idx.append(tf.reshape(tf.range(count, count+n, dtype=tf.int32), shape))
              part.extend([i]*n)
              count += n

     -    def convet_2_list_of_ndarray(params_1d):
     -        """A function coverting 1D tf.Tensor to a list of ndarray.
     +    part = tf.constant(part)
     +
     +    @tf.function
     +    def assign_new_model_parameters(params_1d):
     +        """A function updating the model's parameters with a 1D tf.Tensor.

              Args:
                  params_1d [in]: a 1D tf.Tensor representing the model's trainable parameters.
     -
     -        Returns:
     -            A list of numpy.ndarray that can be used by model.set_weights(..).
              """

     -        model_params = []
              params = tf.dynamic_partition(params_1d, part, n_tensors)
     -        for i, param in enumerate(params):
     -            # this only work with eager execution mode
     -            model_params.append(param.numpy().reshape(shapes[i]))
     -        return model_params
     +        for i, (shape, param) in enumerate(zip(shapes, params)):
     +            model.trainable_variables[i].assign(tf.reshape(param, shape))

     -    # now create the function that will be returned by this factory
     +    # now create a function that will be returned by this factory
     +    @tf.function
          def f(params_1d):
              """A function that can be used by tfp.optimizer.lbfgs_minimize.

     @@ -84,31 +82,27 @@ def f(params_1d):

              # use GradientTape so that we can calculate the gradient of loss w.r.t. parameters
              with tf.GradientTape() as tape:
     -            # convert the 1D tf.Tensor to a list of numpy.ndarray
     -            model_params = convet_2_list_of_ndarray(params_1d)
     -
                  # update the parameters in the model
     -            model.set_weights(model_params)
     -
     +            assign_new_model_parameters(params_1d)
                  # calculate the loss
                  loss_value = loss(model(train_x, training=True), train_y)

              # calculate gradients and convert to 1D tf.Tensor
              grads = tape.gradient(loss_value, model.trainable_variables)
              grads = tf.dynamic_stitch(idx, grads)

     -        # just for writing something to stdout so that we know it's running
     -        f.iter += 1
     -        tf.print("Iter: {} Loss: {}".format(f.iter, loss_value.numpy()))
     +        # print out iteration & loss
     +        f.iter.assign_add(1)
     +        tf.print("Iter:", f.iter, "loss:", loss_value)

              return loss_value, grads

          # store these information as members so we can use them outside the scope
     -    f.iter = 0
     +    f.iter = tf.Variable(0)
          f.idx = idx
          f.part = part
          f.shapes = shapes
     -    f.convet_2_list_of_ndarray = convet_2_list_of_ndarray
     +    f.assign_new_model_parameters = assign_new_model_parameters

          return f

     @@ -152,15 +146,15 @@ def plot_helper(inputs, outputs, title, fname):

          # after training, the final optimized parameters are still in results.position
          # so we have to manually put them back to the model
     -    pred_model.set_weights(func.convet_2_list_of_ndarray(results.position))
     +    func.assign_new_model_parameters(results.position)

          # do some prediction
          pred_outs = pred_model.predict(inps)
          err = numpy.abs(pred_outs-outs)
     -    print("l2 error norm: {}".format(numpy.linalg.norm(err)))
     +    print("L2-error norm: {}".format(numpy.linalg.norm(err)/numpy.sqrt(11)))

          # plot figures
          plot_helper(inps, outs, "Exact solution", "ext_soln.png")
          plot_helper(inps, pred_outs, "Predicted solution", "pred_soln.png")
          plot_helper(inps, err, "Absolute error", "abs_err.png")
     -    pyplot.show()
     +    pyplot.show()
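
     The substance of this revision is replacing eager-only NumPy round trips
     (param.numpy() plus model.set_weights) with pure TensorFlow ops
     (tf.dynamic_partition plus Variable.assign), so both helpers can be compiled
     with @tf.function; for the same reason the iteration counter becomes a
     tf.Variable updated via assign_add, since a Python-level f.iter += 1 would
     only run at trace time. The tf.dynamic_stitch / tf.dynamic_partition pair is
     what makes the flat parameter vector work: stitch flattens a list of tensors
     into one 1-D tensor, partition splits it back. A minimal sketch of that
     round trip (all names here are illustrative, not from the gist):

         import tensorflow as tf

         # two mock "trainable variables" with different shapes
         a = tf.constant([[1., 2.], [3., 4.]])  # shape (2, 2)
         b = tf.constant([5., 6., 7.])          # shape (3,)
         shapes = [a.shape, b.shape]

         # stitch indices: where each element lands in the flat vector
         idx = [tf.reshape(tf.range(0, 4), (2, 2)), tf.range(4, 7)]
         # partition labels: which tensor each flat element came from
         part = tf.constant([0, 0, 0, 0, 1, 1, 1])

         flat = tf.dynamic_stitch(idx, [a, b])         # [1. 2. 3. 4. 5. 6. 7.]
         pieces = tf.dynamic_partition(flat, part, 2)  # exact inverse of the stitch
         restored = [tf.reshape(p, s) for p, s in zip(pieces, shapes)]
         # restored[0] equals a, restored[1] equals b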
  3. piyueh created this gist Nov 2, 2019.

     tf_keras_tfp_lbfgs.py (new file, 166 additions)

     #! /usr/bin/env python
     # -*- coding: utf-8 -*-
     # vim:fenc=utf-8
     #
     # Copyright © 2019 Pi-Yueh Chuang <[email protected]>
     #
     # Distributed under terms of the MIT license.

     """An example of using tfp.optimizer.lbfgs_minimize to optimize a TensorFlow model.

     This code shows a naive way to wrap a tf.keras.Model and optimize it with the L-BFGS
     optimizer from TensorFlow Probability.

     Python interpreter version: 3.6.9
     TensorFlow version: 2.0.0
     TensorFlow Probability version: 0.8.0
     NumPy version: 1.17.2
     Matplotlib version: 3.1.1
     """
     import numpy
     import tensorflow as tf
     import tensorflow_probability as tfp
     from matplotlib import pyplot


     def function_factory(model, loss, train_x, train_y):
         """A factory to create a function required by tfp.optimizer.lbfgs_minimize.

         Args:
             model [in]: an instance of `tf.keras.Model` or its subclasses.
             loss [in]: a function with signature loss_value = loss(pred_y, true_y).
             train_x [in]: the input part of training data.
             train_y [in]: the output part of training data.

         Returns:
             A function that has a signature of:
                 loss_value, gradients = f(model_parameters).
         """

         # obtain the shapes of all trainable parameters in the model
         shapes = tf.shape_n(model.trainable_variables)
         n_tensors = len(shapes)

         # we'll use tf.dynamic_stitch and tf.dynamic_partition later, so we need to
         # prepare required information first
         count = 0
         idx = []  # stitch indices
         part = []  # partition indices

         for i, shape in enumerate(shapes):
             n = numpy.product(shape)
             idx.append(list(numpy.arange(count, count+n).reshape(shape)))
             part.extend([i]*n)
             count += n

         def convet_2_list_of_ndarray(params_1d):
             """A function coverting 1D tf.Tensor to a list of ndarray.

             Args:
                 params_1d [in]: a 1D tf.Tensor representing the model's trainable parameters.

             Returns:
                 A list of numpy.ndarray that can be used by model.set_weights(..).
             """

             model_params = []
             params = tf.dynamic_partition(params_1d, part, n_tensors)
             for i, param in enumerate(params):
                 # this only work with eager execution mode
                 model_params.append(param.numpy().reshape(shapes[i]))
             return model_params

         # now create the function that will be returned by this factory
         def f(params_1d):
             """A function that can be used by tfp.optimizer.lbfgs_minimize.

             This function is created by function_factory.

             Args:
                 params_1d [in]: a 1D tf.Tensor.

             Returns:
                 A scalar loss and the gradients w.r.t. the `params_1d`.
             """

             # use GradientTape so that we can calculate the gradient of loss w.r.t. parameters
             with tf.GradientTape() as tape:
                 # convert the 1D tf.Tensor to a list of numpy.ndarray
                 model_params = convet_2_list_of_ndarray(params_1d)

                 # update the parameters in the model
                 model.set_weights(model_params)

                 # calculate the loss
                 loss_value = loss(model(train_x, training=True), train_y)

             # calculate gradients and convert to 1D tf.Tensor
             grads = tape.gradient(loss_value, model.trainable_variables)
             grads = tf.dynamic_stitch(idx, grads)

             # just for writing something to stdout so that we know it's running
             f.iter += 1
             tf.print("Iter: {} Loss: {}".format(f.iter, loss_value.numpy()))

             return loss_value, grads

         # store these information as members so we can use them outside the scope
         f.iter = 0
         f.idx = idx
         f.part = part
         f.shapes = shapes
         f.convet_2_list_of_ndarray = convet_2_list_of_ndarray

         return f


     def plot_helper(inputs, outputs, title, fname):
         """Plot helper"""
         pyplot.figure()
         pyplot.tricontourf(inputs[:, 0], inputs[:, 1], outputs.flatten(), 100)
         pyplot.xlabel("x")
         pyplot.ylabel("y")
         pyplot.title(title)
         pyplot.colorbar()
         pyplot.savefig(fname)


     if __name__ == "__main__":

         # use float64 by default
         tf.keras.backend.set_floatx("float64")

         # prepare training data
         x_1d = numpy.linspace(-1., 1., 11)
         x1, x2 = numpy.meshgrid(x_1d, x_1d)
         inps = numpy.stack((x1.flatten(), x2.flatten()), 1)
         outs = numpy.reshape(inps[:, 0]**2+inps[:, 1]**2, (x_1d.size**2, 1))

         # prepare prediction model, loss function, and the function passed to L-BFGS solver
         pred_model = tf.keras.Sequential(
             [tf.keras.Input(shape=[2,]),
              tf.keras.layers.Dense(64, "tanh"),
              tf.keras.layers.Dense(64, "tanh"),
              tf.keras.layers.Dense(1, None)])

         loss_fun = tf.keras.losses.MeanSquaredError()
         func = function_factory(pred_model, loss_fun, inps, outs)

         # convert initial model parameters to a 1D tf.Tensor
         init_params = tf.dynamic_stitch(func.idx, pred_model.trainable_variables)

         # train the model with L-BFGS solver
         results = tfp.optimizer.lbfgs_minimize(
             value_and_gradients_function=func, initial_position=init_params, max_iterations=500)

         # after training, the final optimized parameters are still in results.position
         # so we have to manually put them back to the model
         pred_model.set_weights(func.convet_2_list_of_ndarray(results.position))

         # do some prediction
         pred_outs = pred_model.predict(inps)
         err = numpy.abs(pred_outs-outs)
         print("l2 error norm: {}".format(numpy.linalg.norm(err)))

         # plot figures
         plot_helper(inps, outs, "Exact solution", "ext_soln.png")
         plot_helper(inps, pred_outs, "Predicted solution", "pred_soln.png")
         plot_helper(inps, err, "Absolute error", "abs_err.png")
         pyplot.show()
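
A note on using the script: tfp.optimizer.lbfgs_minimize returns more than
position. A short sketch of inspecting the result object, assuming the results
variable from the script above:

    # fields of the LBfgsOptimizerResults namedtuple useful for checking a run
    print("converged:    ", results.converged.numpy())               # tolerance met?
    print("iterations:   ", results.num_iterations.numpy())          # L-BFGS iterations used
    print("f evaluations:", results.num_objective_evaluations.numpy())
    print("final loss:   ", results.objective_value.numpy())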