Forked from danijar/blog_tensorflow_variable_sequence_labelling.py
Created
January 16, 2017 11:05
Revisions
danijar revised this gist
Jun 2, 2016. 1 changed file with 1 addition and 1 deletion.
@@ -70,7 +70,7 @@ def cost(self):

    @lazy_property
    def optimize(self):
        learning_rate = 0.0003
        optimizer = tf.train.AdamOptimizer(learning_rate)
        return optimizer.minimize(self.cost)
danijar revised this gist
Jun 2, 2016. 1 changed file with 9 additions and 5 deletions.
@@ -58,16 +58,19 @@ def prediction(self):

    @lazy_property
    def cost(self):
        # Compute cross entropy for each frame.
        cross_entropy = self.target * tf.log(self.prediction)
        cross_entropy = -tf.reduce_sum(cross_entropy, reduction_indices=2)
        mask = tf.sign(tf.reduce_max(tf.abs(self.target), reduction_indices=2))
        cross_entropy *= mask
        # Average over actual sequence lengths.
        cross_entropy = tf.reduce_sum(cross_entropy, reduction_indices=1)
        cross_entropy /= tf.cast(self.length, tf.float32)
        return tf.reduce_mean(cross_entropy)

    @lazy_property
    def optimize(self):
        learning_rate = 0.003
        optimizer = tf.train.AdamOptimizer(learning_rate)
        return optimizer.minimize(self.cost)

@@ -78,8 +81,9 @@ def error(self):
        mistakes = tf.cast(mistakes, tf.float32)
        mask = tf.sign(tf.reduce_max(tf.abs(self.target), reduction_indices=2))
        mistakes *= mask
        # Average over actual sequence lengths.
        mistakes = tf.reduce_sum(mistakes, reduction_indices=1)
        mistakes /= tf.cast(self.length, tf.float32)
        return tf.reduce_mean(mistakes)

    @staticmethod

@@ -113,4 +117,4 @@ def get_dataset():
            batch = train.sample(10)
            sess.run(model.optimize, {data: batch.data, target: batch.target})
        error = sess.run(model.error, {data: test.data, target: test.target})
        print('Epoch {:2d} error {:3.1f}%'.format(epoch + 1, 100 * error))
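The idea behind this revision, zeroing out the cross entropy of padded frames and dividing each sequence by its true length rather than the padded length, can be checked outside TensorFlow. A minimal NumPy sketch with made-up toy values (not part of the gist):

import numpy as np

# Toy batch: 2 sequences padded to 4 frames, 3 classes per frame.
# The second sequence has only 2 real frames; padded frames are all-zero targets.
target = np.zeros((2, 4, 3))
target[0, :, 0] = 1.0                     # sequence 0: 4 real frames
target[1, :2, 1] = 1.0                    # sequence 1: 2 real frames
prediction = np.full((2, 4, 3), 1 / 3)    # uniform predictions everywhere

# Per-frame cross entropy, as in the revised cost property.
cross_entropy = -np.sum(target * np.log(prediction), axis=2)

# Mask is 1 for real frames and 0 for padding, derived from the targets.
mask = np.sign(np.max(np.abs(target), axis=2))
cross_entropy *= mask

# Average over the actual sequence lengths, not the padded length.
length = mask.sum(axis=1)
per_sequence = cross_entropy.sum(axis=1) / length
print(per_sequence.mean())   # both sequences contribute log(3) ~= 1.0986

Without the division by the true length, shorter sequences would look artificially easy because their padded frames contribute zero loss.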
danijar revised this gist
Jun 2, 2016. 1 changed file with 1 addition and 1 deletion.
@@ -42,7 +42,7 @@ def prediction(self):
        # Recurrent network.
        output, _ = rnn.dynamic_rnn(
            rnn_cell.GRUCell(self._num_hidden),
            self.data,
            dtype=tf.float32,
            sequence_length=self.length,
        )
danijar revised this gist
May 20, 2016. 1 changed file with 9 additions and 15 deletions.
@@ -40,18 +40,20 @@ def length(self):

    @lazy_property
    def prediction(self):
        # Recurrent network.
        output, _ = rnn.dynamic_rnn(
            rnn_cell.GRUCell(self._num_hidden),
            data,
            dtype=tf.float32,
            sequence_length=self.length,
        )
        # Softmax layer.
        max_length = int(self.target.get_shape()[1])
        num_classes = int(self.target.get_shape()[2])
        weight, bias = self._weight_and_bias(self._num_hidden, num_classes)
        # Flatten to apply same weights to all time steps.
        output = tf.reshape(output, [-1, self._num_hidden])
        prediction = tf.nn.softmax(tf.matmul(output, weight) + bias)
        prediction = tf.reshape(prediction, [-1, max_length, num_classes])
        return prediction

    @lazy_property

@@ -86,14 +88,6 @@ def _weight_and_bias(in_size, out_size):
        bias = tf.constant(0.1, shape=[out_size])
        return tf.Variable(weight), tf.Variable(bias)


def get_dataset():
    """Read dataset and flatten images."""
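The softmax layer in this revision applies one weight matrix to every time step by collapsing the batch and time axes before the matmul and restoring them afterwards. The reshape round trip can be sketched in NumPy with toy shapes and random stand-in values (none of these numbers come from the gist):

import numpy as np

batch, max_length, num_hidden, num_classes = 2, 5, 4, 3
output = np.random.randn(batch, max_length, num_hidden)   # stand-in for the RNN output
weight = np.random.randn(num_hidden, num_classes)
bias = np.full(num_classes, 0.1)

# Flatten to apply the same weights to all time steps, then restore the shape.
flat = output.reshape(-1, num_hidden)                      # (batch * max_length, num_hidden)
logits = flat @ weight + bias
prediction = np.exp(logits) / np.exp(logits).sum(axis=1, keepdims=True)
prediction = prediction.reshape(-1, max_length, num_classes)
print(prediction.shape)   # (2, 5, 3): one class distribution per frame

This replaces the earlier per-time-step Python list of softmax ops, which is why _unpack_sequence and _pack_sequence could be dropped in the same revision.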
danijar revised this gist
May 19, 2016. 1 changed file with 1 addition and 1 deletion.
@@ -65,7 +65,7 @@ def cost(self):

    @lazy_property
    def optimize(self):
        learning_rate = 0.0003
        optimizer = tf.train.AdamOptimizer(learning_rate)
        return optimizer.minimize(self.cost)
danijar revised this gist
May 19, 2016. 1 changed file with 11 additions and 6 deletions.
@@ -21,7 +21,7 @@ def wrapper(self):

class VariableSequenceLabelling:

    def __init__(self, data, target, num_hidden=200, num_layers=3):
        self.data = data
        self.target = target
        self._num_hidden = num_hidden

@@ -65,15 +65,20 @@ def cost(self):

    @lazy_property
    def optimize(self):
        learning_rate = 0.03
        optimizer = tf.train.AdamOptimizer(learning_rate)
        return optimizer.minimize(self.cost)

    @lazy_property
    def error(self):
        mistakes = tf.not_equal(
            tf.argmax(self.target, 2), tf.argmax(self.prediction, 2))
        mistakes = tf.cast(mistakes, tf.float32)
        mask = tf.sign(tf.reduce_max(tf.abs(self.target), reduction_indices=2))
        mistakes *= mask
        mistakes = tf.reduce_sum(mistakes, reduction_indices=1)
        mistakes /= tf.reduce_sum(mask, reduction_indices=1)
        return tf.reduce_mean(mistakes)

    @staticmethod
    def _weight_and_bias(in_size, out_size):

@@ -114,4 +119,4 @@ def get_dataset():
            batch = train.sample(10)
            sess.run(model.optimize, {data: batch.data, target: batch.target})
        error = sess.run(model.error, {data: test.data, target: test.target})
        print('Epoch {:2d} error {:3.1f}%'.format(epoch + 1, 100 * error))
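The masked error introduced here follows the same pattern as the cost: compare per-frame argmax labels, zero out padded frames, and divide each sequence by its own frame count (tf.reduce_sum of the mask). A small NumPy illustration with invented values, showing why the mask matters:

import numpy as np

# Toy case: one sequence with 4 real frames padded to 6, two classes.
target = np.zeros((1, 6, 2))
target[0, :4, 0] = 1.0                 # real frames are all class 0
prediction = np.zeros((1, 6, 2))
prediction[0, :4, 0] = 1.0             # correct on every real frame
prediction[0, 4:, 1] = 1.0             # arbitrary output on the padded frames

mistakes = (target.argmax(2) != prediction.argmax(2)).astype(float)
mask = np.sign(np.abs(target).max(2))  # 1 for real frames, 0 for padding
print((mistakes.sum(1) / 6)[0])                      # ~0.33: padding counted as errors
print(((mistakes * mask).sum(1) / mask.sum(1))[0])   # 0.0: padding ignored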
danijar revised this gist
May 19, 2016. 1 changed file with 2 additions and 2 deletions.
@@ -48,7 +48,7 @@ def prediction(self):
        )
        # Softmax layer.
        weight, bias = self._weight_and_bias(
            self._num_hidden, int(self.target.get_shape()[2]))
        predictions = [tf.nn.softmax(tf.matmul(x, weight) + bias) for x in outputs]
        prediction = self._pack_sequence(predictions)

@@ -58,7 +58,7 @@ def prediction(self):

    def cost(self):
        cross_entropy = self.target * tf.log(self.prediction)
        cross_entropy = -tf.reduce_sum(cross_entropy, reduction_indices=2)
        mask = tf.sign(tf.reduce_max(tf.abs(self.target), reduction_indices=2))
        cross_entropy = tf.boolean_mask(cross_entropy, tf.cast(mask, tf.bool))
        cross_entropy / tf.reduce_sum(mask, reduction_indices=1)
        return tf.reduce_mean(cross_entropy)
danijar revised this gist
May 19, 2016. 1 changed file with 0 additions and 10 deletions.
@@ -89,16 +89,6 @@ def _unpack_sequence(tensor):

    def _pack_sequence(sequence):
        return tf.transpose(tf.pack(sequence), perm=[1, 0, 2])


def get_dataset():
    """Read dataset and flatten images."""
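The helper removed in this revision, _last_relevant, is only needed when classifying whole sequences rather than labelling every frame: it picks out the output at the last valid time step by flattening the batch and time axes and gathering with computed indices. A NumPy sketch of that indexing, using toy shapes and lengths chosen only for illustration:

import numpy as np

batch_size, max_length, output_size = 3, 5, 2
output = np.arange(batch_size * max_length * output_size, dtype=float)
output = output.reshape(batch_size, max_length, output_size)
length = np.array([5, 2, 4])               # true lengths of the three sequences

# Same arithmetic as _last_relevant: row-major flat index of the last valid frame.
index = np.arange(batch_size) * max_length + (length - 1)
flat = output.reshape(-1, output_size)
relevant = flat[index]
print(np.array_equal(relevant[1], output[1, 1]))   # True: last valid frame of sequence 1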
danijar created this gist
May 19, 2016.
@@ -0,0 +1,127 @@
# Working example for my blog post at:
# http://danijar.com/variable-sequence-lengths-in-tensorflow/
import functools

import sets
import tensorflow as tf
from tensorflow.models.rnn import rnn_cell
from tensorflow.models.rnn import rnn


def lazy_property(function):
    attribute = '_' + function.__name__

    @property
    @functools.wraps(function)
    def wrapper(self):
        if not hasattr(self, attribute):
            setattr(self, attribute, function(self))
        return getattr(self, attribute)
    return wrapper


class VariableSequenceLabelling:

    def __init__(self, data, target, num_hidden=200, num_layers=2):
        self.data = data
        self.target = target
        self._num_hidden = num_hidden
        self._num_layers = num_layers
        self.prediction
        self.error
        self.optimize

    @lazy_property
    def length(self):
        used = tf.sign(tf.reduce_max(tf.abs(self.data), reduction_indices=2))
        length = tf.reduce_sum(used, reduction_indices=1)
        length = tf.cast(length, tf.int32)
        return length

    @lazy_property
    def prediction(self):
        # Recurrent network.
        outputs, _ = rnn.rnn(
            rnn_cell.GRUCell(self._num_hidden),
            self._unpack_sequence(data),
            dtype=tf.float32,
            sequence_length=self.length,
        )
        # Softmax layer.
        weight, bias = self._weight_and_bias(
            self._num_hidden, int(target.get_shape()[2]))
        predictions = [tf.nn.softmax(tf.matmul(x, weight) + bias) for x in outputs]
        prediction = self._pack_sequence(predictions)
        return prediction

    @lazy_property
    def cost(self):
        cross_entropy = self.target * tf.log(self.prediction)
        cross_entropy = -tf.reduce_sum(cross_entropy, reduction_indices=2)
        mask = tf.sign(tf.reduce_max(tf.abs(target), reduction_indices=2))
        cross_entropy = tf.boolean_mask(cross_entropy, tf.cast(mask, tf.bool))
        cross_entropy / tf.reduce_sum(mask, reduction_indices=1)
        return tf.reduce_mean(cross_entropy)

    @lazy_property
    def optimize(self):
        learning_rate = 0.01
        optimizer = tf.train.RMSPropOptimizer(learning_rate)
        return optimizer.minimize(self.cost)

    @lazy_property
    def error(self):
        mistakes = tf.not_equal(
            tf.argmax(self.target, 1), tf.argmax(self.prediction, 1))
        return tf.reduce_mean(tf.cast(mistakes, tf.float32))

    @staticmethod
    def _weight_and_bias(in_size, out_size):
        weight = tf.truncated_normal([in_size, out_size], stddev=0.01)
        bias = tf.constant(0.1, shape=[out_size])
        return tf.Variable(weight), tf.Variable(bias)

    @staticmethod
    def _unpack_sequence(tensor):
        return tf.unpack(tf.transpose(tensor, perm=[1, 0, 2]))

    @staticmethod
    def _pack_sequence(sequence):
        return tf.transpose(tf.pack(sequence), perm=[1, 0, 2])

    @staticmethod
    def _last_relevant(output, length):
        batch_size = tf.shape(output)[0]
        max_length = int(output.get_shape()[1])
        output_size = int(output.get_shape()[2])
        index = tf.range(0, batch_size) * max_length + (length - 1)
        flat = tf.reshape(output, [-1, output_size])
        relevant = tf.gather(flat, index)
        return relevant


def get_dataset():
    """Read dataset and flatten images."""
    dataset = sets.Ocr()
    dataset = sets.OneHot(dataset.target, depth=2)(dataset, columns=['target'])
    dataset['data'] = dataset.data.reshape(
        dataset.data.shape[:-2] + (-1,)).astype(float)
    train, test = sets.Split(0.66)(dataset)
    return train, test


if __name__ == '__main__':
    train, test = get_dataset()
    _, length, image_size = train.data.shape
    num_classes = train.target.shape[2]
    data = tf.placeholder(tf.float32, [None, length, image_size])
    target = tf.placeholder(tf.float32, [None, length, num_classes])
    model = VariableSequenceLabelling(data, target)
    sess = tf.Session()
    sess.run(tf.initialize_all_variables())
    for epoch in range(10):
        for _ in range(100):
            batch = train.sample(10)
            sess.run(model.optimize, {data: batch.data, target: batch.target})
        error = sess.run(model.error, {data: test.data, target: test.target})
        print('Epoch {:2d} error {:3.1f}%'.format(epoch + 1, 100 * error))
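The length property at the heart of this script infers each sequence's true length from the data itself: a padded frame is an all-zero vector, so the sign of the per-frame maximum absolute value is 1 for real frames and 0 for padding, and summing over time gives the length. A quick NumPy check with a made-up padded batch (values chosen only for illustration):

import numpy as np

# Two sequences padded to 4 frames of size 3; the second has only 2 real frames.
data = np.zeros((2, 4, 3))
data[0] = np.random.rand(4, 3) + 0.1       # 4 real, nonzero frames
data[1, :2] = np.random.rand(2, 3) + 0.1   # 2 real frames, 2 padded frames

used = np.sign(np.abs(data).max(axis=2))   # 1 for real frames, 0 for padded frames
length = used.sum(axis=1).astype(int)
print(length)   # [4 2]

The same lengths are what dynamic_rnn receives via sequence_length in the later revisions, which is why no separate length annotation is needed in the dataset.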