Revisions

  1. @danijar revised this gist Jun 2, 2016. 1 changed file with 1 addition and 1 deletion.

     blog_tensorflow_variable_sequence_labelling.py: 1 addition & 1 deletion

     @@ -70,7 +70,7 @@ def cost(self):
      
          @lazy_property
          def optimize(self):
     -        learning_rate = 0.003
     +        learning_rate = 0.0003
              optimizer = tf.train.AdamOptimizer(learning_rate)
              return optimizer.minimize(self.cost)

  2. @danijar revised this gist Jun 2, 2016. 1 changed file with 9 additions and 5 deletions.

     blog_tensorflow_variable_sequence_labelling.py: 9 additions & 5 deletions

     @@ -58,16 +58,19 @@ def prediction(self):
      
          @lazy_property
          def cost(self):
     +        # Compute cross entropy for each frame.
              cross_entropy = self.target * tf.log(self.prediction)
              cross_entropy = -tf.reduce_sum(cross_entropy, reduction_indices=2)
              mask = tf.sign(tf.reduce_max(tf.abs(self.target), reduction_indices=2))
     -        cross_entropy = tf.boolean_mask(cross_entropy, tf.cast(mask, tf.bool))
     -        cross_entropy / tf.reduce_sum(mask, reduction_indices=1)
     +        cross_entropy *= mask
     +        # Average over actual sequence lengths.
     +        cross_entropy = tf.reduce_sum(cross_entropy, reduction_indices=1)
     +        cross_entropy /= tf.cast(self.length, tf.float32)
              return tf.reduce_mean(cross_entropy)
      
          @lazy_property
          def optimize(self):
     -        learning_rate = 0.0003
     +        learning_rate = 0.003
              optimizer = tf.train.AdamOptimizer(learning_rate)
              return optimizer.minimize(self.cost)

     @@ -78,8 +81,9 @@ def error(self):
              mistakes = tf.cast(mistakes, tf.float32)
              mask = tf.sign(tf.reduce_max(tf.abs(self.target), reduction_indices=2))
              mistakes *= mask
     +        # Average over actual sequence lengths.
              mistakes = tf.reduce_sum(mistakes, reduction_indices=1)
     -        mistakes /= tf.reduce_sum(mask, reduction_indices=1)
     +        mistakes /= tf.cast(self.length, tf.float32)
              return tf.reduce_mean(mistakes)
      
          @staticmethod

     @@ -113,4 +117,4 @@ def get_dataset():
                  batch = train.sample(10)
                  sess.run(model.optimize, {data: batch.data, target: batch.target})
              error = sess.run(model.error, {data: test.data, target: test.target})
     -        print('Epoch {:2d} error {:3.1f}%'.format(epoch + 1, 100 * error))
     +        print('Epoch {:2d} error {:3.1f}%'.format(epoch + 1, 100 * error))
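
     Note: this revision changes the cost from boolean-masking the padded frames to multiplying by the
     mask and dividing by each example's true length, so the average is taken over real frames only.
     A minimal NumPy sketch of the same arithmetic, on made-up toy values (the gist itself does this
     with TensorFlow ops):

     import numpy as np

     # Toy batch: one-hot targets per frame, all-zero rows are padding.
     target = np.array([
         [[1., 0.], [0., 1.], [0., 0.]],   # true length 2, one padded frame
         [[0., 1.], [0., 1.], [1., 0.]],   # true length 3, no padding
     ])
     prediction = np.full_like(target, 0.5)       # stand-in for the softmax output
     length = np.array([2., 3.])                  # what the model's `length` property yields

     cross_entropy = -(target * np.log(prediction)).sum(axis=2)  # per-frame cross entropy
     mask = np.sign(np.abs(target).max(axis=2))                  # 1 for real frames, 0 for padding
     cross_entropy *= mask                                       # zero out padded frames
     cross_entropy = cross_entropy.sum(axis=1) / length          # average over true lengths
     print(cross_entropy.mean())                                 # batch mean, about 0.693 here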
  3. @danijar revised this gist Jun 2, 2016. 1 changed file with 1 addition and 1 deletion.

     blog_tensorflow_variable_sequence_labelling.py: 1 addition & 1 deletion

     @@ -42,7 +42,7 @@ def prediction(self):
              # Recurrent network.
              output, _ = rnn.dynamic_rnn(
                  rnn_cell.GRUCell(self._num_hidden),
     -            data,
     +            self.data,
                  dtype=tf.float32,
                  sequence_length=self.length,
              )
  4. @danijar revised this gist May 20, 2016. 1 changed file with 9 additions and 15 deletions.

     blog_tensorflow_variable_sequence_labelling.py: 9 additions & 15 deletions

     @@ -40,18 +40,20 @@ def length(self):
          @lazy_property
          def prediction(self):
              # Recurrent network.
     -        outputs, _ = rnn.rnn(
     +        output, _ = rnn.dynamic_rnn(
                  rnn_cell.GRUCell(self._num_hidden),
     -            self._unpack_sequence(data),
     +            data,
                  dtype=tf.float32,
                  sequence_length=self.length,
              )
              # Softmax layer.
     -        weight, bias = self._weight_and_bias(
     -            self._num_hidden, int(self.target.get_shape()[2]))
     -        predictions = [tf.nn.softmax(tf.matmul(x, weight) + bias)
     -                       for x in outputs]
     -        prediction = self._pack_sequence(predictions)
     +        max_length = int(self.target.get_shape()[1])
     +        num_classes = int(self.target.get_shape()[2])
     +        weight, bias = self._weight_and_bias(self._num_hidden, num_classes)
     +        # Flatten to apply same weights to all time steps.
     +        output = tf.reshape(output, [-1, self._num_hidden])
     +        prediction = tf.nn.softmax(tf.matmul(output, weight) + bias)
     +        prediction = tf.reshape(prediction, [-1, max_length, num_classes])
              return prediction
      
          @lazy_property

     @@ -86,14 +88,6 @@ def _weight_and_bias(in_size, out_size):
              bias = tf.constant(0.1, shape=[out_size])
              return tf.Variable(weight), tf.Variable(bias)
      
     -    @staticmethod
     -    def _unpack_sequence(tensor):
     -        return tf.unpack(tf.transpose(tensor, perm=[1, 0, 2]))
     -
     -    @staticmethod
     -    def _pack_sequence(sequence):
     -        return tf.transpose(tf.pack(sequence), perm=[1, 0, 2])
      
      
      def get_dataset():
          """Read dataset and flatten images."""
  5. @danijar revised this gist May 19, 2016. 1 changed file with 1 addition and 1 deletion.

     blog_tensorflow_variable_sequence_labelling.py: 1 addition & 1 deletion

     @@ -65,7 +65,7 @@ def cost(self):
      
          @lazy_property
          def optimize(self):
     -        learning_rate = 0.03
     +        learning_rate = 0.0003
              optimizer = tf.train.AdamOptimizer(learning_rate)
              return optimizer.minimize(self.cost)

  6. @danijar revised this gist May 19, 2016. 1 changed file with 11 additions and 6 deletions.

     blog_tensorflow_variable_sequence_labelling.py: 11 additions & 6 deletions

     @@ -21,7 +21,7 @@ def wrapper(self):
      
      class VariableSequenceLabelling:
      
     -    def __init__(self, data, target, num_hidden=200, num_layers=2):
     +    def __init__(self, data, target, num_hidden=200, num_layers=3):
              self.data = data
              self.target = target
              self._num_hidden = num_hidden

     @@ -65,15 +65,20 @@ def cost(self):
      
          @lazy_property
          def optimize(self):
     -        learning_rate = 0.01
     -        optimizer = tf.train.RMSPropOptimizer(learning_rate)
     +        learning_rate = 0.03
     +        optimizer = tf.train.AdamOptimizer(learning_rate)
              return optimizer.minimize(self.cost)
      
          @lazy_property
          def error(self):
              mistakes = tf.not_equal(
     -            tf.argmax(self.target, 1), tf.argmax(self.prediction, 1))
     -        return tf.reduce_mean(tf.cast(mistakes, tf.float32))
     +            tf.argmax(self.target, 2), tf.argmax(self.prediction, 2))
     +        mistakes = tf.cast(mistakes, tf.float32)
     +        mask = tf.sign(tf.reduce_max(tf.abs(self.target), reduction_indices=2))
     +        mistakes *= mask
     +        mistakes = tf.reduce_sum(mistakes, reduction_indices=1)
     +        mistakes /= tf.reduce_sum(mask, reduction_indices=1)
     +        return tf.reduce_mean(mistakes)
      
          @staticmethod
          def _weight_and_bias(in_size, out_size):

     @@ -114,4 +119,4 @@ def get_dataset():
                  batch = train.sample(10)
                  sess.run(model.optimize, {data: batch.data, target: batch.target})
              error = sess.run(model.error, {data: test.data, target: test.target})
     -        print('Epoch {:2d} error {:3.1f}%'.format(epoch + 1, 100 * error))
     +        print('Epoch {:2d} error {:3.1f}%'.format(epoch + 1, 100 * error))
  7. @danijar revised this gist May 19, 2016. 1 changed file with 2 additions and 2 deletions.

     blog_tensorflow_variable_sequence_labelling.py: 2 additions & 2 deletions

     @@ -48,7 +48,7 @@ def prediction(self):
              )
              # Softmax layer.
              weight, bias = self._weight_and_bias(
     -            self._num_hidden, int(target.get_shape()[2]))
     +            self._num_hidden, int(self.target.get_shape()[2]))
              predictions = [tf.nn.softmax(tf.matmul(x, weight) + bias)
                             for x in outputs]
              prediction = self._pack_sequence(predictions)

     @@ -58,7 +58,7 @@ def prediction(self):
          def cost(self):
              cross_entropy = self.target * tf.log(self.prediction)
              cross_entropy = -tf.reduce_sum(cross_entropy, reduction_indices=2)
     -        mask = tf.sign(tf.reduce_max(tf.abs(target), reduction_indices=2))
     +        mask = tf.sign(tf.reduce_max(tf.abs(self.target), reduction_indices=2))
              cross_entropy = tf.boolean_mask(cross_entropy, tf.cast(mask, tf.bool))
              cross_entropy / tf.reduce_sum(mask, reduction_indices=1)
              return tf.reduce_mean(cross_entropy)
  8. @danijar revised this gist May 19, 2016. 1 changed file with 0 additions and 10 deletions.

     blog_tensorflow_variable_sequence_labelling.py: 0 additions & 10 deletions

     @@ -89,16 +89,6 @@ def _unpack_sequence(tensor):
          def _pack_sequence(sequence):
              return tf.transpose(tf.pack(sequence), perm=[1, 0, 2])
      
     -    @staticmethod
     -    def _last_relevant(output, length):
     -        batch_size = tf.shape(output)[0]
     -        max_length = int(output.get_shape()[1])
     -        output_size = int(output.get_shape()[2])
     -        index = tf.range(0, batch_size) * max_length + (length - 1)
     -        flat = tf.reshape(output, [-1, output_size])
     -        relevant = tf.gather(flat, index)
     -        return relevant
     -
      
      def get_dataset():
          """Read dataset and flatten images."""
  9. @danijar created this gist May 19, 2016.

     blog_tensorflow_variable_sequence_labelling.py: 127 additions & 0 deletions (new file)

     # Working example for my blog post at:
     # http://danijar.com/variable-sequence-lengths-in-tensorflow/
     import functools
     import sets
     import tensorflow as tf
     from tensorflow.models.rnn import rnn_cell
     from tensorflow.models.rnn import rnn


     def lazy_property(function):
         attribute = '_' + function.__name__

         @property
         @functools.wraps(function)
         def wrapper(self):
             if not hasattr(self, attribute):
                 setattr(self, attribute, function(self))
             return getattr(self, attribute)
         return wrapper


     class VariableSequenceLabelling:

         def __init__(self, data, target, num_hidden=200, num_layers=2):
             self.data = data
             self.target = target
             self._num_hidden = num_hidden
             self._num_layers = num_layers
             self.prediction
             self.error
             self.optimize

         @lazy_property
         def length(self):
             used = tf.sign(tf.reduce_max(tf.abs(self.data), reduction_indices=2))
             length = tf.reduce_sum(used, reduction_indices=1)
             length = tf.cast(length, tf.int32)
             return length

         @lazy_property
         def prediction(self):
             # Recurrent network.
             outputs, _ = rnn.rnn(
                 rnn_cell.GRUCell(self._num_hidden),
                 self._unpack_sequence(data),
                 dtype=tf.float32,
                 sequence_length=self.length,
             )
             # Softmax layer.
             weight, bias = self._weight_and_bias(
                 self._num_hidden, int(target.get_shape()[2]))
             predictions = [tf.nn.softmax(tf.matmul(x, weight) + bias)
                            for x in outputs]
             prediction = self._pack_sequence(predictions)
             return prediction

         @lazy_property
         def cost(self):
             cross_entropy = self.target * tf.log(self.prediction)
             cross_entropy = -tf.reduce_sum(cross_entropy, reduction_indices=2)
             mask = tf.sign(tf.reduce_max(tf.abs(target), reduction_indices=2))
             cross_entropy = tf.boolean_mask(cross_entropy, tf.cast(mask, tf.bool))
             cross_entropy / tf.reduce_sum(mask, reduction_indices=1)
             return tf.reduce_mean(cross_entropy)

         @lazy_property
         def optimize(self):
             learning_rate = 0.01
             optimizer = tf.train.RMSPropOptimizer(learning_rate)
             return optimizer.minimize(self.cost)

         @lazy_property
         def error(self):
             mistakes = tf.not_equal(
                 tf.argmax(self.target, 1), tf.argmax(self.prediction, 1))
             return tf.reduce_mean(tf.cast(mistakes, tf.float32))

         @staticmethod
         def _weight_and_bias(in_size, out_size):
             weight = tf.truncated_normal([in_size, out_size], stddev=0.01)
             bias = tf.constant(0.1, shape=[out_size])
             return tf.Variable(weight), tf.Variable(bias)

         @staticmethod
         def _unpack_sequence(tensor):
             return tf.unpack(tf.transpose(tensor, perm=[1, 0, 2]))

         @staticmethod
         def _pack_sequence(sequence):
             return tf.transpose(tf.pack(sequence), perm=[1, 0, 2])

         @staticmethod
         def _last_relevant(output, length):
             batch_size = tf.shape(output)[0]
             max_length = int(output.get_shape()[1])
             output_size = int(output.get_shape()[2])
             index = tf.range(0, batch_size) * max_length + (length - 1)
             flat = tf.reshape(output, [-1, output_size])
             relevant = tf.gather(flat, index)
             return relevant


     def get_dataset():
         """Read dataset and flatten images."""
         dataset = sets.Ocr()
         dataset = sets.OneHot(dataset.target, depth=2)(dataset, columns=['target'])
         dataset['data'] = dataset.data.reshape(
             dataset.data.shape[:-2] + (-1,)).astype(float)
         train, test = sets.Split(0.66)(dataset)
         return train, test


     if __name__ == '__main__':
         train, test = get_dataset()
         _, length, image_size = train.data.shape
         num_classes = train.target.shape[2]
         data = tf.placeholder(tf.float32, [None, length, image_size])
         target = tf.placeholder(tf.float32, [None, length, num_classes])
         model = VariableSequenceLabelling(data, target)
         sess = tf.Session()
         sess.run(tf.initialize_all_variables())
         for epoch in range(10):
             for _ in range(100):
                 batch = train.sample(10)
                 sess.run(model.optimize, {data: batch.data, target: batch.target})
             error = sess.run(model.error, {data: test.data, target: test.target})
             print('Epoch {:2d} error {:3.1f}%'.format(epoch + 1, 100 * error))
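
     Note: the length property infers each sequence's true length from the zero padding itself; a
     frame counts as used if any of its features is non-zero. A NumPy sketch of the same idea on a
     made-up padded batch:

     import numpy as np

     # Hypothetical batch of 2 sequences, zero-padded to 4 frames of 2 features each.
     data = np.array([
         [[0.2, 0.1], [0.5, 0.3], [0.0, 0.0], [0.0, 0.0]],   # true length 2
         [[0.9, 0.4], [0.1, 0.7], [0.3, 0.2], [0.0, 0.0]],   # true length 3
     ])

     used = np.sign(np.abs(data).max(axis=2))     # 1 where any feature is non-zero, else 0
     length = used.sum(axis=1).astype(np.int32)   # used frames per sequence
     print(length)                                # [2 3]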