Last active: March 1, 2023, 15:10
Revisions
honnibal revised this gist
Oct 26, 2015. 1 changed file with 1 addition and 1 deletion.
@@ -73,7 +73,7 @@ def _make_array(xy):

 def _init_logreg_weights(n_hidden, n_out):
     weights = numpy.zeros((n_hidden, n_out), dtype=theano.config.floatX)
-    bias = numpy.zeros((10,), dtype=theano.config.floatX)
+    bias = numpy.zeros((n_out,), dtype=theano.config.floatX)
     return (
         theano.shared(name='W', borrow=True, value=weights),
         theano.shared(name='b', borrow=True, value=bias)
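This revision replaces the hard-coded bias shape with one derived from n_out. The constraint it restores is easy to see outside Theano: the bias must match the layer's output dimension for broadcasting to work. A minimal numpy sketch (the shapes here are my own illustration, not from the gist) of what breaks when n_out != 10:

import numpy as np

n_hidden, n_out = 500, 2           # any n_out other than 10 triggers the bug
W = np.zeros((n_hidden, n_out))
b_bad = np.zeros(10)               # hard-coded shape, as before this revision
b_good = np.zeros(n_out)           # shape derived from n_out, as after

x = np.ones(n_hidden)
print(np.dot(x, W) + b_good)       # fine: (2,) + (2,)
try:
    print(np.dot(x, W) + b_bad)   # (2,) + (10,) cannot broadcast
except ValueError as e:
    print(e)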
honnibal revised this gist
Oct 26, 2015. 1 changed file with 3 additions and 0 deletions.
@@ -14,6 +14,9 @@
 * Inputs streamed to model, not pre-loaded as given
 * Minibatch size 1, i.e. `true' stochastic update
 * No early stopping
+
+Released under MIT license
+Copyright Matthew Honnibal, 2015.
 """
 import os
 import sys
honnibal revised this gist
Aug 29, 2015. No changes.
honnibal revised this gist
Jun 22, 2015. 1 changed file with 1 addition and 1 deletion.
@@ -69,7 +69,7 @@ def _make_array(xy):


 def _init_logreg_weights(n_hidden, n_out):
-    weights = numpy.zeros((n_hidden, 10), dtype=theano.config.floatX)
+    weights = numpy.zeros((n_hidden, n_out), dtype=theano.config.floatX)
     bias = numpy.zeros((10,), dtype=theano.config.floatX)
     return (
         theano.shared(name='W', borrow=True, value=weights),
honnibal revised this gist
Jun 19, 2015. 1 changed file with 1 addition and 2 deletions.
@@ -176,8 +176,7 @@ def main(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
          dataset='mnist.pkl.gz', n_hidden=500):
     train_examples, dev_examples, test_examples = load_data(dataset)
     print '... building the model'
     train_model, evaluate_model = compile_model(28*28, 10, n_hidden, learning_rate, L1_reg, L2_reg)
     print '... training'
     for epoch in range(1, n_epochs+1):
         for x, y in train_examples:
honnibal revised this gist
Jun 19, 2015. 1 changed file with 0 additions and 1 deletion.
@@ -14,7 +14,6 @@
 * Inputs streamed to model, not pre-loaded as given
 * Minibatch size 1, i.e. `true' stochastic update
 * No early stopping
-* Model compiled in one scope. No classes.
 """
 import os
 import sys
honnibal revised this gist
Jun 19, 2015. 1 changed file with 12 additions and 12 deletions.
@@ -2,19 +2,14 @@
 Based on the tutorial here:
 http://deeplearning.net/tutorial/mlp.html

 This example trims away some complexities, and makes it easier to see how
 Theano works.

 Design changes:

 * Model compiled in a distinct function, so that symbolic variables are not
   in run-time scope.
 * No classes. Network shown by chained function calls.

 Some features of original have been dropped:

 * Inputs streamed to model, not pre-loaded as given
 * Minibatch size 1, i.e. `true' stochastic update

@@ -83,7 +78,7 @@ def _init_logreg_weights(n_hidden, n_out):
     )


-def _init_hidden_weights(n_in, n_out, activation=T.tanh):
+def _init_hidden_weights(n_in, n_out):
     rng = numpy.random.RandomState(1234)
     weights = numpy.asarray(
         rng.uniform(

@@ -131,6 +126,11 @@ def compile_model(n_in, n_classes, n_hidden, learning_rate, L1_reg, L2_reg):
     # allocate symbolic variables for the data
     x = T.vector('x')  # Features
     y = T.iscalar('y')  # (Gold) Label
+
+    # Allocate and initialize weights. These are stored internally, and updated.
+    hidden_W, hidden_b = _init_hidden_weights(n_in, n_hidden)
+    logreg_W, logreg_b = _init_logreg_weights(n_hidden, n_classes)
+
     # Estimate P(y | x) given the current weights
     p_y_given_x = feed_forward(
         T.nnet.softmax,

@@ -189,4 +189,4 @@


 if __name__ == '__main__':
     main()
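The design change this revision documents, compiling the model in a distinct function so the symbolic variables never leak into run-time scope, can be illustrated without Theano at all. Here is a rough numpy-only sketch of the same compile-then-call pattern, using a plain softmax classifier; all names and shapes below are mine, not from the gist:

import numpy as np

def compile_model(n_in, n_classes, learning_rate):
    # Parameters are allocated here and never escape this scope,
    # mirroring how the Theano version keeps its symbolic variables
    # out of main(). Callers see only the two returned callables.
    rng = np.random.RandomState(1234)
    W = rng.uniform(-0.1, 0.1, size=(n_in, n_classes))
    b = np.zeros(n_classes)

    def train_model(x, y):
        logits = x.dot(W) + b
        p = np.exp(logits - logits.max())
        p /= p.sum()
        # Gradient of -log P(y | x) with respect to the logits.
        d_logits = p.copy()
        d_logits[y] -= 1.0
        # In-place updates mutate the closed-over arrays.
        W[:] -= learning_rate * np.outer(x, d_logits)
        b[:] -= learning_rate * d_logits
        return -np.log(p[y])

    def evaluate_model(x, y):
        return int(np.argmax(x.dot(W) + b) != y)

    return train_model, evaluate_model

train_model, evaluate_model = compile_model(n_in=4, n_classes=3, learning_rate=0.1)
x, y = np.ones(4), 2
for _ in range(10):
    train_model(x, y)
print(evaluate_model(x, y))  # prints 0 once the model has fit this one example

The payoff is the same as in the gist: main() only ever handles the returned functions, so it is obvious that the setup work happens exactly once.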
honnibal revised this gist
Jun 19, 2015. 1 changed file with 15 additions and 2 deletions.
@@ -1,9 +1,22 @@
 """A stripped-down MLP example, using Theano.

 Based on the tutorial here:
 http://deeplearning.net/tutorial/mlp.html

 Theano is very unintuitive the first time you see it, and I found the MLP
 tutorial especially confusing. I think my version is clearer for two reasons:

 1. The model is compiled in a distinct function, that only returns the
    train/eval functions. This way the symbolic variables are not in scope of
    the main function, making it clear that they are not part of the run-time.

 2. No classes. The network is shown by simply chaining together the function
    calls of the components.

 I also made some simplifications, pruning out details which are necessary for
 real-world use, but complicate the example:

 * Inputs streamed to model, not pre-loaded as given
 * Minibatch size 1, i.e. `true' stochastic update
 * No early stopping
 * Model compiled in one scope. No classes.
honnibal revised this gist
Jun 19, 2015. 1 changed file with 16 additions and 22 deletions.
@@ -8,10 +8,10 @@
 * No early stopping
 * Model compiled in one scope. No classes.
 """
 import os
 import sys
 import time

 from os import path

 import numpy

@@ -31,12 +31,12 @@
     data_dir, data_file = os.path.split(dataset)
     if data_dir == "" and not os.path.isfile(dataset):
         # Check if dataset is in the data directory.
         data_dir = os.path.join(os.path.split(__file__)[0], "..", "data")
         if not path.exists(data_dir):
             print "No data directory to save data to. Try:"
             print "mkdir ../data"
             sys.exit(1)
         new_path = path.join(data_dir, data_file)
         if os.path.isfile(new_path) or data_file == 'mnist.pkl.gz':
             dataset = new_path

@@ -61,7 +61,7 @@ def _make_array(xy):
         numpy.asarray(data_y, dtype='int32'))


-def _init_maxent_weights(n_hidden, n_out):
+def _init_logreg_weights(n_hidden, n_out):
     weights = numpy.zeros((n_hidden, 10), dtype=theano.config.floatX)
     bias = numpy.zeros((10,), dtype=theano.config.floatX)
     return (

@@ -98,7 +98,7 @@ def feed_forward(activation, weights, bias, input_):
     return activation(T.dot(input_, weights) + bias)

 def sgd_step(param, cost, learning_rate):
-    return param - (learnign_rate * T.grad(cost, param))
+    return param - (learning_rate * T.grad(cost, param))

 # These are also symbolic.
 def L1(L1_reg, w1, w2):

@@ -109,7 +109,7 @@ def L2(L2_reg, w1, w2):
     return L2_reg * ((w1 ** 2).sum() + (w2 ** 2).sum())


-def compile_model(n_in, n_classes, n_hidden, learning_rate, L1, L2):
+def compile_model(n_in, n_classes, n_hidden, learning_rate, L1_reg, L2_reg):
     '''Compile train and evaluation functions, which we'll then call iteratively
     to train the parameters.
     This function is called exactly once --- think of it like a compiler.
     We declare variables, allocate memory, and define some

@@ -118,12 +118,6 @@ def compile_model(n_in, n_classes, n_hidden, learning_rate, L1, L2):
     # allocate symbolic variables for the data
     x = T.vector('x')  # Features
     y = T.iscalar('y')  # (Gold) Label
-
-    # Weights and bias term for the hidden layer
-    hidden_W, hidden_b = _init_hidden_weights(n_in, n_hidden, T.tanh)
-    # Weights and bias term for the softmax (logistic regression) layer
-    logreg_W, logreg_b = _init_logreg_weights(n_hidden, n_classes)
-
     # Estimate P(y | x) given the current weights
     p_y_given_x = feed_forward(
         T.nnet.softmax,

@@ -147,10 +141,10 @@ def compile_model(n_in, n_classes, n_hidden, learning_rate, L1, L2):
     # also define how to update the weights based on the input label.
     train_model = theano.function(
         inputs=[x, y],
-        outputs=cost,
+        outputs=cost,  # <-- Output depends on cost, which depends on P(y | x)
         updates=[
-            (maxent_W, sgd_step(logreg_W, cost, learning_rate)),
-            (maxent_b, sgd_step(logreg_W, cost, learning_rate)),
+            (logreg_W, sgd_step(logreg_W, cost, learning_rate)),
+            (logreg_b, sgd_step(logreg_b, cost, learning_rate)),
             (hidden_W, sgd_step(hidden_W, cost, learning_rate)),
             (hidden_b, sgd_step(hidden_b, cost, learning_rate)),
         ]

@@ -168,10 +162,10 @@
 def main(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
          dataset='mnist.pkl.gz', n_hidden=500):
     train_examples, dev_examples, test_examples = load_data(dataset)
     print '... building the model'
     train_model, evaluate_model = compile_model(28*28, 10, n_hidden, learning_rate, L1_reg, L2_reg)
     print '... training'
     for epoch in range(1, n_epochs+1):
         for x, y in train_examples:
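Among other cleanups, this revision fixes the learnign_rate typo in sgd_step. Because sgd_step only builds a symbolic expression, the misspelt name would not fail at definition time; it raises a NameError the first time compile_model calls it to construct the update graph. The rule itself is plain gradient descent. A numeric analogue in numpy (illustrative only, not from the gist):

import numpy as np

def sgd_step(param, grad, learning_rate):
    # Numeric analogue of the symbolic rule: new_param = param - lr * dCost/dparam.
    return param - learning_rate * grad

# One step on cost(w) = 0.5 * ||w||^2, whose gradient is w itself.
w = np.array([1.0, -2.0])
w = sgd_step(w, grad=w, learning_rate=0.1)
print(w)  # [ 0.9 -1.8], a step toward the minimum at the origin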
honnibal created this gist
Jun 19, 2015.
@@ -0,0 +1,185 @@
"""A stripped-down MLP example, using Theano.

Based on the tutorial here:

Except:

* Minibatch size 1, i.e. `true' stochastic update
* No early stopping
* Model compiled in one scope. No classes.
"""
import os
import sys
import time

import numpy

import theano
import theano.tensor as T

import gzip
import cPickle


def load_data(dataset):
    ''' Loads the dataset

    :type dataset: string
    :param dataset: the path to the dataset (here MNIST)
    '''
    # Download the MNIST dataset if it is not present
    data_dir, data_file = os.path.split(dataset)
    if data_dir == "" and not os.path.isfile(dataset):
        # Check if dataset is in the data directory.
        new_path = os.path.join(
            os.path.split(__file__)[0],
            "..",
            "data",
            dataset
        )
        if os.path.isfile(new_path) or data_file == 'mnist.pkl.gz':
            dataset = new_path

    if (not os.path.isfile(dataset)) and data_file == 'mnist.pkl.gz':
        import urllib
        url = 'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'
        print 'Downloading data from %s' % url
        urllib.urlretrieve(url, dataset)

    print '... loading data'

    # Load the dataset
    with gzip.open(dataset, 'rb') as f:
        train_set, valid_set, test_set = cPickle.load(f)
    return _make_array(train_set), _make_array(valid_set), _make_array(test_set)


def _make_array(xy):
    data_x, data_y = xy
    return zip(
        numpy.asarray(data_x, dtype=theano.config.floatX),
        numpy.asarray(data_y, dtype='int32'))


def _init_maxent_weights(n_hidden, n_out):
    weights = numpy.zeros((n_hidden, 10), dtype=theano.config.floatX)
    bias = numpy.zeros((10,), dtype=theano.config.floatX)
    return (
        theano.shared(name='W', borrow=True, value=weights),
        theano.shared(name='b', borrow=True, value=bias)
    )


def _init_hidden_weights(n_in, n_out, activation=T.tanh):
    rng = numpy.random.RandomState(1234)
    weights = numpy.asarray(
        rng.uniform(
            low=-numpy.sqrt(6. / (n_in + n_out)),
            high=numpy.sqrt(6. / (n_in + n_out)),
            size=(n_in, n_out)
        ),
        dtype=theano.config.floatX
    )
    bias = numpy.zeros((n_out,), dtype=theano.config.floatX)
    return (
        theano.shared(value=weights, name='W', borrow=True),
        theano.shared(value=bias, name='b', borrow=True)
    )


# Define how an input is fed through a layer of the network, and how a step of
# the stochastic gradient descent is computed.
# Note that these are *symbolic expressions* --- we are just compiling code here.
# These functions are only called during compile_model. The *actual* feed-forward
# and SGD update procedures, which happen iteratively on each example, are
# Theano-internal.
def feed_forward(activation, weights, bias, input_):
    return activation(T.dot(input_, weights) + bias)

def sgd_step(param, cost, learning_rate):
    return param - (learnign_rate * T.grad(cost, param))

# These are also symbolic.
def L1(L1_reg, w1, w2):
    return L1_reg * (abs(w1).sum() + abs(w2).sum())

def L2(L2_reg, w1, w2):
    return L2_reg * ((w1 ** 2).sum() + (w2 ** 2).sum())


def compile_model(n_in, n_classes, n_hidden, learning_rate, L1, L2):
    '''Compile train and evaluation functions, which we'll then call iteratively
    to train the parameters.
    This function is called exactly once --- think of it like a compiler.
    We declare variables, allocate memory, and define some computation.
    '''
    # allocate symbolic variables for the data
    x = T.vector('x')  # Features
    y = T.iscalar('y')  # (Gold) Label

    # Weights and bias term for the hidden layer
    hidden_W, hidden_b = _init_hidden_weights(n_in, n_hidden, T.tanh)
    # Weights and bias term for the softmax (logistic regression) layer
    logreg_W, logreg_b = _init_logreg_weights(n_hidden, n_classes)

    # Estimate P(y | x) given the current weights
    p_y_given_x = feed_forward(
        T.nnet.softmax,
        logreg_W,
        logreg_b,
        feed_forward(
            T.tanh,
            hidden_W,
            hidden_b,
            x))  # <--- Our input variable (the features)

    cost = (
        -T.log(p_y_given_x[0, y])  # <-- Negative log likelihood of gold label
        + L1(L1_reg, logreg_W, hidden_W)
        + L2(L2_reg, logreg_W, hidden_W)
    )

    # Compile the training function. Successive calls to this update the weights.
    # Internal state is maintained.
    # The output is "cost", which requires the computation of p_y_given_x. We
    # also define how to update the weights based on the input label.
    train_model = theano.function(
        inputs=[x, y],
        outputs=cost,
        updates=[
            (maxent_W, sgd_step(logreg_W, cost, learning_rate)),
            (maxent_b, sgd_step(logreg_W, cost, learning_rate)),
            (hidden_W, sgd_step(hidden_W, cost, learning_rate)),
            (hidden_b, sgd_step(hidden_b, cost, learning_rate)),
        ]
    )

    # Compile the evaluation function, which returns a 0/1 loss wrt the true
    # label. Note that the output depends on p_y_given_x, so the program must
    # compute it.
    evaluate_model = theano.function(
        inputs=[x, y],
        outputs=T.neq(y, T.argmax(p_y_given_x[0])),
    )
    return train_model, evaluate_model


def main(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
         dataset='mnist.pkl.gz', n_hidden=500):
    print '... building the model'
    train_model, evaluate_model = build_model(n_hidden, 10, learning_rate,
                                              L1_reg, L2_reg)
    train_examples, dev_examples, test_examples = load_data(dataset)
    print '... training'
    for epoch in range(1, n_epochs+1):
        for x, y in train_examples:
            train_model(x, y)
        # compute zero-one loss on validation set
        error = numpy.mean([evaluate_model(x, y) for x, y in dev_examples])
        print('epoch %i, validation error %f %%' % (epoch, error * 100))


if __name__ == '__main__':
    main()
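For readers who want to see concretely what the chained feed_forward calls compute, here is a small numpy-only rendering of the same forward pass. The shapes follow the gist's defaults (28*28 inputs, 500 hidden units, 10 classes); the random initialization is purely for shape-checking and is my own, not from the gist:

import numpy as np

def feed_forward(activation, weights, bias, input_):
    # Same shape contract as the symbolic version:
    # input (n_in,), weights (n_in, n_out), bias (n_out,).
    return activation(np.dot(input_, weights) + bias)

def softmax(z):
    e = np.exp(z - z.max())
    return e / e.sum()

rng = np.random.RandomState(1234)
x = rng.uniform(-1, 1, 28 * 28)                     # one MNIST image, flattened
hidden_W = rng.uniform(-0.06, 0.06, (28 * 28, 500))
hidden_b = np.zeros(500)
logreg_W = np.zeros((500, 10))
logreg_b = np.zeros(10)

# P(y | x): the softmax layer applied to the tanh hidden layer's output,
# exactly mirroring the nested feed_forward calls in compile_model.
p_y_given_x = feed_forward(softmax, logreg_W, logreg_b,
                           feed_forward(np.tanh, hidden_W, hidden_b, x))
print(p_y_given_x.shape, p_y_given_x.sum())         # (10,) and ~1.0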