import numpy
import random
import sys
import time

#create an index of all the words (words2idx)
...
#create an index of all the labels (labels2idx)
...

#query to classify
q = "hotel amsterdam wifi"
#labels to use: "prop", "dest", "fac"

#this will contain all our training data
matrix = []
#this list will contain the indexes of the words
words = []
#this list will contain the indexes of the labels
labels = []
#this will contain the two lists of indexes
sentence = []

words.append(words2idx["hotel"])
labels.append(labels2idx["prop"])
words.append(words2idx["amsterdam"])
labels.append(labels2idx["dest"])
words.append(words2idx["wifi"])
labels.append(labels2idx["fac"])

sentence.append(words)
sentence.append(labels)
matrix.append(sentence)
...

#the RNN settings
s = {'fold': 3,                 # 5 folds 0,1,2,3,4
     'lr': 0.0627142536696559,  # learning rate
     'verbose': 1,
     'decay': False,            # decay on the learning rate
     'win': 7,                  # number of words in the context window
     'bs': 9,                   # number of backprop-through-time steps
     'nhidden': 100,            # number of hidden units
     'seed': 345,
     'emb_dimension': 100,      # dimension of the word embeddings
     'nepochs': 50}             # number of training epochs

#instantiate the model
numpy.random.seed(s['seed'])
random.seed(s['seed'])
rnn = model(nh=s['nhidden'],
            nc=nclasses,
            ne=vocsize,
            de=s['emb_dimension'],
            cs=s['win'])

#split into train, test and validation sets
...

#train
s['clr'] = s['lr']  # current learning rate; stays fixed since s['decay'] is False
for e in xrange(s['nepochs']):
    tic = time.time()
    for i in xrange(nsentences):
        train_lex_list = train_lex[i].tolist()[0]
        #one context window of s['win'] word indexes per word in the sentence
        cwords = contextwin(train_lex_list, s['win'])
        #group the context windows into backprop-through-time chunks
        words = map(lambda x: numpy.asarray(x).astype('int32'),
                    minibatch(cwords, s['bs']))
        labels = train_y[i]
        for word_batch, label_last_word in zip(words, labels):
            rnn.train(word_batch, label_last_word, s['clr'])
            rnn.normalize()  # keep the word-embedding vectors at unit norm
        if s['verbose']:
            print '[learning] epoch %i >> %2.2f%%' % (e, (i + 1) * 100. / nsentences),\
                'completed in %.2f (sec) <<\r' % (time.time() - tic),
            sys.stdout.flush()
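
The two `...` placeholders at the top stand for building the word and label indexes. Here is a minimal sketch of one way to do it; the vocabulary and label set below are illustrative stand-ins, not the real training data:

# hypothetical vocabulary and label set, for illustration only
vocab = ["hotel", "amsterdam", "wifi"]
label_set = ["prop", "dest", "fac"]  # property type, destination, facility

words2idx = dict((w, i) for i, w in enumerate(vocab))
labels2idx = dict((l, i) for i, l in enumerate(label_set))

vocsize = len(words2idx)    # passed to the model as ne
nclasses = len(labels2idx)  # passed to the model as nc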
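
The training loop also relies on two helpers, contextwin and minibatch, that ship with the Theano RNN tutorial this code appears to be based on. For reference, they look roughly like this:

def contextwin(l, win):
    #pad the sentence with -1 (a PADDING index) on both sides and return,
    #for each word, the list of `win` word indexes centered on it
    assert (win % 2) == 1
    assert win >= 1
    l = list(l)
    lpadded = win // 2 * [-1] + l + win // 2 * [-1]
    out = [lpadded[i:(i + win)] for i in range(len(l))]
    assert len(out) == len(l)
    return out

def minibatch(l, bs):
    #return, for each word position, the up-to-`bs` most recent context
    #windows: the chunks that rnn.train backpropagates through
    out = [l[:i] for i in xrange(1, min(bs, len(l) + 1))]
    out += [l[i - bs:i] for i in xrange(bs, len(l) + 1)]
    assert len(l) == len(out)
    return out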
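
For the example query, shrinking the window to 3 (instead of 7) to keep the output readable, and using the toy indexes above (hotel=0, amsterdam=1, wifi=2):

>>> contextwin([0, 1, 2], 3)
[[-1, 0, 1], [0, 1, 2], [1, 2, -1]]
>>> minibatch(contextwin([0, 1, 2], 3), 9)
[[[-1, 0, 1]], [[-1, 0, 1], [0, 1, 2]], [[-1, 0, 1], [0, 1, 2], [1, 2, -1]]]

Each chunk is the growing prefix of context windows (capped at bs), and zip(words, labels) in the loop above pairs every chunk with the label of its last word.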