012-lab-001. basic rnn with tf

import tensorflow as tf
import numpy as np

tf.set_random_seed(777)

idx2char = ['h', 'i', 'e', 'l', 'o']
# We will teach the model the mapping hihell -> ihello.
# Sequence length is 6.
x_data = [[0, 1, 0, 2, 3, 3]]    # hihell
x_one_hot = [[[1, 0, 0, 0, 0],   # h 0
              [0, 1, 0, 0, 0],   # i 1
              [1, 0, 0, 0, 0],   # h 0
              [0, 0, 1, 0, 0],   # e 2
              [0, 0, 0, 1, 0],   # l 3
              [0, 0, 0, 1, 0]]]  # l 3
y_data = [[1, 0, 2, 3, 3, 4]]    # ihello

num_classes = 5
input_dim = 5        # one-hot size
# The output size is determined by hidden_size, regardless of the input size.
hidden_size = 5      # output from the LSTM; 5, to directly predict one-hot labels
batch_size = 1       # one sentence
# How many time steps (unrolled cells) to use;
# this is determined by the shape of the input data, (1, 6, 5).
sequence_length = 6  # |ihello| == 6
learning_rate = 0.1

X = tf.placeholder(
    tf.float32, [None, sequence_length, input_dim])    # X one-hot
Y = tf.placeholder(tf.int32, [None, sequence_length])  # Y label

# 1st step of an RNN in tf: create a cell.
# You can choose the kind of cell;
# num_units=hidden_size is the size of $$$h_{t}$$$.
cell = tf.contrib.rnn.BasicLSTMCell(num_units=hidden_size, state_is_tuple=True)
# You can easily swap in another kind of cell:
# cell = tf.contrib.rnn.BasicRNNCell(num_units=hidden_size)
initial_state = cell.zero_state(batch_size, tf.float32)

# 2nd step of an RNN in tf: pass the cell we created and the input data $$$X_{t}$$$.
# outputs: $$$h_{t}$$$, which is used directly
# _states: the new state, which is rarely used directly
outputs, _states = tf.nn.dynamic_rnn(
    cell, X, initial_state=initial_state, dtype=tf.float32)

# FC layer
X_for_fc = tf.reshape(outputs, [-1, hidden_size])
# fc_w = tf.get_variable("fc_w", [hidden_size, num_classes])
# fc_b = tf.get_variable("fc_b", [num_classes])
# outputs = tf.matmul(X_for_fc, fc_w) + fc_b
outputs = tf.contrib.layers.fully_connected(
    inputs=X_for_fc, num_outputs=num_classes, activation_fn=None)

# reshape outputs for sequence_loss
outputs = tf.reshape(outputs, [batch_size, sequence_length, num_classes])

weights = tf.ones([batch_size, sequence_length])
sequence_loss = tf.contrib.seq2seq.sequence_loss(
    logits=outputs, targets=Y, weights=weights)
loss = tf.reduce_mean(sequence_loss)
train = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

prediction = tf.argmax(outputs, axis=2)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(50):
        l, _ = sess.run([loss, train], feed_dict={X: x_one_hot, Y: y_data})
        result = sess.run(prediction, feed_dict={X: x_one_hot})
        print(i, "loss:", l, "prediction: ", result, "true Y: ", y_data)

        # print chars using the dictionary
        result_str = [idx2char[c] for c in np.squeeze(result)]
        print("\tPrediction str: ", ''.join(result_str))

'''
0 loss: 1.71584 prediction:  [[2 2 2 3 3 2]] true Y:  [[1, 0, 2, 3, 3, 4]]
	Prediction str:  eeelle
1 loss: 1.56447 prediction:  [[3 3 3 3 3 3]] true Y:  [[1, 0, 2, 3, 3, 4]]
	Prediction str:  llllll
2 loss: 1.46284 prediction:  [[3 3 3 3 3 3]] true Y:  [[1, 0, 2, 3, 3, 4]]
	Prediction str:  llllll
3 loss: 1.38073 prediction:  [[3 3 3 3 3 3]] true Y:  [[1, 0, 2, 3, 3, 4]]
	Prediction str:  llllll
4 loss: 1.30603 prediction:  [[3 3 3 3 3 3]] true Y:  [[1, 0, 2, 3, 3, 4]]
	Prediction str:  llllll
5 loss: 1.21498 prediction:  [[3 3 3 3 3 3]] true Y:  [[1, 0, 2, 3, 3, 4]]
	Prediction str:  llllll
6 loss: 1.1029 prediction:  [[3 0 3 3 3 4]] true Y:  [[1, 0, 2, 3, 3, 4]]
	Prediction str:  lhlllo
7 loss: 0.982386 prediction:  [[1 0 3 3 3 4]] true Y:  [[1, 0, 2, 3, 3, 4]]
	Prediction str:  ihlllo
8 loss: 0.871259 prediction:  [[1 0 3 3 3 4]] true Y:  [[1, 0, 2, 3, 3, 4]]
	Prediction str:  ihlllo
9 loss: 0.774338 prediction:  [[1 0 2 3 3 4]] true Y:  [[1, 0, 2, 3, 3, 4]]
	Prediction str:  ihello
10 loss: 0.676005 prediction:  [[1 0 2 3 3 4]] true Y:  [[1, 0, 2, 3, 3, 4]]
	Prediction str:  ihello
...
'''
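As a side note, the hand-typed x_one_hot block above can be generated from x_data instead. A minimal sketch using plain NumPy (np.eye plus fancy indexing; x_one_hot_np is a name introduced here for illustration, not from the original script):

import numpy as np

x_data = [[0, 1, 0, 2, 3, 3]]               # hihell as class indices
num_classes = 5
# Row i of the identity matrix is exactly the one-hot vector for class i,
# so indexing np.eye with the index array one-hot encodes the whole batch.
x_one_hot_np = np.eye(num_classes)[x_data]  # shape (1, 6, 5)
print(x_one_hot_np.astype(np.int32))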
# A second, smaller example to see how the input/output shapes behave.
# This snippet stands on its own, so we reset the graph and use an
# interactive session so that .eval() works.
import pprint
import numpy as np
import tensorflow as tf
from tensorflow.contrib import rnn

tf.reset_default_graph()
pp = pprint.PrettyPrinter(indent=4)
sess = tf.InteractiveSession()

# 4-dimensional one-hot encodings, one per character
h = [1, 0, 0, 0]
e = [0, 1, 0, 0]
l = [0, 0, 1, 0]
o = [0, 0, 0, 1]

x_data = np.array([[h, e, l, l, o],
                   [e, o, l, l, o],
                   [l, l, e, e, l]], dtype=np.float32)
# input dimension: 4
# output dimension: 2
# sequence: 5
# batch: 3
pp.pprint(x_data)
# [[[1,0,0,0],   # h
#   [0,1,0,0],   # e
#   [0,0,1,0],   # l
#   [0,0,1,0],   # l
#   [0,0,0,1]],  # o
#  [[0,1,0,0],   # e
#   [0,0,0,1],   # o
#   [0,0,1,0],   # l
#   [0,0,1,0],   # l
#   [0,0,0,1]],  # o
#  [[0,0,1,0],   # l
#   [0,0,1,0],   # l
#   [0,1,0,0],   # e
#   [0,1,0,0],   # e
#   [0,0,1,0]]]  # l

cell = rnn.BasicLSTMCell(num_units=2, state_is_tuple=True)
outputs, _states = tf.nn.dynamic_rnn(
    cell, x_data, dtype=tf.float32)
sess.run(tf.global_variables_initializer())
pp.pprint(outputs.eval())  # shape (3, 5, 2): (batch, sequence, hidden_size)
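tf.nn.dynamic_rnn also accepts an optional per-example sequence_length argument, which is useful when the sequences in a batch have different true lengths. A minimal sketch reusing sess, pp, rnn, and x_data from the snippet above (cell2, outputs2, and the seq_len values are illustrative, not from the original):

# Sketch: per-example lengths. dynamic_rnn stops stepping each sequence at
# its given length and fills the remaining output steps with zero vectors.
with tf.variable_scope("len_demo"):  # fresh scope so variables don't clash with the cell above
    cell2 = rnn.BasicLSTMCell(num_units=2, state_is_tuple=True)
    seq_len = [5, 3, 4]              # hypothetical true lengths for the 3 batch entries
    outputs2, _ = tf.nn.dynamic_rnn(
        cell2, x_data, sequence_length=seq_len, dtype=tf.float32)
sess.run(tf.global_variables_initializer())
pp.pprint(outputs2.eval())           # steps at or past each length come back as zeros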