012-lab-001. Basic RNN with TensorFlow
import tensorflow as tf
import numpy as np
tf.set_random_seed(777)
idx2char = ['h', 'i', 'e', 'l', 'o']
# We will teach the model the mapping hihell -> ihello
# (predict the next character at each time step)
# Sequence length is 6
x_data = [[0, 1, 0, 2, 3, 3]]    # hihell
x_one_hot = [[[1, 0, 0, 0, 0],   # h 0
              [0, 1, 0, 0, 0],   # i 1
              [1, 0, 0, 0, 0],   # h 0
              [0, 0, 1, 0, 0],   # e 2
              [0, 0, 0, 1, 0],   # l 3
              [0, 0, 0, 1, 0]]]  # l 3
y_data = [[1, 0, 2, 3, 3, 4]]    # ihello
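# Sketch (not in the original lab): the same one-hot tensor can be built
# programmatically by indexing an identity matrix with the label ids
x_one_hot_np = np.eye(5, dtype=np.float32)[x_data]  # shape (1, 6, 5), equal to x_one_hot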
num_classes = 5
input_dim = 5 # one-hot size
# The RNN output size is determined by hidden_size, not by the input size
hidden_size = 5  # LSTM output dim; 5 so the output can be read directly as one-hot
batch_size = 1 # one sentence
# sequence_length is the number of time steps the RNN is unrolled for;
# it must match the input data shape (1, 6, 5)
sequence_length = 6  # |ihello| == 6
learning_rate = 0.1
X = tf.placeholder(
    tf.float32, [None, sequence_length, input_dim])  # X one-hot
Y = tf.placeholder(tf.int32, [None, sequence_length]) # Y label
# Step 1 of an RNN in TF: create a cell
# You can choose the kind of cell
# num_units=hidden_size is the size of $$$h_{t}$$$
cell = tf.contrib.rnn.BasicLSTMCell(num_units=hidden_size, state_is_tuple=True)
# You can easily swap in another kind of cell, e.g. a plain RNN cell
# (BasicRNNCell takes no state_is_tuple argument):
# cell = tf.contrib.rnn.BasicRNNCell(num_units=hidden_size)
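# cell = tf.contrib.rnn.GRUCell(num_units=hidden_size)  # a GRU is another drop-in option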
initial_state = cell.zero_state(batch_size, tf.float32)
# Step 2 of an RNN in TF: pass the cell and the input data $$$X_{t}$$$ to dynamic_rnn
# outputs: $$$h_{t}$$$ at every time step, which we use directly
# _states: the final cell state, rarely used directly
outputs, _states = tf.nn.dynamic_rnn(
    cell, X, initial_state=initial_state, dtype=tf.float32)
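# outputs has shape (batch_size, sequence_length, hidden_size) = (1, 6, 5)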
# FC layer
X_for_fc = tf.reshape(outputs, [-1, hidden_size])
# fc_w = tf.get_variable("fc_w", [hidden_size, num_classes])
# fc_b = tf.get_variable("fc_b", [num_classes])
# outputs = tf.matmul(X_for_fc, fc_w) + fc_b
outputs = tf.contrib.layers.fully_connected(
    inputs=X_for_fc, num_outputs=num_classes, activation_fn=None)
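# X_for_fc flattens (1, 6, 5) to (6, 5), so the FC layer maps every time step
# independently to num_classes logits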
# reshape the logits back to (batch_size, sequence_length, num_classes) for sequence_loss
outputs = tf.reshape(outputs, [batch_size, sequence_length, num_classes])
weights = tf.ones([batch_size, sequence_length])
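# all-ones weights: every time step contributes equally to the loss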
sequence_loss = tf.contrib.seq2seq.sequence_loss(
    logits=outputs, targets=Y, weights=weights)
loss = tf.reduce_mean(sequence_loss)
train = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)
prediction = tf.argmax(outputs, axis=2)
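# Sketch (not in the original lab): per-character accuracy could be
# tracked alongside the loss, e.g.:
correct = tf.equal(tf.cast(prediction, tf.int32), Y)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))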
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(50):
        l, _ = sess.run([loss, train], feed_dict={X: x_one_hot, Y: y_data})
        result = sess.run(prediction, feed_dict={X: x_one_hot})
        print(i, "loss:", l, "prediction: ", result, "true Y: ", y_data)
        # print the predicted characters using the idx2char dictionary
        result_str = [idx2char[c] for c in np.squeeze(result)]
        print("\tPrediction str: ", ''.join(result_str))
'''
0 loss: 1.71584 prediction: [[2 2 2 3 3 2]] true Y: [[1, 0, 2, 3, 3, 4]]
Prediction str: eeelle
1 loss: 1.56447 prediction: [[3 3 3 3 3 3]] true Y: [[1, 0, 2, 3, 3, 4]]
Prediction str: llllll
2 loss: 1.46284 prediction: [[3 3 3 3 3 3]] true Y: [[1, 0, 2, 3, 3, 4]]
Prediction str: llllll
3 loss: 1.38073 prediction: [[3 3 3 3 3 3]] true Y: [[1, 0, 2, 3, 3, 4]]
Prediction str: llllll
4 loss: 1.30603 prediction: [[3 3 3 3 3 3]] true Y: [[1, 0, 2, 3, 3, 4]]
Prediction str: llllll
5 loss: 1.21498 prediction: [[3 3 3 3 3 3]] true Y: [[1, 0, 2, 3, 3, 4]]
Prediction str: llllll
6 loss: 1.1029 prediction: [[3 0 3 3 3 4]] true Y: [[1, 0, 2, 3, 3, 4]]
Prediction str: lhlllo
7 loss: 0.982386 prediction: [[1 0 3 3 3 4]] true Y: [[1, 0, 2, 3, 3, 4]]
Prediction str: ihlllo
8 loss: 0.871259 prediction: [[1 0 3 3 3 4]] true Y: [[1, 0, 2, 3, 3, 4]]
Prediction str: ihlllo
9 loss: 0.774338 prediction: [[1 0 2 3 3 4]] true Y: [[1, 0, 2, 3, 3, 4]]
Prediction str: ihello
10 loss: 0.676005 prediction: [[1 0 2 3 3 4]] true Y: [[1, 0, 2, 3, 3, 4]]
Prediction str: ihello
...
'''
# A second mini example: feeding batched one-hot sequences through an LSTM cell
import pprint
pp = pprint.PrettyPrinter(indent=2)
h = [1, 0, 0, 0]
e = [0, 1, 0, 0]
l = [0, 0, 1, 0]
o = [0, 0, 0, 1]
x_data = np.array([[h, e, l, l, o],
                   [e, o, l, l, o],
                   [l, l, e, e, l]], dtype=np.float32)
# x_data is already one-hot encoded: three sequences of five characters
# input dimension (one-hot size): 4
# hidden (output) dimension: 2
# sequence length: 5
# batch size: 3
pp.pprint(x_data)
# [[[1,0,0,0],   # h
#   [0,1,0,0],   # e
#   [0,0,1,0],   # l
#   [0,0,1,0],   # l
#   [0,0,0,1]],  # o
#  [[0,1,0,0],   # e
#   [0,0,0,1],   # o
#   [0,0,1,0],   # l
#   [0,0,1,0],   # l
#   [0,0,0,1]],  # o
#  [[0,0,1,0],   # l
#   [0,0,1,0],   # l
#   [0,1,0,0],   # e
#   [0,1,0,0],   # e
#   [0,0,1,0]]]  # l
# wrap in a variable scope so the LSTM variables don't clash with the ones above
with tf.variable_scope('second_example'):
    cell = tf.contrib.rnn.BasicLSTMCell(num_units=2, state_is_tuple=True)
    outputs, _states = tf.nn.dynamic_rnn(cell, x_data, dtype=tf.float32)
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
pp.pprint(outputs.eval())
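# outputs.eval() has shape (batch, sequence_length, hidden_size) = (3, 5, 2);
# the exact values depend on the random weight initialization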