012-lab-003. RNN for a long sentence

import tensorflow as tf
import numpy as np

tf.set_random_seed(777)

sample = " if you want you"

# set(sample) extracts the unique characters
idx2char = list(set(sample))
char2idx = {c: i for i, c in enumerate(idx2char)}

dic_size = len(char2idx)           # size of the input data (one-hot size)
hidden_size = len(char2idx)        # RNN output size
num_classes = len(char2idx)        # final output size (RNN or softmax, etc.)
batch_size = 1                     # one sample of data, one batch
sequence_length = len(sample) - 1  # number of LSTM unrollings (unit #)
learning_rate = 0.1

# character to index
sample_idx = [char2idx[c] for c in sample]
x_data = [sample_idx[:-1]]  # " if you want yo"
y_data = [sample_idx[1:]]   # "if you want you"

X = tf.placeholder(tf.int32, [None, sequence_length])
Y = tf.placeholder(tf.int32, [None, sequence_length])

# num_classes = size of idx2char (number of unique characters)
# Be careful with the shape when you use one_hot()
x_one_hot = tf.one_hot(X, num_classes)

cell = tf.contrib.rnn.BasicLSTMCell(
    num_units=hidden_size, state_is_tuple=True)
initial_state = cell.zero_state(batch_size, tf.float32)
outputs, _states = tf.nn.dynamic_rnn(
    cell, x_one_hot, initial_state=initial_state, dtype=tf.float32)

# FC layer
X_for_fc = tf.reshape(outputs, [-1, hidden_size])
outputs = tf.contrib.layers.fully_connected(X_for_fc, num_classes,
                                            activation_fn=None)

# reshape outputs for sequence_loss
outputs = tf.reshape(outputs, [batch_size, sequence_length, num_classes])

weights = tf.ones([batch_size, sequence_length])
sequence_loss = tf.contrib.seq2seq.sequence_loss(
    logits=outputs, targets=Y, weights=weights)
loss = tf.reduce_mean(sequence_loss)
train = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

prediction = tf.argmax(outputs, axis=2)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(50):
        l, _ = sess.run([loss, train], feed_dict={X: x_data, Y: y_data})
        result = sess.run(prediction, feed_dict={X: x_data})

        # print the predicted characters using the dictionary
        result_str = [idx2char[c] for c in np.squeeze(result)]
        print(i, "loss:", l, "Prediction:", ''.join(result_str))

'''
0 loss: 2.35377 Prediction: uuuuuuuuuuuuuuu
1 loss: 2.21383 Prediction: yy you y you
2 loss: 2.04317 Prediction: yy yoo ou
3 loss: 1.85869 Prediction: yy ou uou
4 loss: 1.65096 Prediction: yy you a you
5 loss: 1.40243 Prediction: yy you yan you
6 loss: 1.12986 Prediction: yy you wann you
7 loss: 0.907699 Prediction: yy you want you
8 loss: 0.687401 Prediction: yf you want you
9 loss: 0.508868 Prediction: yf you want you
10 loss: 0.379423 Prediction: yf you want you
11 loss: 0.282956 Prediction: if you want you
12 loss: 0.208561 Prediction: if you want you
...
'''
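The comment above warns about the shape produced by one_hot(). As a quick sanity check (not part of the original lab), the snippet below evaluates x_one_hot from the script directly: the [batch, sequence_length] index tensor becomes [batch, sequence_length, num_classes]. It assumes X, x_data, and x_one_hot are still in scope.

# Shape check (assumption: run right after the script above).
# tf.one_hot turns [batch, sequence_length] indices into
# [batch, sequence_length, num_classes].
with tf.Session() as check_sess:
    one_hot_val = check_sess.run(x_one_hot, feed_dict={X: x_data})
    print(one_hot_val.shape)  # (1, 15, 10) for this sample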
from __future__ import print_function

import tensorflow as tf
import numpy as np
from tensorflow.contrib import rnn

tf.set_random_seed(777)

sentence = ("if you want to build a ship, don't drum up people together to "
            "collect wood and don't assign them tasks and work, but rather "
            "teach them to long for the endless immensity of the sea.")

# Extract the unique characters and make them a list
char_set = list(set(sentence))
char_dic = {w: i for i, w in enumerate(char_set)}

data_dim = len(char_set)
hidden_size = len(char_set)
num_classes = len(char_set)

# You can choose any window length you want.
# sequence_length is the length of the slice of the sentence the model learns from:
# for example, x is 'if you wan' and y is 'f you want'.
sequence_length = 10
learning_rate = 0.1

dataX = []
dataY = []

# Build the training dataset with a sliding window over the sentence
for i in range(0, len(sentence) - sequence_length):
    x_str = sentence[i:i + sequence_length]
    y_str = sentence[i + 1: i + sequence_length + 1]
    print(i, x_str, '->', y_str)

    x = [char_dic[c] for c in x_str]
    y = [char_dic[c] for c in y_str]

    dataX.append(x)
    dataY.append(y)

# The training set contains many windows, so use them all as one batch
batch_size = len(dataX)

X = tf.placeholder(tf.int32, [None, sequence_length])
Y = tf.placeholder(tf.int32, [None, sequence_length])

# One-hot encoding
X_one_hot = tf.one_hot(X, num_classes)
print(X_one_hot)  # check out the shape


# Make an LSTM cell with hidden_size (each unit's output vector size)
def lstm_cell():
    cell = rnn.BasicLSTMCell(hidden_size, state_is_tuple=True)
    return cell


multi_cells = rnn.MultiRNNCell([lstm_cell() for _ in range(2)],
                               state_is_tuple=True)

# outputs: unfolding size x hidden size, state = hidden size
outputs, _states = tf.nn.dynamic_rnn(multi_cells, X_one_hot, dtype=tf.float32)

# FC layer
X_for_fc = tf.reshape(outputs, [-1, hidden_size])
outputs = tf.contrib.layers.fully_connected(X_for_fc, num_classes,
                                            activation_fn=None)

# reshape outputs for sequence_loss
outputs = tf.reshape(outputs, [batch_size, sequence_length, num_classes])

# All weights are 1 (equal weights)
weights = tf.ones([batch_size, sequence_length])

sequence_loss = tf.contrib.seq2seq.sequence_loss(
    logits=outputs, targets=Y, weights=weights)
mean_loss = tf.reduce_mean(sequence_loss)
train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(mean_loss)

sess = tf.Session()
sess.run(tf.global_variables_initializer())

for i in range(500):
    _, l, results = sess.run(
        [train_op, mean_loss, outputs], feed_dict={X: dataX, Y: dataY})

    for j, result in enumerate(results):
        index = np.argmax(result, axis=1)
        print(i, j, ''.join([char_set[t] for t in index]), l)

# Print the last character of each result to check that it works
results = sess.run(outputs, feed_dict={X: dataX})
for j, result in enumerate(results):
    index = np.argmax(result, axis=1)
    if j == 0:  # print the whole window for the first result to start the sentence
        print(''.join([char_set[t] for t in index]), end='')
    else:
        print(char_set[index[-1]], end='')

'''
0 167 tttttttttt 3.23111
0 168 tttttttttt 3.23111
0 169 tttttttttt 3.23111
…
499 167 of the se 0.229616
499 168 tf the sea 0.229616
499 169 the sea. 0.229616

g you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea.
'''
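As a usage example (not part of the original lab), the sketch below greedily generates new text from the trained network one character at a time. Because the reshape above hard-codes batch_size, the single query window is tiled to a full batch and only row 0 is read. It assumes sess, X, outputs, char_dic, char_set, sentence, sequence_length, batch_size, and np from the script are still in scope; the seed string is arbitrary.

# Greedy character-by-character sampling sketch (assumes the trained
# graph and open session from the script above).
seed = sentence[:sequence_length]  # e.g. "if you wan"
generated = seed
for _ in range(50):
    # index-encode the last sequence_length characters as one window
    window = [char_dic[c] for c in generated[-sequence_length:]]
    # tile the window to a full batch because the graph expects batch_size rows
    out = sess.run(outputs, feed_dict={X: [window] * batch_size})
    # take the most likely next character from the last time step of row 0
    next_char = char_set[np.argmax(out[0, -1])]
    generated += next_char
print(generated)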