010-002. rnn, train words, predict last character

# You will train on words which have 4 characters,
# so when you give 3 characters to the model,
# it will predict the corresponding last character
import tensorflow as tf
import numpy as np

char_arr=['a','b','c','d','e','f','g',
          'h','i','j','k','l','m','n',
          'o','p','q','r','s','t','u',
          'v','w','x','y','z']

# You create a character-to-index dictionary,
# to use for one-hot encoding and decoding
# {'a': 0,'b': 1,'c': 2,...,'j': 9,'k': 10,...}
num_dic={n: i for i,n in enumerate(char_arr)}
# < {'a': 0, 'b': 1, 'c': 2, 'd': 3, 'e': 4, 'f': 5, 'g': 6, 'h': 7, 'i': 8,
# <  'j': 9, 'k': 10, 'l': 11, 'm': 12, 'n': 13, 'o': 14, 'p': 15, 'q': 16,
# <  'r': 17, 's': 18, 't': 19, 'u': 20, 'v': 21, 'w': 22, 'x': 23, 'y': 24,
# <  'z': 25}

dic_len=len(num_dic)
# < 26

# You will use the following words as input and output,
# in the following way:
# 'wor' is X and 'd' is Y from "word"
# 'woo' is X and 'd' is Y from "wood"
seq_data=['word','wood','deep','dive','cold','cool','load','love','kiss','kind']

def make_batch(seq_data):
    input_batch=[]
    target_batch=[]
    for seq in seq_data:
        # What you create here for the input batch is index numbers from char_arr
        # batch1,batch2,...
        # [22,14,17],[22,14,14],[3,4,4],[3,8,21] ...
        # wor
        # [22,14,17]
        input=[num_dic[n] for n in seq[:-1]]
        # 3,3,15,4,3 ...
        target=num_dic[seq[-1]]
        # You perform one-hot encoding on the input
        # if input is [0,1,2]:
        # [[ 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
        #  [ 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
        #  [ 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]]
        input_batch.append(np.eye(dic_len)[input])
        # The loss function softmax_cross_entropy_with_logits expects one-hot encoded labels,
        # but sparse_softmax_cross_entropy_with_logits can take plain index values,
        # because it doesn't use one-hot encoding
        target_batch.append(target)
    return input_batch,target_batch

# @
# You configure options
learning_rate=0.01
n_hidden=128
total_epoch=30
# An input sequence like [1 2 3] has 3 steps;
# n_step is the number of time steps the RNN unrolls over
n_step=3
# The size of the input will be 26,
# because of the one-hot encoding of the alphabet characters
# The output is also classified over the 26 alphabet characters
n_input=n_class=dic_len

# @
# You build the RNN model
X=tf.placeholder(tf.float32,[None,n_step,n_input])
# Since you will use sparse_softmax_cross_entropy_with_logits() as the loss function,
# the label format will be an index number, not a one-hot vector
# So, you will use a 1-dimensional array with one index per sequence
# [3] [3] [15] [4] ...
# If you used one-hot encoding,
# the label placeholder would have shape [None,n_class]
Y=tf.placeholder(tf.int32,[None])

W=tf.Variable(tf.random_normal([n_hidden,n_class]))
b=tf.Variable(tf.random_normal([n_class]))

# You create an RNN cell
cell1=tf.nn.rnn_cell.BasicLSTMCell(n_hidden)
# You use dropout to prevent overfitting
cell1=tf.nn.rnn_cell.DropoutWrapper(cell1,output_keep_prob=0.5)
# You additionally create one more RNN cell
cell2=tf.nn.rnn_cell.BasicLSTMCell(n_hidden)
# You create a merged cell combining the multiple cells
multi_cell=tf.nn.rnn_cell.MultiRNNCell([cell1,cell2])

# You create the recurrent neural network by using tf.nn.dynamic_rnn()
# With the default time_major=False, outputs has shape [batch,time,hidden]
outputs,states=tf.nn.dynamic_rnn(multi_cell,X,dtype=tf.float32)

# You take the output of the last time step
# and feed it into the fully connected layer
outputs=tf.transpose(outputs,[1,0,2])   # [time,batch,hidden]
outputs=outputs[-1]                     # last time step: [batch,hidden]
model=tf.matmul(outputs,W) + b

cost=tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=model,labels=Y))
optimizer=tf.train.AdamOptimizer(learning_rate).minimize(cost)

# @
# You train the RNN model
sess=tf.Session()
sess.run(tf.global_variables_initializer())

input_batch,target_batch=make_batch(seq_data)

for epoch in range(total_epoch):
    _,loss=sess.run([optimizer,cost],
                    feed_dict={X: input_batch,Y: target_batch})
    print('Epoch:','%04d' % (epoch + 1),
          'cost =','{:.6f}'.format(loss))

print('Completed optimization')

# @
# You see the result
# Since the label is an int, you convert the prediction into an int
prediction=tf.cast(tf.argmax(model,1),tf.int32)
# Since you don't use one-hot encoding on the prediction and label,
# you just compare them as index numbers
prediction_check=tf.equal(prediction,Y)
accuracy=tf.reduce_mean(tf.cast(prediction_check,tf.float32))

input_batch,target_batch=make_batch(seq_data)

predict,accuracy_val=sess.run([prediction,accuracy],
                              feed_dict={X: input_batch,Y: target_batch})

predict_words=[]
for idx,val in enumerate(seq_data):
    last_char=char_arr[predict[idx]]
    predict_words.append(val[:3] + last_char)

print('\n=== Prediction ===')
print('Input:',[w[:3] + ' ' for w in seq_data])
print('Prediction:',predict_words)
print('Accuracy:',accuracy_val)
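# @
# A minimal sketch (not part of the original script), assuming you keep the
# session and graph above alive: you can feed a single 3-character prefix to
# the trained model and read back the predicted last character.
# 'lov' is just an illustrative prefix taken from the training vocabulary.
sample='lov'
# One-hot encode the prefix the same way make_batch() does: shape (3,26)
sample_input=np.eye(dic_len)[[num_dic[c] for c in sample]]
# Wrap it in a list so X receives shape (1,n_step,n_input)
sample_pred=sess.run(prediction,feed_dict={X: [sample_input]})
print(sample,'->',sample + char_arr[sample_pred[0]])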