010-003. seq2seq, translator
# Seq2Seq is a model which takes a sequence as input and generates a sequence as output
# Seq2Seq is used for chatbots, translation, and image captioning
# @
# You will create a translator which translates an English word into a Korean word
import tensorflow as tf
import numpy as np
# S is the symbol marking the start of the decoder's input
# E is the symbol marking the end of the decoder's output
# P is the padding symbol which fills empty positions,
# when a word is shorter than the number of time steps of the current batch
# For example, if the maximal word length in the current batch is 4,
# word -> ['w','o','r','d']
# to -> ['t','o','P','P']
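# Below is a minimal sketch of the P-padding idea described above
# (an added illustration; pad_word() is a hypothetical helper, not used later)
def pad_word(word,number_of_time_steps):
    # Fill the positions after the word with the padding symbol 'P'
    return list(word)+['P']*(number_of_time_steps-len(word))
# pad_word('to',4)
# ['t','o','P','P']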
characters_list=[one_character for one_character in 'SEPabcdefghijklmnopqrstuvwxyz단어나무놀이소녀키스사랑']
# < ['S',
# < 'E',
# < 'P',
# < 'a',
# < 'b',
# < 'c',
# < 'd',
# < 'e',
# < 'f',
# < 'g',
# < 'h',
# < 'i',
# < 'j',
# < 'k',
# < 'l',
# < 'm',
# < 'n',
# < 'o',
# < 'p',
# < 'q',
# < 'r',
# < 's',
# < 't',
# < 'u',
# < 'v',
# < 'w',
# < 'x',
# < 'y',
# < 'z',
# < '단',
# < '어',
# < '나',
# < '무',
# < '놀',
# < '이',
# < '소',
# < '녀',
# < '키',
# < '스',
# < '사',
# < '랑']
character_index_dic={word:index for index,word in enumerate(characters_list)}
# {'E': 1,
# 'P': 2,
# 'S': 0,
# 'a': 3,
# 'b': 4,
# 'c': 5,
# 'd': 6,
# 'e': 7,
# 'f': 8,
# 'g': 9,
# 'h': 10,
# 'i': 11,
# 'j': 12,
# 'k': 13,
# 'l': 14,
# 'm': 15,
# 'n': 16,
# 'o': 17,
# 'p': 18,
# 'q': 19,
# 'r': 20,
# 's': 21,
# 't': 22,
# 'u': 23,
# 'v': 24,
# 'w': 25,
# 'x': 26,
# 'y': 27,
# 'z': 28,
# '나': 31,
# '녀': 36,
# '놀': 33,
# '단': 29,
# '랑': 40,
# '무': 32,
# '사': 39,
# '소': 35,
# '스': 38,
# '어': 30,
# '이': 34,
# '키': 37}
length_of_character_index_dic=len(character_index_dic)
# 41
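# A small added sanity check (not part of the original flow):
# characters_list and character_index_dic are inverses of each other
assert character_index_dic['w']==25
assert characters_list[character_index_dic['w']]=='w'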
# This is the training dataset for the English-to-Korean translator
train_dataset_list=[['word','단어'],['wood','나무'],
['game','놀이'],['girl','소녀'],
['kiss','키스'],['love','사랑']]
def make_batch(train_dataset_list):
input_batch_one_hot_encoded_english_word_list=[]
output_batch_one_hot_encoded_translated_korean_word_list=[]
target_batch=[]
for one_train_dataset_list in train_dataset_list:
# one_train_dataset_list[0] is the English word, e.g. 'word'
# Each character is mapped to its index, e.g. character_index_dic['w'] is 25
# So 'word' becomes [25,17,20,6]
one_english_word_represented_in_number_list\
=[character_index_dic[n] for n in one_train_dataset_list[0]]
# one_train_dataset_list[1] is the Korean word, e.g. '단어'
# The decoder input gets 'S' prepended: 'S단어'
# character_index_dic['S'] is 0, so 'S단어' becomes [0,29,30]
one_korean_translated_word_list\
=[character_index_dic[n] for n in ('S'+one_train_dataset_list[1])]
# The target gets 'E' appended: '단어E'
# character_index_dic['단'] is 29, so '단어E' becomes [29,30,1]
one_korean_translated_word_target_list\
=[character_index_dic[n] for n in (one_train_dataset_list[1]+'E')]
# np.eye(41)[[25,17,20,6]].shape
# (4,41)
# np.eye(41)[[25,17,20,6]]
# [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,0., 0., 0., 0., 0., 0., 0., 0., 0.],
# [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,0., 0., 0., 0., 0., 0., 0., 0., 0.],
# [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,0., 0., 0., 0., 0., 0., 0., 0., 0.],
# [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,0., 0., 0., 0., 0., 0., 0., 0., 0.]]
input_batch_one_hot_encoded_english_word_list.append(np.eye(length_of_character_index_dic)[one_english_word_represented_in_number_list])
# np.eye(41)[[0,29,30]].shape
# (3,41)
# np.eye(41)[[0,29,30]]
# [[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,0., 0., 0., 0., 0., 0., 0., 0., 0.],
# [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.,0., 0., 0., 0., 0., 0., 0., 0., 0.],
# [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.,0., 0., 0., 0., 0., 0., 0., 0., 0.]]
output_batch_one_hot_encoded_translated_korean_word_list\
.append(np.eye(length_of_character_index_dic)[one_korean_translated_word_list])
# The target (label) values don't need to be one-hot encoded,
# because you will use sparse_softmax_cross_entropy_with_logits()
# as the loss function, and it takes integer class indices directly
target_batch.append(one_korean_translated_word_target_list)
return input_batch_one_hot_encoded_english_word_list\
,output_batch_one_hot_encoded_translated_korean_word_list\
,target_batch
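# An added sanity check of what make_batch() produces for the full dataset
# (not part of the original flow; shapes follow from 6 pairs, 4-letter
# English words, 2-character Korean words, and a 41-character vocabulary)
_enc_check,_dec_check,_tgt_check=make_batch(train_dataset_list)
# np.shape(_enc_check)
# (6,4,41)   encoder input: 6 samples, 4 time steps, 41-way one-hot
# np.shape(_dec_check)
# (6,3,41)   decoder input: 'S'+Korean word = 3 time steps
# np.shape(_tgt_check)
# (6,3)      target: Korean word+'E' as integer indices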
# You configure hyperparameters
learning_rate=0.01
# Despite its name, this is the number of hidden units in each RNN cell
number_of_hidden_layer=128
total_epoch=100
# Since the input and the output are one-hot encoded,
# their sizes are the same
number_of_class=number_of_input=length_of_character_index_dic
# @
# You build the neural network model
# In a Seq2Seq model,
# the encoder's input and the decoder's input have the same format
# [None,None,number_of_input]=[batch_size,time_steps,input_size]
input_data_to_encoder_placeholder_node\
=tf.placeholder(tf.float32,[None,None,number_of_input])
input_data_to_decoder_placeholder_node\
=tf.placeholder(tf.float32,[None,None,number_of_input])
# [None,None]=[batch size,time steps]
target_data_placeholder_node=tf.placeholder(tf.int64,[None,None])
# You build the encoder cell
with tf.variable_scope('encode'):
encoder_cell_node=tf.nn.rnn_cell.BasicRNNCell(number_of_hidden_layer)
droppedout_encoder_cell_node=tf.nn.rnn_cell.DropoutWrapper(encoder_cell_node,output_keep_prob=0.5)
# You build the encoder recurrent neural network
output_out_of_encoder,state_out_of_encoder=tf.nn.dynamic_rnn(\
droppedout_encoder_cell_node\
,input_data_to_encoder_placeholder_node\
,dtype=tf.float32)
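# (Added note) output_out_of_encoder has shape [batch_size,time_steps,128]
# and state_out_of_encoder has shape [batch_size,128];
# only this final state is handed to the decoder below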
# You build the decoder cell
with tf.variable_scope('decode'):
decoder_cell_node=tf.nn.rnn_cell.BasicRNNCell(number_of_hidden_layer)
droppedout_decoder_cell_node=tf.nn.rnn_cell.DropoutWrapper(decoder_cell_node,output_keep_prob=0.5)
# The most important point of the Seq2Seq concept is
# that you pass the encoder's final state (initial_state=state_out_of_encoder)
# into the decoder as its initial state
output_out_of_decoder,state_out_of_decoder=tf.nn.dynamic_rnn(\
droppedout_decoder_cell_node\
,input_data_to_decoder_placeholder_node\
,initial_state=state_out_of_encoder\
,dtype=tf.float32)
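# Note (an added remark, not in the original code):
# output_keep_prob=0.5 is hard-coded into both DropoutWrapper cells above,
# so dropout stays active even during the translation test at the end.
# A common alternative is to feed the keep probability through a placeholder,
# roughly like the commented sketch below (hypothetical names):
# keep_prob_placeholder=tf.placeholder_with_default(1.0,shape=[])
# droppedout_encoder_cell_node=tf.nn.rnn_cell.DropoutWrapper(
#     encoder_cell_node,output_keep_prob=keep_prob_placeholder)
# # ...and feed {keep_prob_placeholder:0.5} only in the training feed_dict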
# You create the model: a dense layer mapping decoder outputs to class logits
model=tf.layers.dense(output_out_of_decoder,number_of_class,activation=None)
# You create the loss function
cost_function_node=tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(\
logits=model,labels=target_data_placeholder_node))
# You create the Adam optimizer node
adam_optimizer_node=tf.train.AdamOptimizer(learning_rate)
to_be_trained_node=adam_optimizer_node.minimize(cost_function_node)
# @
# You train the neural network model
sess_object=tf.Session()
sess_object.run(tf.global_variables_initializer())
input_batch_one_hot_encoded_english_word_list\
,output_batch_one_hot_encoded_translated_korean_word_list\
,target_batch\
=make_batch(train_dataset_list)
for epoch in range(total_epoch):
_,loss_value=sess_object.run(\
[to_be_trained_node,cost_function_node]\
,feed_dict={\
input_data_to_encoder_placeholder_node:input_batch_one_hot_encoded_english_word_list\
,input_data_to_decoder_placeholder_node:output_batch_one_hot_encoded_translated_korean_word_list\
,target_data_placeholder_node:target_batch})
print('Epoch:','%04d'%(epoch+1),'cost_function_node =','{:.6f}'.format(loss_value))
print('Completed optimization')
# @
# You can test the translator
# This function takes an English word and predicts the translated Korean word
def translate(word):
# This model was trained on [english_word,korean_word] pairs
# as encoder input and decoder input/target
# But at prediction time, the model doesn't know the Korean word
# So you fill the decoder's input and target with the
# meaningless padding character P, e.g.
# ['wood','PPPP']
train_dataset_list=[word,'P'*len(word)]
# ['wood','PPPP']
input_batch_one_hot_encoded_english_word_list\
,output_batch_one_hot_encoded_translated_korean_word_list\
,target_batch\
=make_batch([train_dataset_list])
# model=tf.layers.dense(output_out_of_decoder,number_of_class,activation=None)
# The output of the model has the shape [batch_size,time_steps,number_of_class]
# You take the argmax over axis 2 (the class dimension)
# to get, at each time step, the character index with the highest score
prediction_word_node=tf.argmax(model,2)
result_of_prediction=sess_object.run(\
prediction_word_node\
,feed_dict={\
input_data_to_encoder_placeholder_node:input_batch_one_hot_encoded_english_word_list\
,input_data_to_decoder_placeholder_node:output_batch_one_hot_encoded_translated_korean_word_list\
,target_data_placeholder_node:target_batch})
# You build a character list by looking up,
# for each predicted index number i in the result,
# the corresponding character, e.g. characters_list[4] is 'b'
decoded_to_word_list=[characters_list[i] for i in result_of_prediction[0]]
# You cut the list at the first 'E' symbol,
# then join the remaining characters into a string
index_of_end_point=decoded_to_word_list.index('E')
final_translated_word_string=''.join(decoded_to_word_list[:index_of_end_point])
return final_translated_word_string
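# Note (an added remark): decoded_to_word_list.index('E') raises ValueError
# if the model never predicts the 'E' symbol; a defensive variant could be:
# index_of_end_point=(decoded_to_word_list.index('E')
#                     if 'E' in decoded_to_word_list
#                     else len(decoded_to_word_list))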
print('\n=== Translation test ===')
print('word ->',translate('word'))
print('wodr ->',translate('wodr'))
print('love ->',translate('love'))
print('loev ->',translate('loev'))
print('abcd ->',translate('abcd'))
# < Epoch: 0001 cost_function_node=3.719440
# < Epoch: 0002 cost_function_node=2.826655
# < Epoch: 0003 cost_function_node=1.577134
# < Epoch: 0004 cost_function_node=0.949842
# < Epoch: 0005 cost_function_node=0.630703
# < Epoch: 0006 cost_function_node=0.409163
# < Epoch: 0007 cost_function_node=0.270477
# < Epoch: 0008 cost_function_node=0.136079
# < Epoch: 0009 cost_function_node=0.179610
# < Epoch: 0010 cost_function_node=0.122789
# < Epoch: 0011 cost_function_node=0.315257
# < Epoch: 0012 cost_function_node=0.209083
# < Epoch: 0013 cost_function_node=0.073182
# < Epoch: 0014 cost_function_node=0.158532
# < Epoch: 0015 cost_function_node=0.073537
# < Epoch: 0016 cost_function_node=0.131775
# < Epoch: 0017 cost_function_node=0.024948
# < Epoch: 0018 cost_function_node=0.016395
# < Epoch: 0019 cost_function_node=0.023747
# < Epoch: 0020 cost_function_node=0.006037
# < Epoch: 0021 cost_function_node=0.007485
# < Epoch: 0022 cost_function_node=0.011314
# < Epoch: 0023 cost_function_node=0.006030
# < Epoch: 0024 cost_function_node=0.004620
# < Epoch: 0025 cost_function_node=0.009484
# < Epoch: 0026 cost_function_node=0.002795
# < Epoch: 0027 cost_function_node=0.002990
# < Epoch: 0028 cost_function_node=0.004092
# < Epoch: 0029 cost_function_node=0.009223
# < Epoch: 0030 cost_function_node=0.003198
# < Epoch: 0031 cost_function_node=0.003220
# < Epoch: 0032 cost_function_node=0.002265
# < Epoch: 0033 cost_function_node=0.003203
# < Epoch: 0034 cost_function_node=0.002904
# < Epoch: 0035 cost_function_node=0.003606
# < Epoch: 0036 cost_function_node=0.001644
# < Epoch: 0037 cost_function_node=0.001191
# < Epoch: 0038 cost_function_node=0.003059
# < Epoch: 0039 cost_function_node=0.002428
# < Epoch: 0040 cost_function_node=0.003616
# < Epoch: 0041 cost_function_node=0.001635
# < Epoch: 0042 cost_function_node=0.000851
# < Epoch: 0043 cost_function_node=0.001127
# < Epoch: 0044 cost_function_node=0.001607
# < Epoch: 0045 cost_function_node=0.002032
# < Epoch: 0046 cost_function_node=0.003624
# < Epoch: 0047 cost_function_node=0.001705
# < Epoch: 0048 cost_function_node=0.000539
# < Epoch: 0049 cost_function_node=0.001552
# < Epoch: 0050 cost_function_node=0.000402
# < Epoch: 0051 cost_function_node=0.000527
# < Epoch: 0052 cost_function_node=0.000795
# < Epoch: 0053 cost_function_node=0.004713
# < Epoch: 0054 cost_function_node=0.000393
# < Epoch: 0055 cost_function_node=0.000432
# < Epoch: 0056 cost_function_node=0.001234
# < Epoch: 0057 cost_function_node=0.001675
# < Epoch: 0058 cost_function_node=0.000236
# < Epoch: 0059 cost_function_node=0.000658
# < Epoch: 0060 cost_function_node=0.001043
# < Epoch: 0061 cost_function_node=0.000448
# < Epoch: 0062 cost_function_node=0.000912
# < Epoch: 0063 cost_function_node=0.000458
# < Epoch: 0064 cost_function_node=0.000340
# < Epoch: 0065 cost_function_node=0.000792
# < Epoch: 0066 cost_function_node=0.000615
# < Epoch: 0067 cost_function_node=0.001182
# < Epoch: 0068 cost_function_node=0.000831
# < Epoch: 0069 cost_function_node=0.000702
# < Epoch: 0070 cost_function_node=0.001436
# < Epoch: 0071 cost_function_node=0.000726
# < Epoch: 0072 cost_function_node=0.001157
# < Epoch: 0073 cost_function_node=0.001240
# < Epoch: 0074 cost_function_node=0.000554
# < Epoch: 0075 cost_function_node=0.001175
# < Epoch: 0076 cost_function_node=0.000145
# < Epoch: 0077 cost_function_node=0.000794
# < Epoch: 0078 cost_function_node=0.002363
# < Epoch: 0079 cost_function_node=0.000701
# < Epoch: 0080 cost_function_node=0.000372
# < Epoch: 0081 cost_function_node=0.001338
# < Epoch: 0082 cost_function_node=0.002810
# < Epoch: 0083 cost_function_node=0.000174
# < Epoch: 0084 cost_function_node=0.000987
# < Epoch: 0085 cost_function_node=0.000428
# < Epoch: 0086 cost_function_node=0.001124
# < Epoch: 0087 cost_function_node=0.000337
# < Epoch: 0088 cost_function_node=0.000301
# < Epoch: 0089 cost_function_node=0.000216
# < Epoch: 0090 cost_function_node=0.000842
# < Epoch: 0091 cost_function_node=0.000508
# < Epoch: 0092 cost_function_node=0.000203
# < Epoch: 0093 cost_function_node=0.000245
# < Epoch: 0094 cost_function_node=0.000154
# < Epoch: 0095 cost_function_node=0.000282
# < Epoch: 0096 cost_function_node=0.000100
# < Epoch: 0097 cost_function_node=0.000246
# < Epoch: 0098 cost_function_node=0.000466
# < Epoch: 0099 cost_function_node=0.000794
# < Epoch: 0100 cost_function_node=0.000507
# < Completed optimization
# <
# < === Translation test ===
# < word -> 단어
# < wodr -> 나무
# < love -> 사랑
# < loev -> 사랑
# < abcd -> 놀이
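# (Added remark) The model only memorizes the 6 training pairs above,
# so unseen inputs like 'wodr' and 'abcd' are still decoded into learned
# Korean character sequences (here '나무' and '놀이');
# there is no notion of an unknown word in this toy translator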