# 010-003. seq2seq, translator

# Seq2Seq is a model which reads an input sequence and learns to generate an output sequence
# Seq2Seq is used for chatbots, translation, image captioning, etc.

# @
# You will create a translator which translates an English word into a Korean word

import tensorflow as tf
import numpy as np

# S is the symbol representing the start point of the decoder's input
# E is the symbol representing the end point of the decoder's output
# P is the symbol used as a filler character for an empty sequence,
# when the size of a word is smaller than the time-step size of the current batch data
# For example, if the maximal word size of the current batch data is 4,
# word -> ['w','o','r','d']
# to   -> ['t','o','P','P']

characters_list=[one_character for one_character in 'SEPabcdefghijklmnopqrstuvwxyz단어나무놀이소녀키스사랑']
# < ['S', 'E', 'P', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
# <  'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
# <  '단', '어', '나', '무', '놀', '이', '소', '녀', '키', '스', '사', '랑']

character_index_dic={word:index for index,word in enumerate(characters_list)}
# {'S': 0, 'E': 1, 'P': 2,
#  'a': 3, 'b': 4, 'c': 5, 'd': 6, 'e': 7, 'f': 8, 'g': 9, 'h': 10, 'i': 11,
#  'j': 12, 'k': 13, 'l': 14, 'm': 15, 'n': 16, 'o': 17, 'p': 18, 'q': 19, 'r': 20,
#  's': 21, 't': 22, 'u': 23, 'v': 24, 'w': 25, 'x': 26, 'y': 27, 'z': 28,
#  '단': 29, '어': 30, '나': 31, '무': 32, '놀': 33, '이': 34,
#  '소': 35, '녀': 36, '키': 37, '스': 38, '사': 39, '랑': 40}

length_of_character_index_dic=len(character_index_dic)
# 41

# This is the train dataset for the English-to-Korean translator
train_dataset_list=[['word','단어'],['wood','나무'],
                    ['game','놀이'],['girl','소녀'],
                    ['kiss','키스'],['love','사랑']]

def make_batch(train_dataset_list):
    input_batch_one_hot_encoded_english_word_list=[]
    output_batch_one_hot_encoded_translated_korean_word_list=[]
    target_batch=[]

    for one_train_dataset_list in train_dataset_list:
        # one_train_dataset_list[0] is the English word, for example 'word'
        # Each character is replaced by its index, for example character_index_dic['w'] is 25
        # 'word' -> [25,17,20,6]
        one_english_word_represented_in_number_list\
            =[character_index_dic[n] for n in one_train_dataset_list[0]]

        # one_train_dataset_list[1] is the Korean word, for example '단어'
        # The decoder input is the Korean word prefixed with the start symbol 'S': 'S단어'
        # character_index_dic['S'] is 0
        # 'S단어' -> [0,29,30]
        one_korean_translated_word_list\
            =[character_index_dic[n] for n in ('S'+one_train_dataset_list[1])]

        # The decoder target is the Korean word suffixed with the end symbol 'E': '단어E'
        # character_index_dic['단'] is 29
        # '단어E' -> [29,30,1]
        one_korean_translated_word_target_list\
            =[character_index_dic[n] for n in (one_train_dataset_list[1]+'E')]

        # np.eye(41)[[25,17,20,6]].shape
        # (4,41)
        # np.eye(41)[[25,17,20,6]] returns one one-hot row per character,
        # with the 1. placed at column 25, 17, 20, and 6 respectively
        input_batch_one_hot_encoded_english_word_list.append(
            np.eye(length_of_character_index_dic)[one_english_word_represented_in_number_list])

        # np.eye(41)[[0,29,30]].shape
        # (3,41)
        # np.eye(41)[[0,29,30]] returns one one-hot row per character,
        # with the 1. placed at column 0, 29, and 30 respectively
        output_batch_one_hot_encoded_translated_korean_word_list\
            .append(np.eye(length_of_character_index_dic)[one_korean_translated_word_list])

        # The target (label) values don't need to be one-hot encoded,
        # because you will use sparse_softmax_cross_entropy_with_logits()
        # as the loss function
        target_batch.append(one_korean_translated_word_target_list)

    return input_batch_one_hot_encoded_english_word_list\
        ,output_batch_one_hot_encoded_translated_korean_word_list\
        ,target_batch
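# Quick sanity check (this call is an extra illustration, not part of the original script;
# the shapes follow from the comments inside make_batch above):
# each encoder input is a (4,41) one-hot array, each decoder input is a (3,41) one-hot array,
# and each target is a list of 3 integer indices
example_input_batch,example_output_batch,example_target_batch=make_batch(train_dataset_list)
# len(example_input_batch)      -> 6
# example_input_batch[0].shape  -> (4, 41)
# example_output_batch[0].shape -> (3, 41)
# example_target_batch[0]       -> [29, 30, 1]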
# You configure options
learning_rate=0.01
number_of_hidden_layer=128
total_epoch=100

# Since the input and the output are both one-hot encoded,
# their sizes are the same
number_of_class=number_of_input=length_of_character_index_dic

# @
# You build the neural network model

# In the case of a Seq2Seq model,
# the format of the encoder's input and the format of the decoder's input are the same
# [None,None,number_of_input]=[batch_size,time_steps,input_size]
input_data_to_encoder_placeholder_node\
    =tf.placeholder(tf.float32,[None,None,number_of_input])
input_data_to_decoder_placeholder_node\
    =tf.placeholder(tf.float32,[None,None,number_of_input])
# [None,None]=[batch_size,time_steps]
target_data_placeholder_node=tf.placeholder(tf.int64,[None,None])

# You build the encoder cell
with tf.variable_scope('encode'):
    encoder_cell_node=tf.nn.rnn_cell.BasicRNNCell(number_of_hidden_layer)
    droppedout_encoder_cell_node=tf.nn.rnn_cell.DropoutWrapper(encoder_cell_node,output_keep_prob=0.5)

    # You build the recurrent neural network
    output_out_of_encoder,state_out_of_encoder=tf.nn.dynamic_rnn(\
        droppedout_encoder_cell_node\
        ,input_data_to_encoder_placeholder_node\
        ,dtype=tf.float32)

# You build the decoder cell
with tf.variable_scope('decode'):
    decoder_cell_node=tf.nn.rnn_cell.BasicRNNCell(number_of_hidden_layer)
    droppedout_decoder_cell_node=tf.nn.rnn_cell.DropoutWrapper(decoder_cell_node,output_keep_prob=0.5)

    # The most important point of the Seq2Seq concept is
    # that you feed the last state value of the encoder (initial_state=state_out_of_encoder)
    # into the decoder as its initial state
    output_out_of_decoder,state_out_of_decoder=tf.nn.dynamic_rnn(\
        droppedout_decoder_cell_node\
        ,input_data_to_decoder_placeholder_node\
        ,initial_state=state_out_of_encoder\
        ,dtype=tf.float32)

# You create the model
# Its output has the format [batch_size,time_steps,number_of_class]
model=tf.layers.dense(output_out_of_decoder,number_of_class,activation=None)

# You create the loss function
cost_function_node=tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(\
    logits=model,labels=target_data_placeholder_node))

# You create the Adam optimizer
adam_optimizer_node=tf.train.AdamOptimizer(learning_rate)
to_be_trained_node=adam_optimizer_node.minimize(cost_function_node)
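# Side example (extra illustration, not part of the original script, assuming a TF 1.x release
# that provides tf.nn.softmax_cross_entropy_with_logits_v2):
# sparse_softmax_cross_entropy_with_logits() takes integer class indices as labels,
# while softmax_cross_entropy_with_logits_v2() takes one-hot labels;
# for the same data both produce the same per-example losses,
# which is why target_batch can stay as plain index lists
example_logits_node=tf.constant([[2.0,1.0,0.1],[0.3,2.5,0.2]])
example_sparse_labels_node=tf.constant([0,1],dtype=tf.int64)
example_one_hot_labels_node=tf.one_hot(example_sparse_labels_node,depth=3)
example_sparse_loss_node=tf.nn.sparse_softmax_cross_entropy_with_logits(\
    logits=example_logits_node,labels=example_sparse_labels_node)
example_dense_loss_node=tf.nn.softmax_cross_entropy_with_logits_v2(\
    logits=example_logits_node,labels=example_one_hot_labels_node)
# After the session below is created, you could compare them with
# sess_object.run([example_sparse_loss_node,example_dense_loss_node])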
# @
# You train the neural network model
sess_object=tf.Session()
sess_object.run(tf.global_variables_initializer())

input_batch_one_hot_encoded_english_word_list\
    ,output_batch_one_hot_encoded_translated_korean_word_list\
    ,target_batch\
    =make_batch(train_dataset_list)

for epoch in range(total_epoch):
    _,loss_value=sess_object.run(\
        [to_be_trained_node,cost_function_node]\
        ,feed_dict={\
            input_data_to_encoder_placeholder_node:input_batch_one_hot_encoded_english_word_list\
            ,input_data_to_decoder_placeholder_node:output_batch_one_hot_encoded_translated_korean_word_list\
            ,target_data_placeholder_node:target_batch})

    print('Epoch:','%04d'%(epoch+1),'cost_function_node =','{:.6f}'.format(loss_value))

print('Completed optimization')

# @
# You can test the translator

# This method takes an English word and predicts the translated Korean word
def translate(word):
    # This model uses [english word,korean word]
    # as its input data and output data
    # But at prediction time, this model doesn't know the Korean word
    # So you should fill the decoder's input value and output value
    # with a meaningless character like 'P'
    # For example, 'wood' -> ['wood','PPPP']
    train_dataset_list=[word,'P'*len(word)]

    input_batch_one_hot_encoded_english_word_list\
        ,output_batch_one_hot_encoded_translated_korean_word_list\
        ,target_batch\
        =make_batch([train_dataset_list])

    # model=tf.layers.dense(output_out_of_decoder,number_of_class,activation=None)
    # The output of the model has the format [batch_size,time_steps,number_of_class]
    # You take the argmax over dimension 2 (the class dimension),
    # so each time step becomes the index of the character with the highest probability
    prediction_word_node=tf.argmax(model,2)

    result_of_prediction=sess_object.run(\
        prediction_word_node\
        ,feed_dict={\
            input_data_to_encoder_placeholder_node:input_batch_one_hot_encoded_english_word_list\
            ,input_data_to_decoder_placeholder_node:output_batch_one_hot_encoded_translated_korean_word_list\
            ,target_data_placeholder_node:target_batch})

    # You create a character list
    # by looking up the character corresponding to each predicted index number
    # i is the index number, for example characters_list[4] is 'b'
    decoded_to_word_list=[characters_list[i] for i in result_of_prediction[0]]

    # You remove all characters after 'E'
    # Then you join the remaining characters into a string
    index_of_end_point=decoded_to_word_list.index('E')
    final_translated_word_string=''.join(decoded_to_word_list[:index_of_end_point])

    return final_translated_word_string
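# A hedged robustness note (extra illustration, not part of the original script):
# decoded_to_word_list.index('E') raises a ValueError when the model never predicts 'E',
# which can happen for inputs that are far from the training data.
# A safer variant (hypothetical helper name) keeps the whole prediction in that case:
def trim_at_end_symbol(decoded_to_word_list):
    # Cut at the first 'E' if one was predicted, otherwise keep everything
    if 'E' in decoded_to_word_list:
        decoded_to_word_list=decoded_to_word_list[:decoded_to_word_list.index('E')]
    return ''.join(decoded_to_word_list)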
print('\n=== Translation test ===')
print('word ->',translate('word'))
print('wodr ->',translate('wodr'))
print('love ->',translate('love'))
print('loev ->',translate('loev'))
print('abcd ->',translate('abcd'))

# < Epoch: 0001 cost_function_node = 3.719440
# < Epoch: 0002 cost_function_node = 2.826655
# < Epoch: 0003 cost_function_node = 1.577134
# < Epoch: 0004 cost_function_node = 0.949842
# < Epoch: 0005 cost_function_node = 0.630703
# < Epoch: 0006 cost_function_node = 0.409163
# < Epoch: 0007 cost_function_node = 0.270477
# < Epoch: 0008 cost_function_node = 0.136079
# < Epoch: 0009 cost_function_node = 0.179610
# < Epoch: 0010 cost_function_node = 0.122789
# < Epoch: 0011 cost_function_node = 0.315257
# < Epoch: 0012 cost_function_node = 0.209083
# < Epoch: 0013 cost_function_node = 0.073182
# < Epoch: 0014 cost_function_node = 0.158532
# < Epoch: 0015 cost_function_node = 0.073537
# < Epoch: 0016 cost_function_node = 0.131775
# < Epoch: 0017 cost_function_node = 0.024948
# < Epoch: 0018 cost_function_node = 0.016395
# < Epoch: 0019 cost_function_node = 0.023747
# < Epoch: 0020 cost_function_node = 0.006037
# < Epoch: 0021 cost_function_node = 0.007485
# < Epoch: 0022 cost_function_node = 0.011314
# < Epoch: 0023 cost_function_node = 0.006030
# < Epoch: 0024 cost_function_node = 0.004620
# < Epoch: 0025 cost_function_node = 0.009484
# < Epoch: 0026 cost_function_node = 0.002795
# < Epoch: 0027 cost_function_node = 0.002990
# < Epoch: 0028 cost_function_node = 0.004092
# < Epoch: 0029 cost_function_node = 0.009223
# < Epoch: 0030 cost_function_node = 0.003198
# < Epoch: 0031 cost_function_node = 0.003220
# < Epoch: 0032 cost_function_node = 0.002265
# < Epoch: 0033 cost_function_node = 0.003203
# < Epoch: 0034 cost_function_node = 0.002904
# < Epoch: 0035 cost_function_node = 0.003606
# < Epoch: 0036 cost_function_node = 0.001644
# < Epoch: 0037 cost_function_node = 0.001191
# < Epoch: 0038 cost_function_node = 0.003059
# < Epoch: 0039 cost_function_node = 0.002428
# < Epoch: 0040 cost_function_node = 0.003616
# < Epoch: 0041 cost_function_node = 0.001635
# < Epoch: 0042 cost_function_node = 0.000851
# < Epoch: 0043 cost_function_node = 0.001127
# < Epoch: 0044 cost_function_node = 0.001607
# < Epoch: 0045 cost_function_node = 0.002032
# < Epoch: 0046 cost_function_node = 0.003624
# < Epoch: 0047 cost_function_node = 0.001705
# < Epoch: 0048 cost_function_node = 0.000539
# < Epoch: 0049 cost_function_node = 0.001552
# < Epoch: 0050 cost_function_node = 0.000402
# < Epoch: 0051 cost_function_node = 0.000527
# < Epoch: 0052 cost_function_node = 0.000795
# < Epoch: 0053 cost_function_node = 0.004713
# < Epoch: 0054 cost_function_node = 0.000393
# < Epoch: 0055 cost_function_node = 0.000432
# < Epoch: 0056 cost_function_node = 0.001234
# < Epoch: 0057 cost_function_node = 0.001675
# < Epoch: 0058 cost_function_node = 0.000236
# < Epoch: 0059 cost_function_node = 0.000658
# < Epoch: 0060 cost_function_node = 0.001043
# < Epoch: 0061 cost_function_node = 0.000448
# < Epoch: 0062 cost_function_node = 0.000912
# < Epoch: 0063 cost_function_node = 0.000458
# < Epoch: 0064 cost_function_node = 0.000340
# < Epoch: 0065 cost_function_node = 0.000792
# < Epoch: 0066 cost_function_node = 0.000615
# < Epoch: 0067 cost_function_node = 0.001182
# < Epoch: 0068 cost_function_node = 0.000831
# < Epoch: 0069 cost_function_node = 0.000702
# < Epoch: 0070 cost_function_node = 0.001436
# < Epoch: 0071 cost_function_node = 0.000726
# < Epoch: 0072 cost_function_node = 0.001157
# < Epoch: 0073 cost_function_node = 0.001240
# < Epoch: 0074 cost_function_node = 0.000554
# < Epoch: 0075 cost_function_node = 0.001175
# < Epoch: 0076 cost_function_node = 0.000145
# < Epoch: 0077 cost_function_node = 0.000794
# < Epoch: 0078 cost_function_node = 0.002363
# < Epoch: 0079 cost_function_node = 0.000701
# < Epoch: 0080 cost_function_node = 0.000372
# < Epoch: 0081 cost_function_node = 0.001338
# < Epoch: 0082 cost_function_node = 0.002810
# < Epoch: 0083 cost_function_node = 0.000174
# < Epoch: 0084 cost_function_node = 0.000987
# < Epoch: 0085 cost_function_node = 0.000428
# < Epoch: 0086 cost_function_node = 0.001124
# < Epoch: 0087 cost_function_node = 0.000337
# < Epoch: 0088 cost_function_node = 0.000301
# < Epoch: 0089 cost_function_node = 0.000216
# < Epoch: 0090 cost_function_node = 0.000842
# < Epoch: 0091 cost_function_node = 0.000508
# < Epoch: 0092 cost_function_node = 0.000203
# < Epoch: 0093 cost_function_node = 0.000245
# < Epoch: 0094 cost_function_node = 0.000154
# < Epoch: 0095 cost_function_node = 0.000282
# < Epoch: 0096 cost_function_node = 0.000100
# < Epoch: 0097 cost_function_node = 0.000246
# < Epoch: 0098 cost_function_node = 0.000466
# < Epoch: 0099 cost_function_node = 0.000794
# < Epoch: 0100 cost_function_node = 0.000507
# < Completed optimization
# <
# < === Translation test ===
# < word -> 단어
# < wodr -> 나무
# < love -> 사랑
# < loev -> 사랑
# < abcd -> 놀이
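# A hedged side note (extra observation, not part of the original script):
# output_keep_prob=0.5 is hard-coded into both DropoutWrapper cells,
# so dropout stays active while translate() runs,
# and repeated calls on the same word can occasionally return different results.
# A common pattern, sketched here only as commented code because the graph above
# would have to be rebuilt with it, is to feed the keep probability through a placeholder:
# keep_prob_placeholder_node=tf.placeholder(tf.float32)
# droppedout_encoder_cell_node=tf.nn.rnn_cell.DropoutWrapper(\
#     encoder_cell_node,output_keep_prob=keep_prob_placeholder_node)
# droppedout_decoder_cell_node=tf.nn.rnn_cell.DropoutWrapper(\
#     decoder_cell_node,output_keep_prob=keep_prob_placeholder_node)
# and then pass keep_prob_placeholder_node:0.5 in the training feed_dict
# and keep_prob_placeholder_node:1.0 in the translate() feed_dict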