# 006-lab-001. softmax_function function for multinomial classification by tf

# @
# Softmax function is useful when you predict from multiple classes
# Binomial classification predicts from 0 or 1

# @
# lab-06-1-softmax_classifier.py

import tensorflow as tf

# Fix the graph-level random seed so the random weight initialization
# below is repeatable between runs
tf.set_random_seed(777)

# [n,4]
x_train_data = [
    [1, 2, 1, 1],
    [2, 1, 3, 2],
    [3, 1, 3, 4],
    [4, 1, 5, 5],
    [1, 7, 5, 5],
    [1, 2, 5, 6],
    [1, 6, 6, 6],
    [1, 7, 7, 7],
]

# [n,3]
# We express multiple classes in one hot encoding
# When classes are '0','1','2',
# you first create 3 lists with 3 spaces
# [ , , ]
# '0'=[1,0,0]
# '1'=[0,1,0]
# '2'=[0,0,1]
# y_train_data before one hot encoding=[2,2,2,1,1,1,0,0]
y_train_data = [
    [0, 0, 1],
    [0, 0, 1],
    [0, 0, 1],
    [0, 1, 0],
    [0, 1, 0],
    [0, 1, 0],
    [1, 0, 0],
    [1, 0, 0],
]

# [n,4]
X_placeholder = tf.placeholder("float", [None, 4])
# [n,3]
Y_placeholder = tf.placeholder("float", [None, 3])
nb_classes = 3

# [n,4][?,?]=[n,3]
# [?,?]=[4,3]
W_variable = tf.Variable(tf.random_normal([4, nb_classes]), name='weight')
b_variable = tf.Variable(tf.random_normal([nb_classes]), name='bias')

# XW=yhat(=scores=logits)
# tf.matmul(X_placeholder,W_variable)+b_variable=yhat(=scores=logits)
logits_f_for_multinomial_classification = (
    tf.matmul(X_placeholder, W_variable) + b_variable
)

# softmax_function=exp(logits)/reduce_sum(exp(logits),dim)
hypothesis_f_for_multinomial_classification = tf.nn.softmax(
    logits_f_for_multinomial_classification
)

# Loss function of hypothesis_f_for_multinomial_classification is the
# cross entropy function: mean over rows of -sum(Y * log(softmax(logits))).
# You perform reduce sum for row data (axis=1).
# log(softmax(logits)) is taken with tf.nn.log_softmax on the logits instead
# of tf.log on the probabilities: if a probability underflows to 0,
# tf.log gives -inf and 0 * -inf turns the whole cost into NaN.
cross_entropy_cost_f_for_h_f_for_multinomial_classification = tf.reduce_mean(
    -tf.reduce_sum(
        Y_placeholder
        * tf.nn.log_softmax(logits_f_for_multinomial_classification),
        axis=1,
    )
)

gradient_descent_optimizer = tf.train.GradientDescentOptimizer(
    learning_rate=0.1
).minimize(cross_entropy_cost_f_for_h_f_for_multinomial_classification)

# Index of the most probable class for every input row.
# Built once as part of the graph so that testing the model does not add
# a new argmax op to the graph for every test case.
predicted_class_index = tf.argmax(
    hypothesis_f_for_multinomial_classification, 1
)

# The same training batch is fed at every step
train_feed_dict = {X_placeholder: x_train_data, Y_placeholder: y_train_data}


def _predict_and_report(sess, label, samples):
    """Evaluate the model on *samples*, print the result, return probabilities.

    Args:
        sess: Session in which the variables have been initialized.
        label: Name printed in front of the probabilities and the argmax.
        samples: List of input rows, each with 4 features.

    Returns:
        The softmax output fetched for *samples* (one row per sample).
    """
    # Fetch probabilities and their argmax in one run call
    probabilities, class_indices = sess.run(
        [hypothesis_f_for_multinomial_classification, predicted_class_index],
        feed_dict={X_placeholder: samples},
    )
    print(label + ":\n", probabilities,
          "\nmax from " + label + ":\n", class_indices)
    return probabilities


with tf.Session() as sess_object:
    sess_object.run(tf.global_variables_initializer())

    for step in range(2001):
        sess_object.run(gradient_descent_optimizer, feed_dict=train_feed_dict)

        if step % 200 == 0:
            # The cost is evaluated after the update of this step
            print("step:", step,
                  "cost_function:", sess_object.run(
                      cross_entropy_cost_f_for_h_f_for_multinomial_classification,
                      feed_dict=train_feed_dict))

    print('--------------')

    # After training, you can test model
    prediction_value_case_a = _predict_and_report(
        sess_object, "prediction_value_case_a", [[1, 11, 7, 9]])

    print('--------------')

    prediction_value_case_b = _predict_and_report(
        sess_object, "prediction_value_case_b", [[1, 3, 4, 3]])

    print('--------------')

    prediction_value_case_c = _predict_and_report(
        sess_object, "prediction_value_case_c", [[1, 1, 0, 1]])

    print('--------------')

    prediction_value_case_all = _predict_and_report(
        sess_object, "prediction_value_case_all",
        [[1, 11, 7, 9], [1, 3, 4, 3], [1, 1, 0, 1]])

# Sample output (the last digits may differ between runs/environments):
# step: 0 cost_function: 8.5005455
# step: 200 cost_function: 0.5673558
# step: 400 cost_function: 0.45715225
# step: 600 cost_function: 0.36152935
# step: 800 cost_function: 0.27131468
# step: 1000 cost_function: 0.22999714
# step: 1200 cost_function: 0.20844801
# step: 1400 cost_function: 0.19053437
# step: 1600 cost_function: 0.17539053
# step: 1800 cost_function: 0.16241854
# step: 2000 cost_function: 0.15118504
# --------------
# prediction_value_case_a:
#  [[2.0230063e-03 9.9796706e-01 9.8606524e-06]]
# max from prediction_value_case_a:
#  [1]
# --------------
# prediction_value_case_b:
#  [[0.9123496 0.07824012 0.00941026]]
# max from prediction_value_case_b:
#  [0]
# --------------
# prediction_value_case_c:
#  [[1.1673185e-08 3.3480799e-04 9.9966514e-01]]
# max from prediction_value_case_c:
#  [2]
# --------------
# prediction_value_case_all:
#  [[2.0230063e-03 9.9796706e-01 9.8606524e-06]
#  [9.1234958e-01 7.8240119e-02 9.4102612e-03]
#  [1.1673185e-08 3.3480799e-04 9.9966514e-01]]
# max from prediction_value_case_all:
#  [1 0 2]