# 009-lab-002. tensorboard
# @
# TensorBoard requires 5 steps
# step 1. From the tensorflow graph in your code, decide which tensors you want to log
# The kind of method is determined by the kind of data
# a scalar value uses scalar()
# a vector, matrix or tensor uses histogram()
# I want to log W2
# w2_hist=tf.summary.histogram("weights2", W2)
# I want to log cost
# cost_summ=tf.summary.scalar("cost", cost)
# step 2. Merge all summaries
# summary=tf.summary.merge_all()
# step 3. Create the 'writer node' and add the graph to it
# './logs' is the directory in which the log file is created
# writer=tf.summary.FileWriter('./logs')
# writer.add_graph(sess.graph)
# step 4. Since the summary is also a node, you should run it
# s, _ = sess.run([summary, optimizer], feed_dict=feed_dict)
# This is where you actually write the summary (s) into the file
# writer.add_summary(s, global_step=global_step)
# step 5. Launch tensorboard
# tensorboard --logdir=./logs
# Tip for tensorboard
# When you run tensorboard on a remote machine, you run into complications such as opening a port
# In this situation, you can use ssh port forwarding
# username@server.com: your remote server
# remote_port: 6006
# local_port: Whatever you want
# ssh -L local_port:127.0.0.1:remote_port username@server.com
# For example,
# in local> ssh -L 7007:127.0.0.1:6006 username@server.com
# in server> tensorboard --logdir=./logs/xor_logs
# Then, you go to http://127.0.0.1:7007 on your local machine
# and you will see tensorboard running on the remote server
# @
# If you want to compare multiple runs,
# you should create a separate folder for the logs of each run
# I use the ./logs/xor_logs folder for learning_rate=0.1
# tensorboard --logdir=./logs/xor_logs
# train=tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)
# ...
# writer=tf.summary.FileWriter("./logs/xor_logs")
# I use the ./logs/xor_logs_r0_01 folder for learning_rate=0.01
# tensorboard --logdir=./logs/xor_logs_r0_01
# train=tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(cost)
# ...
# writer=tf.summary.FileWriter("./logs/xor_logs_r0_01")
# Then, I run tensorboard with the parent folder
# tensorboard --logdir=./logs

# @
# lab-09-4-xor_tensorboard.py
# Trains a 2-layer sigmoid network on XOR and logs histograms/scalars
# so that the run can be inspected in TensorBoard.
import tensorflow as tf
import numpy as np

tf.set_random_seed(777)
learning_rate = 0.01

x_data = [[0, 0], [0, 1], [1, 0], [1, 1]]
y_data = [[0], [1], [1], [0]]
x_data = np.array(x_data, dtype=np.float32)
y_data = np.array(y_data, dtype=np.float32)

X = tf.placeholder(tf.float32, [None, 2], name='x-input')
Y = tf.placeholder(tf.float32, [None, 1], name='y-input')

# We can arrange tensors by using name_scope() based on each layer
# And then, if you click layer1 and layer2, you will see all nodes in detail
with tf.name_scope("layer1") as scope:
    W1 = tf.Variable(tf.random_normal([2, 2]), name='weight1')
    b1 = tf.Variable(tf.random_normal([2]), name='bias1')
    layer1 = tf.sigmoid(tf.matmul(X, W1) + b1)

    w1_hist = tf.summary.histogram("weights1", W1)
    b1_hist = tf.summary.histogram("biases1", b1)
    layer1_hist = tf.summary.histogram("layer1", layer1)

with tf.name_scope("layer2") as scope:
    W2 = tf.Variable(tf.random_normal([2, 1]), name='weight2')
    b2 = tf.Variable(tf.random_normal([1]), name='bias2')
    hypothesis = tf.sigmoid(tf.matmul(layer1, W2) + b2)

    w2_hist = tf.summary.histogram("weights2", W2)
    b2_hist = tf.summary.histogram("biases2", b2)
    hypothesis_hist = tf.summary.histogram("hypothesis", hypothesis)

# Loss function (binary cross entropy averaged over the batch)
with tf.name_scope("cost") as scope:
    cost = -tf.reduce_mean(Y * tf.log(hypothesis) +
                           (1 - Y) * tf.log(1 - hypothesis))
    cost_summ = tf.summary.scalar("cost", cost)

with tf.name_scope("train") as scope:
    train = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Accuracy computation
# True if hypothesis>0.5 else False
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))
accuracy_summ = tf.summary.scalar("accuracy", accuracy)

with tf.Session() as sess:
    # tensorboard --logdir=./logs/xor_logs_r0_01
    merged_summary = tf.summary.merge_all()
    writer = tf.summary.FileWriter("./logs/xor_logs_r0_01")
    writer.add_graph(sess.graph)  # Show the graph
    sess.run(tf.global_variables_initializer())

    for step in range(10001):
        # The merged summary is a graph node too, so it is fetched
        # together with the training op.
        summary, _ = sess.run([merged_summary, train],
                              feed_dict={X: x_data, Y: y_data})
        writer.add_summary(summary, global_step=step)

        if step % 100 == 0:
            print(step, sess.run(cost, feed_dict={X: x_data, Y: y_data}),
                  sess.run([W1, W2]))

    # Flush the remaining events and release the log file.
    writer.close()

    # Accuracy report
    h, c, a = sess.run([hypothesis, predicted, accuracy],
                       feed_dict={X: x_data, Y: y_data})
    print("\nHypothesis: ", h, "\nCorrect: ", c, "\nAccuracy: ", a)

# Hypothesis:  [[  6.13103184e-05]
#  [  9.99936938e-01]
#  [  9.99950767e-01]
#  [  5.97514772e-05]]
# Correct:  [[ 0.]
#  [ 1.]
#  [ 1.]
#  [ 0.]]
# Accuracy:  1.0

# @
# lab-09-5-linear_back_prop.py
# http://blog.aloni.org/posts/backprop-with-tensorflow/
# https://medium.com/@karpathy/yes-you-should-understand-backprop-e2f06eab496b#.b3rvzhx89
# WIP
# Linear regression with one feature, trained with hand-written gradients.
import tensorflow as tf

tf.set_random_seed(777)  # reproducibility

# tf Graph Input
x_data = [[1.],
          [2.],
          [3.]]
y_data = [[1.],
          [2.],
          [3.]]

# placeholders for a tensor that will be always fed.
X = tf.placeholder(tf.float32, shape=[None, 1])
Y = tf.placeholder(tf.float32, shape=[None, 1])

# Set wrong model weights
W = tf.Variable(tf.truncated_normal([1, 1]))
b = tf.Variable(5.)

# Forward prop
hypothesis = tf.matmul(X, W) + b

# diff
assert hypothesis.shape.as_list() == Y.shape.as_list()
diff = (hypothesis - Y)

# Back prop (chain rule)
d_l1 = diff
d_b = d_l1
d_w = tf.matmul(tf.transpose(X), d_l1)

print(X, W, d_l1, d_w)

# Updating network using gradients
learning_rate = 0.1
step = [
    tf.assign(W, W - learning_rate * d_w),
    tf.assign(b, b - learning_rate * tf.reduce_mean(d_b)),
]

# 7. Running and testing the training process
# NOTE: despite its name, this is the mean squared error (no square root).
RMSE = tf.reduce_mean(tf.square((Y - hypothesis)))

sess = tf.InteractiveSession()
init = tf.global_variables_initializer()
sess.run(init)

for i in range(1000):
    print(i, sess.run([step, RMSE], feed_dict={X: x_data, Y: y_data}))

print(sess.run(hypothesis, feed_dict={X: x_data}))

# Release the session's resources once the script is done with it.
sess.close()

# @
# DeepLearningZeroToAll/lab-09-6-multi-linear_back_prop.py
# http://blog.aloni.org/posts/backprop-with-tensorflow/
# https://medium.com/@karpathy/yes-you-should-understand-backprop-e2f06eab496b#.b3rvzhx89
# WIP
# Linear regression with three features, trained with hand-written gradients.
import tensorflow as tf

tf.set_random_seed(777)  # reproducibility

# tf Graph Input
x_data = [[73., 80., 75.],
          [93., 88., 93.],
          [89., 91., 90.],
          [96., 98., 100.],
          [73., 66., 70.]]
y_data = [[152.],
          [185.],
          [180.],
          [196.],
          [142.]]

# placeholders for a tensor that will be always fed.
X = tf.placeholder(tf.float32, shape=[None, 3])
Y = tf.placeholder(tf.float32, shape=[None, 1])

# Set wrong model weights
W = tf.Variable(tf.truncated_normal([3, 1]))
b = tf.Variable(5.)

# Forward prop
hypothesis = tf.matmul(X, W) + b

print(hypothesis.shape, Y.shape)

# diff
assert hypothesis.shape.as_list() == Y.shape.as_list()
diff = (hypothesis - Y)

# Back prop (chain rule)
d_l1 = diff
d_b = d_l1
d_w = tf.matmul(tf.transpose(X), d_l1)

print(X, d_l1, d_w)

# Updating network using gradients
learning_rate = 1e-6
step = [
    tf.assign(W, W - learning_rate * d_w),
    tf.assign(b, b - learning_rate * tf.reduce_mean(d_b)),
]

# 7. Running and testing the training process
# NOTE: despite its name, this is the mean squared error (no square root).
RMSE = tf.reduce_mean(tf.square((Y - hypothesis)))

sess = tf.InteractiveSession()
init = tf.global_variables_initializer()
sess.run(init)

for i in range(10000):
    print(i, sess.run([step, RMSE], feed_dict={X: x_data, Y: y_data}))

print(sess.run(hypothesis, feed_dict={X: x_data}))

# Release the session's resources once the script is done with it.
sess.close()

# @
# DeepLearningZeroToAll/lab-09-7-sigmoid_back_prop.py
"""
In this file, we will implement back propagation by hand

We will use the Sigmoid Cross Entropy loss function.
This is equivalent to tf.nn.sigmoid_cross_entropy_with_logits(logits, labels)

[References]

1) Tensorflow Document (tf.nn.sigmoid_cross_entropy_with_logits)
    https://www.tensorflow.org/api_docs/python/tf/nn/sigmoid_cross_entropy_with_logits

2) Neural Net Backprop in one slide! by Sung Kim
    https://docs.google.com/presentation/d/1_ZmtfEjLmhbuM_PqbDYMXXLAqeWN0HwuhcSKnUQZ6MM/edit#slide=id.g1ec1d04b5a_1_83

3) Back Propagation with Tensorflow by Dan Aloni
    http://blog.aloni.org/posts/backprop-with-tensorflow/

4) Yes you should understand backprop by Andrej Karpathy
    https://medium.com/@karpathy/yes-you-should-understand-backprop-e2f06eab496b#.cockptkn7

[Network Architecture]

Input: x
Layer1: x * W + b
Output layer = σ(Layer1)

Loss_i = - y * log(σ(Layer1)) - (1 - y) * log(1 - σ(Layer1))
Loss = tf.reduce_sum(Loss_i)

We want to compute that

dLoss/dW = ???
dLoss/db = ???

please read "Neural Net Backprop in one slide!" for deriving formulas

"""
import tensorflow as tf
import numpy as np

tf.set_random_seed(777)

# Predicting animal type based on various features
xy = np.loadtxt('data-04-zoo.csv', delimiter=',', dtype=np.float32)
X_data = xy[:, 0:-1]
N = X_data.shape[0]
y_data = xy[:, [-1]]

# y_data has labels from 0 ~ 6
print("y has one of the following values")
print(np.unique(y_data))

# X_data.shape = (101, 16) => 101 samples, 16 features
# y_data.shape = (101, 1)  => 101 samples, 1 label
print("Shape of X data: ", X_data.shape)
print("Shape of y data: ", y_data.shape)

nb_classes = 7  # 0 ~ 6

X = tf.placeholder(tf.float32, [None, 16])
y = tf.placeholder(tf.int32, [None, 1])  # 0 ~ 6

target = tf.one_hot(y, nb_classes)  # one hot
target = tf.reshape(target, [-1, nb_classes])
target = tf.cast(target, tf.float32)

W = tf.Variable(tf.random_normal([16, nb_classes]), name='weight')
b = tf.Variable(tf.random_normal([nb_classes]), name='bias')


def sigma(x):
    """Return the element-wise sigmoid, σ(x) = 1 / (1 + exp(-x))."""
    return 1. / (1. + tf.exp(-x))


def sigma_prime(x):
    """Return the sigmoid derivative, σ'(x) = σ(x) * (1 - σ(x))."""
    # Build the sigmoid sub-graph once and reuse it for both factors.
    s = sigma(x)
    return s * (1. - s)


# Forward propagation
layer_1 = tf.matmul(X, W) + b
y_pred = sigma(layer_1)

# Loss Function (end of forward propagation)
loss_i = - target * tf.log(y_pred) - (1. - target) * tf.log(1. - y_pred)
loss = tf.reduce_sum(loss_i)

# Dimension Check
assert y_pred.shape.as_list() == target.shape.as_list()

# Back propagation (chain rule)
# The 1e-7 keeps the division finite when y_pred saturates at 0 or 1.
d_loss = (y_pred - target) / (y_pred * (1. - y_pred) + 1e-7)
d_sigma = sigma_prime(layer_1)
d_layer = d_loss * d_sigma
d_b = d_layer
d_W = tf.matmul(tf.transpose(X), d_layer)

# Updating network using gradients
learning_rate = 0.01
train_step = [
    tf.assign(W, W - learning_rate * d_W),
    # Sum over the batch axis only: b has one entry per class, so its
    # gradient must keep the class axis instead of collapsing to a scalar.
    tf.assign(b, b - learning_rate * tf.reduce_sum(d_b, axis=0)),
]

# Prediction and Accuracy
prediction = tf.argmax(y_pred, 1)
acct_mat = tf.equal(tf.argmax(y_pred, 1), tf.argmax(target, 1))
acct_res = tf.reduce_mean(tf.cast(acct_mat, tf.float32))

# Launch graph
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for step in range(500):
        sess.run(train_step, feed_dict={X: X_data, y: y_data})

        if step % 10 == 0:
            # Within 300 steps, you should see an accuracy of 100%
            step_loss, acc = sess.run([loss, acct_res], feed_dict={
                                      X: X_data, y: y_data})
            print("Step: {:5}\t Loss: {:10.5f}\t Acc: {:.2%}" .format(
                step, step_loss, acc))

    # Let's see if we can predict
    pred = sess.run(prediction, feed_dict={X: X_data})
    # `label` (not `y`) so the placeholder `y` is not rebound by the loop.
    for p, label in zip(pred, y_data):
        true_y = int(label[0])
        msg = "[{}]\t Prediction: {:d}\t True y: {:d}"
        print(msg.format(p == true_y, p, true_y))

# Output Example (recorded from an earlier run; exact numbers may differ)
# Step:     0      Loss:  453.74799        Acc: 38.61%
# Step:    20      Loss:   95.05664        Acc: 88.12%
# Step:    40      Loss:   66.43570        Acc: 93.07%
# Step:    60      Loss:   53.09288        Acc: 94.06%
# ...
# Step:   290      Loss:   18.72972        Acc: 100.00%
# Step:   300      Loss:   18.24953        Acc: 100.00%
# Step:   310      Loss:   17.79592        Acc: 100.00%
# ...
# [True]   Prediction: 0   True y: 0
# [True]   Prediction: 0   True y: 0
# [True]   Prediction: 3   True y: 3
# [True]   Prediction: 0   True y: 0
# ...

# @
# lab-09-x-xor-nn-back_prop.py
# Two-layer sigmoid network for XOR whose gradients are derived by hand
# and applied with explicit tf.assign ops instead of an optimizer.
import tensorflow as tf
import numpy as np

tf.set_random_seed(777)
learning_rate = 0.1

# XOR truth table: inputs and expected outputs
x_data = np.array([[0, 0],
                   [0, 1],
                   [1, 0],
                   [1, 1]], dtype=np.float32)
y_data = np.array([[0],
                   [1],
                   [1],
                   [0]], dtype=np.float32)

X = tf.placeholder(tf.float32, [None, 2])
Y = tf.placeholder(tf.float32, [None, 1])

# Hidden layer
W1 = tf.Variable(tf.random_normal([2, 2]), name='weight1')
b1 = tf.Variable(tf.random_normal([2]), name='bias1')
l1 = tf.sigmoid(tf.matmul(X, W1) + b1)

# Output layer
W2 = tf.Variable(tf.random_normal([2, 1]), name='weight2')
b2 = tf.Variable(tf.random_normal([1]), name='bias2')
Y_pred = tf.sigmoid(tf.matmul(l1, W2) + b2)

# Loss function (binary cross entropy, averaged)
cost = -tf.reduce_mean(
    Y * tf.log(Y_pred) + (1 - Y) * tf.log(1 - Y_pred)
)

# Network
#          p1     a1           l1     p2     a2           l2 (y_pred)
# X -> (*) -> (+) -> (sigmoid) -> (*) -> (+) -> (sigmoid) -> (loss)
#       ^      ^                   ^      ^
#       |      |                   |      |
#       W1     b1                  W2     b2

# Derivative of the loss with respect to the prediction;
# the 1e-7 keeps the denominator away from zero.
d_Y_pred = (Y_pred - Y) / (Y_pred * (1.0 - Y_pred) + 1e-7)

# Back prop through layer 2
d_sigma2 = Y_pred * (1 - Y_pred)
d_a2 = d_Y_pred * d_sigma2
d_p2 = d_a2
d_b2 = d_a2
d_W2 = tf.matmul(tf.transpose(l1), d_p2)

# Average the layer-2 gradients over the batch
d_b2_mean = tf.reduce_mean(d_b2, axis=[0])
d_W2_mean = d_W2 / tf.cast(tf.shape(l1)[0], dtype=tf.float32)

# Back prop through layer 1
d_l1 = tf.matmul(d_p2, tf.transpose(W2))
d_sigma1 = l1 * (1 - l1)
d_a1 = d_l1 * d_sigma1
d_b1 = d_a1
d_p1 = d_a1
d_W1 = tf.matmul(tf.transpose(X), d_a1)

# Average the layer-1 gradients over the batch
d_W1_mean = d_W1 / tf.cast(tf.shape(X)[0], dtype=tf.float32)
d_b1_mean = tf.reduce_mean(d_b1, axis=[0])

# One gradient-descent update per parameter
step = [
    tf.assign(W2, W2 - learning_rate * d_W2_mean),
    tf.assign(b2, b2 - learning_rate * d_b2_mean),
    tf.assign(W1, W1 - learning_rate * d_W1_mean),
    tf.assign(b1, b1 - learning_rate * d_b1_mean)
]

# Accuracy computation
# A prediction counts as 1 when Y_pred > 0.5, otherwise 0
predicted = tf.cast(Y_pred > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

# Launch graph
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # The full XOR table is fed at every step.
    feed = {X: x_data, Y: y_data}

    print("shape", sess.run(tf.shape(X)[0], feed_dict={X: x_data}))

    for epoch in range(10001):
        sess.run([step, cost], feed_dict=feed)
        if epoch % 1000 == 0:
            print(epoch, sess.run([cost, d_W1], feed_dict=feed),
                  sess.run([W1, W2]))

    # Accuracy report
    hypothesis_val, predicted_val, accuracy_val = sess.run(
        [Y_pred, predicted, accuracy], feed_dict=feed)
    print("\nHypothesis: ", hypothesis_val,
          "\nCorrect: ", predicted_val,
          "\nAccuracy: ", accuracy_val)

# Hypothesis:  [[ 0.01338224]
#  [ 0.98166382]
#  [ 0.98809403]
#  [ 0.01135806]]
# Correct:  [[ 0.]
#  [ 1.]
#  [ 1.]
#  [ 0.]]
# Accuracy:  1.0