# 009-lab-002. tensorboard
# @
# Tensorboard requires 5 steps
# step 1. From tensorflow graph in code, decide tensors you want to log
# Kind of method is determined by kind of data
# scalar value uses scalar()
# vector or matrix or tensor use histogram()
# I want to log W2
# w2_hist=tf.summary.histogram("weights2", W2)
# I want to log cost
# cost_summ=tf.summary.scalar("cost", cost)
# step 2. Merge all summaries
# summary=tf.summary.merge_all()
# step 3. Create 'writer node' and 'add node'
# './logs' is the directory into which the log file is created
# writer=tf.summary.FileWriter('./logs')
# writer.add_graph(sess.graph)
# step 4. Since summary is also node, you should run it
# s, _ = sess.run([summary, optimizer], feed_dict=feed_dict)
# You actually write summary(s) into file
# writer.add_summary(s, global_step=global_step)
# step 5. Launch tensorboard
# tensorboard --logdir=./logs
# Tip for tensorboard
# When you run tensorboard in remote machine, you run into complex situation like opening port
# In this situation, you can use ssh
# username@server.com: your remote server
# remote_port: 6006
# local_port: Whatever you want
# ssh -L local_port:127.0.0.1:remote_port username@server.com
# For example,
# in local> ssh -L 7007:127.0.0.1:6006 username@server.com
# in server> tensorboard --logdir=./logs/xor_logs
# Then, you go to http://127.0.0.1:7007 in your local machine
# and you will see tensorboard running in remote server
# @
# If you want to run multiple graphs,
# you should create multiple folder where logs are saved
# I use ./logs/xor_logs folder for learning_rate=0.1
# tensorboard --logdir=./logs/xor_logs
# train=tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)
# ...
# writer=tf.summary.FileWriter("./logs/xor_logs")
# I use ./logs/xor_logs_r0_01 folder for learning_rate=0.01
# tensorboard --logdir=./logs/xor_logs_r0_01
# train=tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(cost)
# ...
# writer=tf.summary.FileWriter("./logs/xor_logs_r0_01")
# Then, I run tensorboard with parent folder
# tensorboard --logdir=./logs
# @
# lab-09-4-xor_tensorboard.py
# Solve XOR with a 2-layer sigmoid network while logging scalar and
# histogram summaries for TensorBoard.
# View with: tensorboard --logdir=./logs/xor_logs_r0_01
import tensorflow as tf
import numpy as np

tf.set_random_seed(777)  # reproducibility

learning_rate = 0.01

# XOR truth table: 4 input pairs and their labels.
x_data = [[0, 0],
          [0, 1],
          [1, 0],
          [1, 1]]
y_data = [[0],
          [1],
          [1],
          [0]]
x_data = np.array(x_data, dtype=np.float32)
y_data = np.array(y_data, dtype=np.float32)

X = tf.placeholder(tf.float32, [None, 2], name='x-input')
Y = tf.placeholder(tf.float32, [None, 1], name='y-input')

# We can arrange tensors by using name_scope() based on each layer.
# In the TensorBoard graph view, clicking layer1/layer2 expands all nodes.
with tf.name_scope("layer1") as scope:
    W1 = tf.Variable(tf.random_normal([2, 2]), name='weight1')
    b1 = tf.Variable(tf.random_normal([2]), name='bias1')
    layer1 = tf.sigmoid(tf.matmul(X, W1) + b1)

    w1_hist = tf.summary.histogram("weights1", W1)
    b1_hist = tf.summary.histogram("biases1", b1)
    layer1_hist = tf.summary.histogram("layer1", layer1)

with tf.name_scope("layer2") as scope:
    W2 = tf.Variable(tf.random_normal([2, 1]), name='weight2')
    b2 = tf.Variable(tf.random_normal([1]), name='bias2')
    hypothesis = tf.sigmoid(tf.matmul(layer1, W2) + b2)

    w2_hist = tf.summary.histogram("weights2", W2)
    b2_hist = tf.summary.histogram("biases2", b2)
    hypothesis_hist = tf.summary.histogram("hypothesis", hypothesis)

# Loss function: binary cross-entropy.
with tf.name_scope("cost") as scope:
    cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1 - Y) *
                           tf.log(1 - hypothesis))
    cost_summ = tf.summary.scalar("cost", cost)

with tf.name_scope("train") as scope:
    train = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Accuracy computation
# True if hypothesis > 0.5 else False
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))
accuracy_summ = tf.summary.scalar("accuracy", accuracy)

with tf.Session() as sess:
    # tensorboard --logdir=./logs/xor_logs
    merged_summary = tf.summary.merge_all()
    writer = tf.summary.FileWriter("./logs/xor_logs_r0_01")
    writer.add_graph(sess.graph)  # Show the graph

    sess.run(tf.global_variables_initializer())

    for step in range(10001):
        # Summaries are nodes too: run them together with the train op,
        # then write the serialized result to the event file.
        summary, _ = sess.run([merged_summary, train],
                              feed_dict={X: x_data, Y: y_data})
        writer.add_summary(summary, global_step=step)

        if step % 100 == 0:
            print(step, sess.run(cost, feed_dict={X: x_data, Y: y_data}),
                  sess.run([W1, W2]))

    # Accuracy report
    h, c, a = sess.run([hypothesis, predicted, accuracy],
                       feed_dict={X: x_data, Y: y_data})
    print("\nHypothesis: ", h, "\nCorrect: ", c, "\nAccuracy: ", a)

# Hypothesis: [[ 6.13103184e-05]
# [ 9.99936938e-01]
# [ 9.99950767e-01]
# [ 5.97514772e-05]]
# Correct: [[ 0.]
# [ 1.]
# [ 1.]
# [ 0.]]
# Accuracy: 1.0
# @
# lab-09-5-linear_back_prop.py
# Manual back-propagation (no tf optimizer) for 1-feature linear regression.
# http://blog.aloni.org/posts/backprop-with-tensorflow/
# https://medium.com/@karpathy/yes-you-should-understand-backprop-e2f06eab496b#.b3rvzhx89
# WIP
import tensorflow as tf

tf.set_random_seed(777)  # reproducibility

# tf Graph Input
x_data = [[1.],
          [2.],
          [3.]]
y_data = [[1.],
          [2.],
          [3.]]

# placeholders for a tensor that will be always fed.
X = tf.placeholder(tf.float32, shape=[None, 1])
Y = tf.placeholder(tf.float32, shape=[None, 1])

# Set wrong model weights
W = tf.Variable(tf.truncated_normal([1, 1]))
b = tf.Variable(5.)

# Forward prop
hypothesis = tf.matmul(X, W) + b

# diff (sanity-check static shapes before taking the difference)
assert hypothesis.shape.as_list() == Y.shape.as_list()
diff = (hypothesis - Y)

# Back prop (chain rule): d(loss)/d(pre-activation) is just the diff here,
# bias gets it directly, weight gets X^T * diff.
d_l1 = diff
d_b = d_l1
d_w = tf.matmul(tf.transpose(X), d_l1)

print(X, W, d_l1, d_w)

# Updating network using gradients
learning_rate = 0.1
step = [
    tf.assign(W, W - learning_rate * d_w),
    tf.assign(b, b - learning_rate * tf.reduce_mean(d_b)),
]

# 7. Running and testing the training process
RMSE = tf.reduce_mean(tf.square((Y - hypothesis)))

sess = tf.InteractiveSession()
init = tf.global_variables_initializer()
sess.run(init)

for i in range(1000):
    print(i, sess.run([step, RMSE], feed_dict={X: x_data, Y: y_data}))

print(sess.run(hypothesis, feed_dict={X: x_data}))
# @
# lab-09-6-multi-linear_back_prop.py
# Manual back-propagation for 3-feature linear regression.
# http://blog.aloni.org/posts/backprop-with-tensorflow/
# https://medium.com/@karpathy/yes-you-should-understand-backprop-e2f06eab496b#.b3rvzhx89
# WIP
import tensorflow as tf

tf.set_random_seed(777)  # reproducibility

# tf Graph Input
x_data = [[73., 80., 75.],
          [93., 88., 93.],
          [89., 91., 90.],
          [96., 98., 100.],
          [73., 66., 70.]]
y_data = [[152.],
          [185.],
          [180.],
          [196.],
          [142.]]

# placeholders for a tensor that will be always fed.
X = tf.placeholder(tf.float32, shape=[None, 3])
Y = tf.placeholder(tf.float32, shape=[None, 1])

# Set wrong model weights
W = tf.Variable(tf.truncated_normal([3, 1]))
b = tf.Variable(5.)

# Forward prop
hypothesis = tf.matmul(X, W) + b

print(hypothesis.shape, Y.shape)

# diff (sanity-check static shapes before taking the difference)
assert hypothesis.shape.as_list() == Y.shape.as_list()
diff = (hypothesis - Y)

# Back prop (chain rule): the bias gradient is the diff itself,
# the weight gradient is X^T * diff.
d_l1 = diff
d_b = d_l1
d_w = tf.matmul(tf.transpose(X), d_l1)

print(X, d_l1, d_w)

# Updating network using gradients
# (tiny rate because raw feature values are ~100 and gradients are unscaled)
learning_rate = 1e-6
step = [
    tf.assign(W, W - learning_rate * d_w),
    tf.assign(b, b - learning_rate * tf.reduce_mean(d_b)),
]

# 7. Running and testing the training process
RMSE = tf.reduce_mean(tf.square((Y - hypothesis)))

sess = tf.InteractiveSession()
init = tf.global_variables_initializer()
sess.run(init)

for i in range(10000):
    print(i, sess.run([step, RMSE], feed_dict={X: x_data, Y: y_data}))

print(sess.run(hypothesis, feed_dict={X: x_data}))
# @
# lab-09-7-sigmoid_back_prop.py
"""
In this file, we will implement back propagations by hands
We will use the Sigmoid Cross Entropy loss function.
This is equivalent to tf.nn.sigmoid_softmax_with_logits(logits, labels)
[References]
1) Tensorflow Document (tf.nn.sigmoid_softmax_with_logits)
https://www.tensorflow.org/api_docs/python/tf/nn/sigmoid_cross_entropy_with_logits
2) Neural Net Backprop in one slide! by Sung Kim
https://docs.google.com/presentation/d/1_ZmtfEjLmhbuM_PqbDYMXXLAqeWN0HwuhcSKnUQZ6MM/edit#slide=id.g1ec1d04b5a_1_83
3) Back Propagation with Tensorflow by Dan Aloni
http://blog.aloni.org/posts/backprop-with-tensorflow/
4) Yes you should understand backprop by Andrej Karpathy
https://medium.com/@karpathy/yes-you-should-understand-backprop-e2f06eab496b#.cockptkn7
[Network Architecture]
Input: x
Layer1: x * W + b
Output layer = σ(Layer1)
Loss_i = - y * log(σ(Layer1)) - (1 - y) * log(1 - σ(Layer1))
Loss = tf.reduce_sum(Loss_i)
We want to compute that
dLoss/dW = ???
dLoss/db = ???
please read "Neural Net Backprop in one slide!" for deriving formulas
"""
import tensorflow as tf
import numpy as np
tf.set_random_seed(777)
# Predicting animal type based on various features
xy = np.loadtxt('data-04-zoo.csv', delimiter=',', dtype=np.float32)
X_data = xy[:, 0:-1]
N = X_data.shape[0]
y_data = xy[:, [-1]]
# y_data has labels from 0 ~ 6
print("y has one of the following values")
print(np.unique(y_data))
# X_data.shape = (101, 16) => 101 samples, 16 features
# y_data.shape = (101, 1) => 101 samples, 1 label
print("Shape of X data: ", X_data.shape)
print("Shape of y data: ", y_data.shape)
nb_classes = 7 # 0 ~ 6
X = tf.placeholder(tf.float32, [None, 16])
y = tf.placeholder(tf.int32, [None, 1]) # 0 ~ 6
target = tf.one_hot(y, nb_classes) # one hot
target = tf.reshape(target, [-1, nb_classes])
target = tf.cast(target, tf.float32)
W = tf.Variable(tf.random_normal([16, nb_classes]), name='weight')
b = tf.Variable(tf.random_normal([nb_classes]), name='bias')
def sigma(x):
# sigmoid function
# σ(x) = 1 / (1 + exp(-x))
return 1. / (1. + tf.exp(-x))
def sigma_prime(x):
# derivative of the sigmoid function
# σ'(x) = σ(x) * (1 - σ(x))
return sigma(x) * (1. - sigma(x))
# Forward propagtion
layer_1 = tf.matmul(X, W) + b
y_pred = sigma(layer_1)
# Loss Function (end of forwad propagation)
loss_i = - target * tf.log(y_pred) - (1. - target) * tf.log(1. - y_pred)
loss = tf.reduce_sum(loss_i)
# Dimension Check
assert y_pred.shape.as_list() == target.shape.as_list()
# Back propagation(chain rule)
d_loss = (y_pred - target) / (y_pred * (1. - y_pred) + 1e-7)
d_sigma = sigma_prime(layer_1)
d_layer = d_loss * d_sigma
d_b = d_layer
d_W = tf.matmul(tf.transpose(X), d_layer)
# Updating network using gradients
learning_rate = 0.01
train_step = [
tf.assign(W, W - learning_rate * d_W),
tf.assign(b, b - learning_rate * tf.reduce_sum(d_b)),
]
# Prediction and Accuracy
prediction = tf.argmax(y_pred, 1)
acct_mat = tf.equal(tf.argmax(y_pred, 1), tf.argmax(target, 1))
acct_res = tf.reduce_mean(tf.cast(acct_mat, tf.float32))
# Launch graph
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for step in range(500):
sess.run(train_step, feed_dict={X: X_data, y: y_data})
if step % 10 == 0:
# Within 300 steps, you should see an accuracy of 100%
step_loss, acc = sess.run([loss, acct_res], feed_dict={
X: X_data, y: y_data})
print("Step: {:5}\t Loss: {:10.5f}\t Acc: {:.2%}" .format(
step, step_loss, acc))
# Let's see if we can predict
pred = sess.run(prediction, feed_dict={X: X_data})
for p, y in zip(pred, y_data):
msg = "[{}]\t Prediction: {:d}\t True y: {:d}"
print(msg.format(p == int(y[0]), p, int(y[0])))
# Output Example
# Step: 0 Loss: 453.74799 Acc: 38.61%
# Step: 20 Loss: 95.05664 Acc: 88.12%
# Step: 40 Loss: 66.43570 Acc: 93.07%
# Step: 60 Loss: 53.09288 Acc: 94.06%
# ...
# Step: 290 Loss: 18.72972 Acc: 100.00%
# Step: 300 Loss: 18.24953 Acc: 100.00%
# Step: 310 Loss: 17.79592 Acc: 100.00%
# ...
# [True] Prediction: 0 True y: 0
# [True] Prediction: 0 True y: 0
# [True] Prediction: 3 True y: 3
# [True] Prediction: 0 True y: 0
# ...
# @
# lab-09-x-xor-nn-back_prop.py
# XOR with a 2-layer sigmoid network trained via hand-derived back-propagation.
import tensorflow as tf
import numpy as np

tf.set_random_seed(777)

learning_rate = 0.1

# XOR truth table.
x_data = [[0, 0],
          [0, 1],
          [1, 0],
          [1, 1]]
y_data = [[0],
          [1],
          [1],
          [0]]
x_data = np.array(x_data, dtype=np.float32)
y_data = np.array(y_data, dtype=np.float32)

X = tf.placeholder(tf.float32, [None, 2])
Y = tf.placeholder(tf.float32, [None, 1])

W1 = tf.Variable(tf.random_normal([2, 2]), name='weight1')
b1 = tf.Variable(tf.random_normal([2]), name='bias1')
l1 = tf.sigmoid(tf.matmul(X, W1) + b1)

W2 = tf.Variable(tf.random_normal([2, 1]), name='weight2')
b2 = tf.Variable(tf.random_normal([1]), name='bias2')
Y_pred = tf.sigmoid(tf.matmul(l1, W2) + b2)

# Loss function: binary cross-entropy.
cost = -tf.reduce_mean(Y * tf.log(Y_pred) + (1 - Y) *
                       tf.log(1 - Y_pred))

# Network
# p1 a1 l1 p2 a2 l2 (y_pred)
# X -> (*) -> (+) -> (sigmoid) -> (*) -> (+) -> (sigmoid) -> (loss)
# ^ ^ ^ ^
# | | | |
# W1 b1 W2 b2

# Loss derivative; 1e-7 guards against division by zero when Y_pred
# saturates at 0 or 1.
d_Y_pred = (Y_pred - Y) / (Y_pred * (1.0 - Y_pred) + 1e-7)

# Layer 2
d_sigma2 = Y_pred * (1 - Y_pred)
d_a2 = d_Y_pred * d_sigma2
d_p2 = d_a2
d_b2 = d_a2
d_W2 = tf.matmul(tf.transpose(l1), d_p2)

# Mean over the batch dimension.
d_b2_mean = tf.reduce_mean(d_b2, axis=[0])
d_W2_mean = d_W2 / tf.cast(tf.shape(l1)[0], dtype=tf.float32)

# Layer 1
d_l1 = tf.matmul(d_p2, tf.transpose(W2))
d_sigma1 = l1 * (1 - l1)
d_a1 = d_l1 * d_sigma1
d_b1 = d_a1
d_p1 = d_a1
d_W1 = tf.matmul(tf.transpose(X), d_a1)

# Mean over the batch dimension.
d_W1_mean = d_W1 / tf.cast(tf.shape(X)[0], dtype=tf.float32)
d_b1_mean = tf.reduce_mean(d_b1, axis=[0])

# Weight update
step = [
    tf.assign(W2, W2 - learning_rate * d_W2_mean),
    tf.assign(b2, b2 - learning_rate * d_b2_mean),
    tf.assign(W1, W1 - learning_rate * d_W1_mean),
    tf.assign(b1, b1 - learning_rate * d_b1_mean)
]

# Accuracy computation
# True if hypothesis > 0.5 else False
predicted = tf.cast(Y_pred > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

# Launch graph
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    print("shape", sess.run(tf.shape(X)[0], feed_dict={X: x_data}))

    for i in range(10001):
        sess.run([step, cost], feed_dict={X: x_data, Y: y_data})
        if i % 1000 == 0:
            print(i, sess.run([cost, d_W1], feed_dict={
                X: x_data, Y: y_data}), sess.run([W1, W2]))

    # Accuracy report
    h, c, a = sess.run([Y_pred, predicted, accuracy],
                       feed_dict={X: x_data, Y: y_data})
    print("\nHypothesis: ", h, "\nCorrect: ", c, "\nAccuracy: ", a)

# Hypothesis: [[ 0.01338224]
# [ 0.98166382]
# [ 0.98809403]
# [ 0.01135806]]
# Correct: [[ 0.]
# [ 1.]
# [ 1.]
# [ 0.]]
# Accuracy: 1.0