004-002-lab. Slicing, using a loaded csv file as data, queue runners, batch, reader

# lab-04-3-file_input_linear_regression.py

# @
# data-01-test-score.csv
# EXAM1,EXAM2,EXAM3,FINAL
# 73,80,75,152
# 93,88,93,185
# 89,91,90,180
# 96,98,100,196
# 73,66,70,142

import tensorflow as tf
import numpy as np
tf.set_random_seed(777)

# Slicing
nums = list(range(5))
print(nums)       # < [0, 1, 2, 3, 4]
print(nums[2:4])  # < [2, 3]
print(nums[2:])   # < [2, 3, 4]
print(nums[:2])   # < [0, 1]
print(nums[:])    # < [0, 1, 2, 3, 4]
print(nums[:-1])  # < [0, 1, 2, 3]
nums[2:4] = [8, 9]
print(nums)       # < [0, 1, 8, 9, 4]

# We can also use slicing on a numpy multi-dimensional array
b = np.array([[1, 2, 3, 4],
              [5, 6, 7, 8],
              [9, 10, 11, 12]])
print(b)
# < array([[ 1,  2,  3,  4],
# <        [ 5,  6,  7,  8],
# <        [ 9, 10, 11, 12]])

# from all rows, select column 1
b[:, 1]
# < array([ 2,  6, 10])
b[-1]
# < array([ 9, 10, 11, 12])
# from the last row, select all columns
b[-1, :]
# < array([ 9, 10, 11, 12])
b[-1, ...]
# < array([ 9, 10, 11, 12])
b[0:2, :]
# < array([[1, 2, 3, 4],
# <        [5, 6, 7, 8]])

# I use loadtxt from numpy
xy = np.loadtxt('data-01-test-score.csv', delimiter=',', dtype=np.float32)

# I create a dataset from the data-01-test-score.csv file
# from all rows, select columns 0:-1, that is, the 3 feature columns
x_data = xy[:, 0:-1]
# from all rows, select the last column
# ([-1] as a list keeps y_data 2-D; see the shape sketch after this lab section)
y_data = xy[:, [-1]]

# Check shape and data
print(x_data.shape, x_data, len(x_data))
print(y_data.shape, y_data)

# XW = H(X)
# $$$[None, 3] \cdot [3, 1] = [None, 1]$$$
X = tf.placeholder(tf.float32, shape=[None, 3])
Y = tf.placeholder(tf.float32, shape=[None, 1])

W = tf.Variable(tf.random_normal([3, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Hypothesis function node
hypothesis = tf.matmul(X, W) + b

# Loss function node
cost = tf.reduce_mean(tf.square(hypothesis - Y))

# We minimize the loss with GradientDescentOptimizer
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-5)
train = optimizer.minimize(cost)

sess = tf.Session()
sess.run(tf.global_variables_initializer())

for step in range(2001):
    cost_val, hy_val, _ = sess.run(
        [cost, hypothesis, train], feed_dict={X: x_data, Y: y_data})
    if step % 10 == 0:
        print(step, "Cost: ", cost_val, "\nPrediction:\n", hy_val)

# When my scores are [100, 70, 101], what will the predicted final exam score be?
print("Your score will be ",
      sess.run(hypothesis, feed_dict={X: [[100, 70, 101]]}))
# < Your score will be [[ 181.73277283]]

print("Other scores will be ",
      sess.run(hypothesis, feed_dict={X: [[60, 70, 110], [90, 100, 80]]}))
# < Other scores will be [[ 145.86265564]
# <  [ 187.23129272]]
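# A minimal sketch (not part of the original lab) of why y_data above is sliced
# with [-1] inside a list: xy[:, -1] returns a 1-D array of shape (N,), while
# xy[:, [-1]] keeps a 2-D column of shape (N, 1), which is what the placeholder
# shape [None, 1] expects. The small 'demo' array below is made up for illustration.
import numpy as np

demo = np.array([[73., 80., 75., 152.],
                 [93., 88., 93., 185.]], dtype=np.float32)
print(demo[:, -1].shape)    # (2,)   1-D, cannot be fed to a placeholder of shape [None, 1]
print(demo[:, [-1]].shape)  # (2, 1) 2-D column, matches shape=[None, 1]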
# @
# lab-04-4-tf_reader_linear_regression.py

# When the dataset files are too big to load into memory at once,
# TensorFlow provides 'queue runners'.
# TensorFlow shuffles the files and then stacks them onto a filename queue.
# One queue feeds multiple readers (reader1, reader2, ...).
# Each reader's output is decoded (parsed) by a decoder (parser) and then stacked into an example queue.
# We can then load examples in batches.

# Lab 4 Multi-variable linear regression
# https://www.tensorflow.org/programmers_guide/reading_data

import tensorflow as tf
tf.set_random_seed(777)

filename_queue = tf.train.string_input_producer(
    ['data-01-test-score.csv', 'data-02-test-score.csv', 'data-02-test-score.csv'],
    shuffle=False,
    name='filename_queue')

# I declare the reader node
reader = tf.TextLineReader()
key, value = reader.read(filename_queue)

# We define default values in case empty columns exist.
# The default values also specify the type of the decoded result.
record_defaults = [[0.], [0.], [0.], [0.]]

# We use the decode_csv decoder (parser)
xy = tf.decode_csv(value, record_defaults=record_defaults)

# We create batch data
# (a shuffled-batch variant with tf.train.shuffle_batch is sketched at the end of these notes)
train_x_batch, train_y_batch = \
    tf.train.batch([xy[0:-1], xy[-1:]], batch_size=10)

X = tf.placeholder(tf.float32, shape=[None, 3])
Y = tf.placeholder(tf.float32, shape=[None, 1])

W = tf.Variable(tf.random_normal([3, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

hypothesis = tf.matmul(X, W) + b

cost = tf.reduce_mean(tf.square(hypothesis - Y))

optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-5)
train = optimizer.minimize(cost)

sess = tf.Session()
sess.run(tf.global_variables_initializer())

# You start populating the filename queue
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)

for step in range(2001):
    x_batch, y_batch = sess.run([train_x_batch, train_y_batch])
    cost_val, hy_val, _ = sess.run(
        [cost, hypothesis, train], feed_dict={X: x_batch, Y: y_batch})
    if step % 10 == 0:
        print(step, "Cost: ", cost_val, "\nPrediction:\n", hy_val)

coord.request_stop()
coord.join(threads)

# Let the model predict my final exam score
print("Your score will be ",
      sess.run(hypothesis, feed_dict={X: [[100, 70, 101]]}))
# < Your score will be [[ 177.78144836]]

print("Other scores will be ",
      sess.run(hypothesis, feed_dict={X: [[60, 70, 110], [90, 100, 80]]}))
# < Other scores will be [[ 141.10997009]
# <  [ 191.17378235]]
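# A minimal sketch (not part of the original lab): tf.train.batch above dequeues
# records in file order because the filename queue uses shuffle=False. If shuffled
# mini-batches are wanted, tf.train.shuffle_batch can be used instead. The
# capacity and min_after_dequeue values below are illustrative assumptions, not
# tuned settings.
import tensorflow as tf

filename_queue = tf.train.string_input_producer(
    ['data-01-test-score.csv'], shuffle=False, name='filename_queue')
reader = tf.TextLineReader()
key, value = reader.read(filename_queue)
xy = tf.decode_csv(value, record_defaults=[[0.], [0.], [0.], [0.]])

# shuffle_batch keeps at least min_after_dequeue records in its buffer and
# samples each mini-batch from that buffer, so records come out in random order.
train_x_batch, train_y_batch = tf.train.shuffle_batch(
    [xy[0:-1], xy[-1:]],
    batch_size=10,
    capacity=500,            # total queue capacity; must be larger than min_after_dequeue
    min_after_dequeue=100)   # buffer size used for random sampling

with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    x_batch, y_batch = sess.run([train_x_batch, train_y_batch])
    print(x_batch.shape, y_batch.shape)  # (10, 3) (10, 1)
    coord.request_stop()
    coord.join(threads)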