024. Deep learning with Keras
# @
# Run bmi-create.py (the code below) to create the bmi.csv file
import random
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.callbacks import EarlyStopping
import pandas as pd
import numpy as np
# This method calculates the BMI and returns one of 3 labels
def calc_bmi(h,w):
    bmi=w/(h/100)**2
    if bmi<18.5: 
        return "thin"
    if bmi<25: 
        return "normal"
    return "fat"
# I prepare the csv file where the data will be written
fp=open("bmi.csv","w",encoding="utf-8")
fp.write("height,weight,label\r\n")
# I create random data
# I create a cnt dictionary where each key's value is initialized to 0
cnt={"thin":0,"normal":0,"fat":0}
# I iterate 20000 times to generate random records
for i in range(20000):
    # I generate a random number for the height
    h=random.randint(120,200)
    # I generate a random number for the weight
    w=random.randint(35, 80)
    # I get bmi label
    label=calc_bmi(h,w)
    # I increment the count for the corresponding label in the cnt dictionary
    cnt[label]+=1
    # I write contents into bmi.csv in the following format
    fp.write("{0},{1},{2}\r\n".format(h,w,label))
# I close the file stream
fp.close()
print("file generation is completed : ",cnt)
# @
# Now, I will train a keras model with the following steps
# 1. I load the modules I need for this task
# 2. I load the bmi.csv file data
# 3. I create a model which provides learning methods such as fit() and predict()
#    - fit() for learning
#    - predict() for predicting
#    - evaluate() for evaluating
# I process the features of the train data as follows
# (each row is normalized; the code below stores them in [weight/100, height/200] order)
# [
#     [weight/100, height/200],
#     [weight/100, height/200],
#     ...
# ]
# I process the labels of the train data as follows
# [
#     # each label is converted into a one-hot encoding:
#     # "thin" -> [1,0,0], "normal" -> [0,1,0], "fat" -> [0,0,1]
#     "thin",
#     "normal",
#     "fat"
# ]
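# As a concrete example of the two transforms above (the numbers are assumed for illustration):
# a record (height=180, weight=70, label="normal") becomes
#     features: [70/100, 180/200] = [0.7, 0.9]
#     label:    "normal" -> [0,1,0]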
# @
# I load bmi.csv file
csv=pd.read_csv("d://chromedown//bmi.csv")
# I bring all the data from the weight and height columns
# and normalize them
csv["weight"]/=100 
csv["height"]/=200
# I create a bmi_class dictionary
# which maps each label to its one-hot-encoded list
bmi_class={
    "thin":[1,0,0],
    "normal":[0,1,0],
    "fat":[0,0,1]
}
# I create an array of 20000 rows, each with 3 slots, initialized to zero
# (every row is filled in by the loop below)
# For example,
# [
#     [0,0,0],
#     [0,0,0],
#     ...,
#     [0,0,0]
# ]
y=np.zeros((20000,3))
# I iterate over csv["label"] with enumerate(), which yields (index, value) pairs,
# and fill in the one-hot row for each label
for i,v in enumerate(csv["label"]):
    y[i]=bmi_class[v]
    # I will use the following output as the labels
    # for the corresponding features of the train data
    # output:
    # [
    #     [0 0 1]
    #     [1 0 0]
    #     ...
    # ]
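# An equivalent one-hot matrix could also be built with pandas, for example
# pd.get_dummies(csv["label"])[["thin","normal","fat"]].values,
# as long as the columns are reordered to match bmi_class;
# the explicit loop above keeps that ordering obvious.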
# I convert the feature columns from the csv data into a matrix
x=csv[["weight", "height"]].values
# I divide data into train data and test data
x_train,y_train=x[1:15001],y[1:15001]
x_test,y_test=x[15001:20001],y[15001:20001]
# I use the Sequential model,
# compose the layers,
# and get the finished model
model=Sequential()
model.add(Dense(512, input_shape=(2,)))
model.add(Activation('relu'))
model.add(Dropout(0.1))
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.1))
model.add(Dense(3))
model.add(Activation('softmax'))
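# model.summary() prints the layer stack and parameter counts, a quick way to
# confirm the 2 -> 512 -> 512 -> 3 architecture above is wired as intended
model.summary()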
# I actually build the model by compiling it
model.compile("rmsprop","categorical_crossentropy",metrics=["accuracy"])
# For learning, the simplest call is
# model.fit(x_train,y_train)
# To control the training better, you can pass additional arguments
model.fit(
    x_train,
    y_train,
    batch_size=100,
    epochs=20,
    validation_split=0.1,
    callbacks=[EarlyStopping(monitor='val_loss',patience=2)],
    verbose=1
)
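# fit() returns a History object; capturing it lets you inspect the per-epoch
# curves afterwards (a minimal sketch, assuming matplotlib is installed):
# history=model.fit(x_train,y_train,batch_size=100,epochs=20,validation_split=0.1)
# import matplotlib.pyplot as plt
# plt.plot(history.history["loss"],label="loss")
# plt.plot(history.history["val_loss"],label="val_loss")
# plt.legend()
# plt.show()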
# For predicting, pass the features you want classified
pred=model.predict(x_test)
# For evaluating
score=model.evaluate(x_test,y_test)
# score is returned as a list: [loss, accuracy]
print("score of loss:",score[0])
print("score of accuracy:",score[1])
# @
# Refer to the keras documentation at http://keras.io/modules/Sequential
# For compile(), we pass
# 1st argument: optimizer
# 2nd argument: loss
# 3rd argument: metrics (default=None); if you pass this, you get extra evaluation
#               metrics reported during training and testing
# The documentation lists various kinds of optimizers;
# here we try the RMSprop optimizer
# It also lists various kinds of losses;
# here we use the categorical_crossentropy loss
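# The same compile() call written with explicit keyword arguments and an optimizer
# object looks like this (a sketch; the string form used above is equivalent with
# the default settings):
# from keras.optimizers import RMSprop
# model.compile(optimizer=RMSprop(),loss="categorical_crossentropy",metrics=["accuracy"])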
# @
# Run this python file
# and you will see tensorflow running as the backend;
# the loss and accuracy values are printed during training