# Reference: https://medium.com/emergent-future/simple-reinforcement-learning-with-tensorflow-part-0-q-learning-with-tables-and-neural-networks-d195264329d0#.pjz9g59ap
# ================================================================================
import gym
import numpy as np
import matplotlib.pyplot as plt
# ================================================================================
# Tabular Q-learning on FrozenLake.
#
# Slippery mode: 'FrozenLake-v0' is created with its default settings, in which
# transitions are stochastic (the agent does not always move where it intended).
env = gym.make('FrozenLake-v0')
# ================================================================================
# Q: 2D table of action values, one row per state and one column per action
# ((16, 4) for this environment).
Q = np.zeros([env.observation_space.n, env.action_space.n])
# ================================================================================
# learning_rate: weight given to each newly observed target; the remaining
# (1 - learning_rate) keeps the previous estimate. Blending is required here
# because a single noisy transition must not fully overwrite what was learned.
learning_rate = .85
# dis: discount factor on future reward
dis = .99
num_episodes = 2000
# ================================================================================
# rList: total reward collected in each episode, in episode order.
rList = []
for i in range(num_episodes):
    state = env.reset()
    rAll = 0
    done = False
    # ============================================================================
    while not done:
        # Select the action by adding Gaussian noise to the Q values of the
        # current state; the noise is divided by (i + 1), so exploration fades
        # as the episode index grows.
        action = np.argmax(
            Q[state, :] + np.random.randn(1, env.action_space.n) / (i + 1))
        # ========================================================================
        # Do action
        new_state, reward, done, _ = env.step(action)
        # ========================================================================
        # Update Q table: move the old estimate towards the one-step target
        # (reward + discounted best value of the next state) by learning_rate,
        # instead of replacing it outright.
        target = reward + dis * np.max(Q[new_state, :])
        Q[state, action] = ((1 - learning_rate) * Q[state, action]
                            + learning_rate * target)
        # ========================================================================
        # Newly observed state becomes the current state
        state = new_state
        # ========================================================================
        # Accumulate all rewards in single episode
        rAll += reward
    # ============================================================================
    rList.append(rAll)
# ================================================================================
# Average reward per episode, the learned table, and a per-episode reward chart.
print("Score over time: " + str(sum(rList) / num_episodes))
print("Final Q-Table Values")
print(Q)
plt.bar(range(len(rList)), rList, color="blue")
plt.show()
# Result plot: /home/young/Pictures/2019_04_22_12:45:09.png
# Each vertical line in the plot marks an episode that ended in success.