# ================================================================================
import time

import gym
import matplotlib.pyplot as plt
import numpy as np

import utils.prints as print_utils
# ================================================================================
# FrozenLake-v0 as registered in gym (slippery mode)
env = gym.make("FrozenLake-v0")
# ================================================================================
# c Q: action-value table, one row per state and one column per action
# ((16, 4) for this environment), initialised to zero
n_states = env.observation_space.n
n_actions = env.action_space.n
Q = np.zeros((n_states, n_actions))
# ================================================================================
# c learning_rate: weight given to the new one-step target in each Q update
# (0.85 belief in the Q-mentor's guide; the previous estimate keeps the rest)
learning_rate = 0.85
# c dis: discount factor on future reward
dis = 0.99
# c num_episodes: number of training episodes to run
num_episodes = 2000
# ================================================================================
# c rList: total reward collected in each episode, in episode order
rList = []
for episode in range(num_episodes):
    # Start a fresh episode from the state returned by env.reset()
    state = env.reset()
    episode_reward = 0
    # ================================================================================
    # Step until the environment reports the episode is done
    while True:
        # c action: greedy choice over Q[state] after adding Gaussian noise;
        # the noise is divided by (episode + 1), so it shrinks as training goes on
        noise = np.random.randn(1, env.action_space.n) / (episode + 1)
        action = np.argmax(Q[state, :] + noise)
        # ================================================================================
        # Do action
        next_state, reward, done, _ = env.step(action)
        # ================================================================================
        # @ Update Q table
        # Blend the previous estimate (weight 1 - learning_rate) with the
        # one-step target reward + dis * max_a Q[next_state, a] (weight learning_rate)
        td_target = reward + dis * np.max(Q[next_state, :])
        kept_part = (1 - learning_rate) * Q[state, action]
        Q[state, action] = kept_part + learning_rate * td_target
        # ================================================================================
        # Newly observed state becomes the current state
        state = next_state
        # ================================================================================
        # Accumulate all rewards in single episode
        episode_reward += reward
        if done:
            break
    # ================================================================================
    rList.append(episode_reward)
# ================================================================================
# Report: mean total reward per episode across the whole run, then the
# learned Q table.
print("Score over time: " + str(sum(rList) / num_episodes))
print("Final Q-Table Values")
print(Q)
# Bar chart with one bar per episode; bar height is that episode's total
# reward, so only episodes with non-zero reward show a visible bar.
# NOTE: `plt` is matplotlib.pyplot and must be imported at the top of the
# file (`import matplotlib.pyplot as plt`); without it this line raises
# NameError.
plt.bar(range(len(rList)), rList, color="blue")
plt.show()
# Plot screenshot (author's local path): /home/young/Pictures/2019_04_22_12:52:10.png
# In the plot, each vertical line marks a success case.