# Source: https://medium.com/emergent-future/simple-reinforcement-learning-with-tensorflow-part-0-q-learning-with-tables-and-neural-networks-d195264329d0#.pjz9g59ap
# ================================================================================
import gym
import numpy as np
import matplotlib.pyplot as plt
from gym.envs.registration import register
import random as pr

# ================================================================================
# Random argmax
# rargmax(): like np.argmax(), but breaks ties among maximal entries at random
def rargmax(vector):
  m=np.amax(vector)
  indices=np.nonzero(vector==m)[0]
  return pr.choice(indices)
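
# Sanity check (illustrative, not from the original tutorial): with tied maxima,
# rargmax() returns one of the tied indices at random, while np.argmax()
# would always return the first maximum
assert rargmax(np.array([0.0,1.0,1.0,0.0])) in (1,2)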

# ================================================================================
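# The stock FrozenLake env is slippery (stochastic transitions), so a custom
# id 'FrozenLake-v3' is registered with is_slippery=False to get a
# deterministic environment; the simple non-discounted Q update below relies on that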
register(
  id='FrozenLake-v3',
  entry_point='gym.envs.toy_text:FrozenLakeEnv',
  kwargs={'map_name':'4x4','is_slippery': False}
)

env=gym.make('FrozenLake-v3')
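
# Quick sanity check (illustrative, assuming the standard toy_text FrozenLakeEnv):
# the 4x4 map has 16 discrete states and 4 discrete actions
assert env.observation_space.n==16 and env.action_space.n==4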

# ================================================================================
# c Q: (16,4) 2D array of Q values, one row per state and one column per action, initialized to zeros
Q=np.zeros([env.observation_space.n,env.action_space.n])

# @ 2000 episodes
num_episodes=2000

# ================================================================================
# rList will save the total reward ("rAll") of each episode
rList=[]
for i in range(num_episodes):
  # c state: reset env and get the initial state
  state=env.reset()

  rAll=0

  # done becomes True when the episode ends (goal reached or fell into a hole)
  done=False

  # The Q-Table learning algorithm
  while not done:
    # c action: select the action with the maximal Q value for this state
    # If several actions are tied at the maximum, rargmax() (random argmax)
    # picks one of them at random
    action=rargmax(Q[state,:])
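
    # (Aside, not part of the original tutorial: a common refinement is
    # epsilon-greedy exploration, sketched here as comments only so the
    # original algorithm stays unchanged; e is a hypothetical exploration rate)
    # e=0.1
    # if pr.random()<e:
    #   action=env.action_space.sample()
    # else:
    #   action=rargmax(Q[state,:])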

    # ================================================================================
    # Execute action; env.step() returns (new_state, reward, done, info)
    new_state,reward,done,_=env.step(action)

    # ================================================================================
    # Update Q[state,action]: immediate reward plus the best Q value of the new state
    # Q[new_state,:] holds the Q values of all four actions (LEFT, DOWN, RIGHT, UP) in new_state
    Q[state,action]=reward+np.max(Q[new_state,:])
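
    # Note (for reference, not what this script does): the general Q-learning
    # update is
    #   Q[s,a] <- Q[s,a] + lr*(reward + gamma*max(Q[s',:]) - Q[s,a])
    # The line above is the simplified form with lr=1 and gamma=1, which is
    # workable here because this FrozenLake variant is deterministic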

    # ================================================================================
    # Accumulate the reward from each step into this episode's total
    rAll+=reward

    # https://raw.githubusercontent.com/youngminpark2559/pracrl/master/shkim-rl/pic/2019_04_22_00:04:43.png
    # If the agent falls into a hole, the episode ends with reward 0
    # If the agent reaches the goal, the episode ends with reward +1
    # So rAll stores 0 or +1 for each episode

    # ================================================================================
    state=new_state

  rList.append(rAll)

# ================================================================================
print("Success rate: "+str(sum(rList)/num_episodes))
print("Final Q-Table Values")
print("LEFT DOWN RIGHT UP")
print(Q)
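
# (Optional, illustrative) The greedy policy can be read off the learned Q-table;
# np.argmax over axis=1 picks the best action per state (0=LEFT, 1=DOWN,
# 2=RIGHT, 3=UP), reshaped to the 4x4 grid:
# print(np.argmax(Q,axis=1).reshape(4,4))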
plt.bar(range(len(rList)),rList,color="blue")
plt.show()
# ================================================================================