# Temporal-difference update of the Q table, applied twice in a row.
#
# Each pass:
#   1. looks up the id of the state the game `g` is in now,
#   2. moves Q[state_qid, opt_qid] toward
#      reward + discount_factor * (largest Q value in the new state's row),
#      scaled by learning_rate,
#   3. makes the new state the current one.
#
# NOTE(review): the second pass is textually identical to the first. By then
# `state_qid` has already been advanced, so it adjusts the entry for
# (new state, opt_qid) using the same `reward`. This looks like it may be an
# accidental duplicate -- confirm against the surrounding loop before
# removing it.

# --- pass 1 ---
new_state_qid = find_state_qid(g.get_shutable(), g.get_diced())
best_next_value = np.max(Q[new_state_qid, :])
td_error = reward + discount_factor * best_next_value - Q[state_qid, opt_qid]
Q[state_qid, opt_qid] += learning_rate * td_error
state_qid = new_state_qid

# --- pass 2 (same statements; `state_qid` is now the state found in pass 1) ---
new_state_qid = find_state_qid(g.get_shutable(), g.get_diced())
best_next_value = np.max(Q[new_state_qid, :])
td_error = reward + discount_factor * best_next_value - Q[state_qid, opt_qid]
Q[state_qid, opt_qid] += learning_rate * td_error
state_qid = new_state_qid