# AI Drill, Part 3, Listing 1: skinner_QL.py

import numpy as np
# Simulator class definition
class MySimulator():
    """Two-state Skinner-box simulator driven by two buttons.

    State 0 means the power is off and state 1 means it is on.  Action 0
    presses the power button; any other action presses the action button.
    A reward of 1 is paid only when the action button is pressed while the
    power is on.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Switch the power off and return the initial state (0)."""
        self._state = 0
        return self._state

    def step(self, action):
        """Apply ``action`` and return the tuple ``(new_state, reward)``."""
        reward = 0
        if self._state == 0:  # power is currently off
            if action == 0:  # power button pressed
                self._state = 1  # power turns on
            else:  # action button pressed
                self._state = 0  # power stays off, nothing happens
        else:  # power is currently on
            if action == 0:  # power button pressed
                self._state = 0  # power turns off
            else:  # action button pressed
                self._state = 1  # power stays on
                reward = 1  # the only rewarded transition
        return self._state, reward
# Q-table class definition
class MyQTable():
    """Tabular Q-function for a problem with 2 states and 2 actions.

    The table is indexed as ``_Qtable[state, action]`` in every method, so
    the values written by ``update_Qtable`` are the same entries that
    ``get_action`` reads when it picks the greedy action.
    """

    def __init__(self, alpha=0.6, gamma=0.9):
        """Create a zero-initialised table.

        Args:
            alpha: Learning rate used by ``update_Qtable``.
            gamma: Discount factor used by ``update_Qtable``.
        """
        self._Qtable = np.zeros((2, 2))
        self._alpha = alpha
        self._gamma = gamma

    def get_action(self, state, epsilon):
        """Pick an action for ``state`` with an epsilon-greedy rule.

        With probability ``epsilon`` a uniformly random action is returned;
        otherwise one of the actions with the highest Q-value for ``state``
        is returned, ties being broken at random.
        """
        if epsilon > np.random.uniform(0, 1):  # explore
            next_action = np.random.choice([0, 1])
        else:  # exploit
            q_row = self._Qtable[state]
            best_actions = np.where(q_row == q_row.max())[0]
            next_action = np.random.choice(best_actions)
        return next_action

    def update_Qtable(self, state, action, reward, next_state):
        """Apply one Q-learning update for a transition and return the table.

        The entry for ``(state, action)`` is moved towards
        ``reward + gamma * max_a Q[next_state, a]`` by a fraction ``alpha``.
        """
        # Bootstrap from the best action available in the next state; the
        # row index is the state, matching the layout used by get_action.
        next_maxQ = self._Qtable[next_state].max()
        self._Qtable[state, action] = (
            (1 - self._alpha) * self._Qtable[state, action]
            + self._alpha * (reward + self._gamma * next_maxQ)
        )
        return self._Qtable
44
def main():
    """Train a Q-table on the simulator and save it to ``Qvalue.txt``.

    Runs 10 episodes of 5 steps each.  Every step prints the state, the
    chosen action and the reward; every episode prints its total reward and
    the current Q-table.
    """
    # NOTE(review): the listing's indentation was lost.  Printing the summary
    # and the table once per episode, and saving once at the end, is the
    # assumed nesting -- confirm against the published listing.
    num_episodes = 10  # total number of episodes
    max_number_of_steps = 5  # number of actions per episode
    env = MySimulator()
    tab = MyQTable()

    for episode in range(num_episodes):
        state = env.reset()
        episode_reward = 0
        # Exploration rate falls linearly from 1.0 in the first episode
        # to 1/num_episodes in the last one.
        epsilon = 1 - episode / num_episodes
        for _ in range(max_number_of_steps):
            action = tab.get_action(state, epsilon=epsilon)
            next_state, reward = env.step(action)
            print(state, action, reward)
            tab.update_Qtable(state, action, reward, next_state)
            state = next_state
            episode_reward += reward
        print(f'Episode:{episode+1:4.0f}, R:{episode_reward:3.0f}')
        print(tab._Qtable)
    np.savetxt('Qvalue.txt', tab._Qtable)
64
# Run the training loop only when this file is executed as a script.
if __name__ == '__main__':
    main()