Skip to content

Commit 8debcd8

Browse files
ad71 authored and norvig committed
Fixes problems in mdp.py (aimacode#918)
* Added MDP2 class * Updated loop termination condition in value_iteration
1 parent aba4854 commit 8debcd8

File tree

1 file changed

+14
-1
lines changed

1 file changed

+14
-1
lines changed

mdp.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,19 @@ def check_consistency(self):
104104
assert abs(s - 1) < 0.001
105105

106106

107+
class MDP2(MDP):
    """MDP subclass that looks transitions up directly by state and action.

    Transition outcomes are read from ``self.transitions[state][action]``.
    A ``None`` action produces a single zero-probability outcome that
    stays in the current state.
    """

    def __init__(self, init, actlist, terminals, transitions, reward=None, gamma=0.9):
        """Pass all arguments through, unchanged, to the ``MDP`` initializer."""
        MDP.__init__(self, init, actlist, terminals, transitions, reward, gamma=gamma)

    def T(self, state, action):
        """Return the list of ``(probability, next_state)`` pairs for ``action`` in ``state``."""
        if action is not None:
            return self.transitions[state][action]
        # No action given: the only outcome is remaining in ``state``,
        # reported with probability 0.0.
        return [(0.0, state)]
118+
119+
107120
class GridMDP(MDP):
108121

109122
"""A two-dimensional grid MDP, as in [Figure 17.1]. All you have to do is
@@ -186,7 +199,7 @@ def value_iteration(mdp, epsilon=0.001):
186199
U1[s] = R(s) + gamma * max(sum(p*U[s1] for (p, s1) in T(s, a))
187200
for a in mdp.actions(s))
188201
delta = max(delta, abs(U1[s] - U[s]))
189-
if delta < epsilon*(1 - gamma)/gamma:
202+
if delta <= epsilon*(1 - gamma)/gamma:
190203
return U
191204

192205

0 commit comments

Comments
 (0)