@@ -6,19 +6,16 @@
 dictionary of {state:number} pairs. We then define the value_iteration
 and policy_iteration algorithms."""
 
-# (Written for the second edition of AIMA; expect some discrepanciecs
-# from the third edition until this gets reviewed.)
-
 from utils import *
 
 class MDP:
     """A Markov Decision Process, defined by an initial state, transition model,
     and reward function. We also keep track of a gamma value, for use by
     algorithms. The transition model is represented somewhat differently from
-    the text. Instead of T(s, a, s') being a probability number for each
-    state/action/state triplet, we instead have T(s, a) return a list of (p, s')
+    the text. Instead of P(s' | s, a) being a probability number for each
+    state/state/action triplet, we instead have T(s, a) return a list of (p, s')
     pairs. We also keep track of the possible states, terminal states, and
-    actions for each state. [page 615]"""
+    actions for each state. [page 646]"""
 
     def __init__(self, init, actlist, terminals, gamma=.9):
         update(self, init=init, actlist=actlist, terminals=terminals,
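
The (p, s') convention in the docstring above is easiest to see on a concrete instance. Below is a minimal, self-contained sketch of a two-state MDP that follows that interface; the class name TwoStateMDP, its states, probabilities, and rewards are illustrative assumptions, not part of mdp.py, and it avoids the update helper from utils so it runs on its own.

# Illustrative sketch only: mirrors the T(s, a) -> [(p, s'), ...] convention
# described in the MDP docstring, but is not part of mdp.py itself.
class TwoStateMDP:
    """Two states 'a' and 'b'; the single action 'go' usually moves to the
    other state but sometimes stays put. Being in 'b' earns reward 1."""

    def __init__(self, gamma=0.9):
        self.states = ['a', 'b']
        self.terminals = []
        self.gamma = gamma

    def actions(self, state):
        # In a terminal state only the "exit" action None would be available.
        return [None] if state in self.terminals else ['go']

    def T(self, state, action):
        # Transition model: a list of (probability, successor) pairs rather
        # than a dense P(s' | s, a) table.
        if action is None:
            return [(0.0, state)]
        other = 'b' if state == 'a' else 'a'
        return [(0.8, other), (0.2, state)]

    def R(self, state):
        # Reward for being in a state.
        return 1.0 if state == 'b' else 0.0

The point of the representation is that T(s, a) enumerates only the reachable successors with their probabilities, which keeps sparse transition models compact.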
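Since the module docstring says value_iteration is defined over this representation, here is a rough sketch of how such an algorithm would consume T(s, a), using the hypothetical TwoStateMDP above; the function name, structure, and stopping criterion are assumptions for illustration and may differ from the actual value_iteration in mdp.py.

# Sketch of value iteration over the (p, s') representation; assumes an
# object with .states, .terminals via actions(), .gamma, T(s, a), and R(s).
def value_iteration_sketch(mdp, epsilon=0.001):
    """Repeat the Bellman update
    U(s) <- R(s) + gamma * max_a sum over (p, s') in T(s, a) of p * U(s')
    until the largest change is small enough."""
    U = {s: 0.0 for s in mdp.states}
    while True:
        U_new, delta = {}, 0.0
        for s in mdp.states:
            U_new[s] = mdp.R(s) + mdp.gamma * max(
                sum(p * U[s1] for (p, s1) in mdp.T(s, a))
                for a in mdp.actions(s))
            delta = max(delta, abs(U_new[s] - U[s]))
        U = U_new
        if delta <= epsilon * (1 - mdp.gamma) / mdp.gamma:
            return U

# Example use with the illustrative MDP defined above:
# print(value_iteration_sketch(TwoStateMDP()))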