@@ -64,6 +64,22 @@ def display(gridmdp, _height, _width):
6464
6565 dialog .mainloop ()
6666
def display_best_policy(_best_policy, _height, _width):
    ''' shows the best policy as a grid of labels in its own window

    _best_policy is indexed as _best_policy[row][col]; each entry becomes the
    text of one label. At least one row and one column are always drawn, even
    when _height or _width is below 1.
    '''

    window = tk.Toplevel()
    window.wm_title('Best Policy')

    frame = tk.Frame(window)
    frame.pack(side=tk.TOP, fill=tk.BOTH, expand=True)

    row_count = max(1, _height)
    col_count = max(1, _width)
    cell_font = ('Helvetica', 12, 'bold')

    for row in range(row_count):
        policy_row = _best_policy[row]
        for col in range(col_count):
            # grid positions are offset by one from the policy indices
            cell = ttk.Label(frame, text=policy_row[col], font=cell_font)
            cell.grid(row=row + 1, column=col + 1, padx=3, pady=3)

    # enters the Tk event loop for this window
    window.mainloop()
82+
6783def initialize_dialogbox (_width , _height , gridmdp , terminals , buttons ):
6884 ''' creates dialogbox for initialization '''
6985
@@ -98,7 +114,7 @@ def initialize_dialogbox(_width, _height, gridmdp, terminals, buttons):
98114
99115 btn_apply = ttk .Button (container , text = 'Apply' , command = partial (initialize_update_table , _width , _height , gridmdp , terminals , buttons , reward , term , wall , label_reward , entry_reward , rbtn_term , rbtn_wall ))
100116 btn_apply .grid (row = 5 , column = 0 , sticky = 'nsew' , pady = 5 , padx = 5 )
101- btn_reset = ttk .Button (container , text = 'Reset' , command = partial (initialize_reset_all , _width , _height , gridmdp , terminals , buttons , label_reward , entry_reward , rbtn_wall , rbtn_term ))
117+ btn_reset = ttk .Button (container , text = 'Reset' , command = partial (initialize_reset_all , _width , _height , gridmdp , terminals , buttons , reward , term , wall , label_reward , entry_reward , rbtn_wall , rbtn_term ))
102118 btn_reset .grid (row = 5 , column = 1 , sticky = 'nsew' , pady = 5 , padx = 5 )
103119 btn_ok = ttk .Button (container , text = 'Ok' , command = dialog .destroy )
104120 btn_ok .grid (row = 5 , column = 2 , sticky = 'nsew' , pady = 5 , padx = 5 )
@@ -146,9 +162,12 @@ def initialize_update_table(_width, _height, gridmdp, terminals, buttons, reward
146162 for j in range (max (1 , _width )):
147163 update_table (i , j , gridmdp , terminals , buttons , reward , term , wall , label_reward , entry_reward , rbtn_term , rbtn_wall )
148164
149- def reset_all (_height , i , j , gridmdp , terminals , buttons , label_reward , entry_reward , rbtn_wall , rbtn_term ):
165+ def reset_all (_height , i , j , gridmdp , terminals , buttons , reward , term , wall , label_reward , entry_reward , rbtn_wall , rbtn_term ):
150166 ''' functionality for reset button '''
151167
168+ reward .set (0.0 )
169+ term .set (0 )
170+ wall .set (0 )
152171 gridmdp [i ][j ] = 0.0
153172 buttons [i ][j ].configure (style = 'TButton' )
154173 buttons [i ][j ].config (text = f'({ _height - i - 1 } , { j } )' )
@@ -163,12 +182,12 @@ def reset_all(_height, i, j, gridmdp, terminals, buttons, label_reward, entry_re
163182 rbtn_wall .state (['!focus' , '!selected' ])
164183 rbtn_term .state (['!focus' , '!selected' ])
165184
def initialize_reset_all(_width, _height, gridmdp, terminals, buttons, reward, term, wall, label_reward, entry_reward, rbtn_wall, rbtn_term):
    ''' applies reset_all to every cell of the grid

    Cells are visited row by row; at least one row and one column are
    visited even when _height or _width is below 1.
    '''

    row_count = max(1, _height)
    col_count = max(1, _width)
    # everything after the cell coordinates is forwarded to reset_all unchanged
    shared_args = (gridmdp, terminals, buttons, reward, term, wall, label_reward, entry_reward, rbtn_wall, rbtn_term)

    for row in range(row_count):
        for col in range(col_count):
            reset_all(_height, row, col, *shared_args)
172191
173192def external_reset (_width , _height , gridmdp , terminals , buttons ):
174193 ''' reset from edit menu '''
@@ -263,7 +282,7 @@ def dialogbox(i, j, gridmdp, terminals, buttons, _height):
263282
264283 btn_apply = ttk .Button (container , text = 'Apply' , command = partial (update_table , i , j , gridmdp , terminals , buttons , reward , term , wall , label_reward , entry_reward , rbtn_term , rbtn_wall ))
265284 btn_apply .grid (row = 5 , column = 0 , sticky = 'nsew' , pady = 5 , padx = 5 )
266- btn_reset = ttk .Button (container , text = 'Reset' , command = partial (reset_all , _height , i , j , gridmdp , terminals , buttons , label_reward , entry_reward , rbtn_wall , rbtn_term ))
285+ btn_reset = ttk .Button (container , text = 'Reset' , command = partial (reset_all , _height , i , j , gridmdp , terminals , buttons , reward , term , wall , label_reward , entry_reward , rbtn_wall , rbtn_term ))
267286 btn_reset .grid (row = 5 , column = 1 , sticky = 'nsew' , pady = 5 , padx = 5 )
268287 btn_ok = ttk .Button (container , text = 'Ok' , command = dialog .destroy )
269288 btn_ok .grid (row = 5 , column = 2 , sticky = 'nsew' , pady = 5 , padx = 5 )
@@ -595,6 +614,9 @@ def animate_graph(self, i):
595614 if (self .delta < self .epsilon * (1 - self .gamma ) / self .gamma ) or (self .iterations > 60 ) and self .terminated == False :
596615 self .terminated = True
597616 display (self .grid_to_show , self ._height , self ._width )
617+
618+ pi = best_policy (self .sequential_decision_environment , value_iteration (self .sequential_decision_environment , .01 ))
619+ display_best_policy (self .sequential_decision_environment .to_arrows (pi ), self ._height , self ._width )
598620
599621 ax = fig .gca ()
600622 ax .xaxis .set_major_locator (MaxNLocator (integer = True ))
0 commit comments