@@ -64,6 +64,22 @@ def display(gridmdp, _height, _width):
6464
6565	dialog .mainloop ()
6666
def display_best_policy(_best_policy, _height, _width):
	''' opens a 'Best Policy' window showing one label per grid cell

	_best_policy is indexed as _best_policy[row][column]; rows run over
	range(max(1, _height)) and columns over range(max(1, _width)), so it
	must hold at least that many entries in each direction.
	'''

	window = tk.Toplevel()
	window.wm_title('Best Policy')

	# single frame that fills the window and hosts the label grid
	frame = tk.Frame(window)
	frame.pack(side=tk.TOP, fill=tk.BOTH, expand=True)

	cell_font = ('Helvetica', 12, 'bold')
	row_count = max(1, _height)
	column_count = max(1, _width)

	for row in range(row_count):
		for column in range(column_count):
			cell = ttk.Label(frame, text=_best_policy[row][column], font=cell_font)
			# grid positions are offset by one from the policy indices
			cell.grid(row=row + 1, column=column + 1, padx=3, pady=3)

	# NOTE(review): this enters the Tk event loop from the Toplevel; confirm
	# that blocking here is intended for callers that invoke this function.
	window.mainloop()
82+ 
6783def  initialize_dialogbox (_width , _height , gridmdp , terminals , buttons ):
6884	''' creates dialogbox for initialization ''' 
6985
@@ -98,7 +114,7 @@ def initialize_dialogbox(_width, _height, gridmdp, terminals, buttons):
98114
99115	btn_apply  =  ttk .Button (container , text = 'Apply' , command = partial (initialize_update_table , _width , _height , gridmdp , terminals , buttons , reward , term , wall , label_reward , entry_reward , rbtn_term , rbtn_wall ))
100116	btn_apply .grid (row = 5 , column = 0 , sticky = 'nsew' , pady = 5 , padx = 5 )
101- 	btn_reset  =  ttk .Button (container , text = 'Reset' , command = partial (initialize_reset_all , _width , _height , gridmdp , terminals , buttons , label_reward , entry_reward , rbtn_wall , rbtn_term ))
117+ 	btn_reset  =  ttk .Button (container , text = 'Reset' , command = partial (initialize_reset_all , _width , _height , gridmdp , terminals , buttons , reward ,  term ,  wall ,  label_reward , entry_reward , rbtn_wall , rbtn_term ))
102118	btn_reset .grid (row = 5 , column = 1 , sticky = 'nsew' , pady = 5 , padx = 5 )
103119	btn_ok  =  ttk .Button (container , text = 'Ok' , command = dialog .destroy )
104120	btn_ok .grid (row = 5 , column = 2 , sticky = 'nsew' , pady = 5 , padx = 5 )
@@ -146,9 +162,12 @@ def initialize_update_table(_width, _height, gridmdp, terminals, buttons, reward
146162		for  j  in  range (max (1 , _width )):
147163			update_table (i , j , gridmdp , terminals , buttons , reward , term , wall , label_reward , entry_reward , rbtn_term , rbtn_wall )
148164
149- def  reset_all (_height , i , j , gridmdp , terminals , buttons , label_reward , entry_reward , rbtn_wall , rbtn_term ):
165+ def  reset_all (_height , i , j , gridmdp , terminals , buttons , reward ,  term ,  wall ,  label_reward , entry_reward , rbtn_wall , rbtn_term ):
150166	''' functionality for reset button ''' 
151167
168+ 	reward .set (0.0 )
169+ 	term .set (0 )
170+ 	wall .set (0 )
152171	gridmdp [i ][j ] =  0.0 
153172	buttons [i ][j ].configure (style = 'TButton' )
154173	buttons [i ][j ].config (text = f'({ _height  -  i  -  1 }  , { j }  )' )
@@ -163,12 +182,12 @@ def reset_all(_height, i, j, gridmdp, terminals, buttons, label_reward, entry_re
163182	rbtn_wall .state (['!focus' , '!selected' ])
164183	rbtn_term .state (['!focus' , '!selected' ])
165184
def initialize_reset_all(_width, _height, gridmdp, terminals, buttons, reward, term, wall, label_reward, entry_reward, rbtn_wall, rbtn_term):
	''' calls reset_all once for every (row, column) cell

	Rows run over range(max(1, _height)) and columns over
	range(max(1, _width)). Every call gets the same shared arguments;
	only the row and column indices change between calls.
	'''

	row_count = max(1, _height)
	column_count = max(1, _width)

	for row in range(row_count):
		for column in range(column_count):
			# _height is forwarded as reset_all's leading argument
			reset_all(
				_height, row, column, gridmdp, terminals, buttons,
				reward, term, wall,
				label_reward, entry_reward, rbtn_wall, rbtn_term,
			)
172191
173192def  external_reset (_width , _height , gridmdp , terminals , buttons ):
174193	''' reset from edit menu ''' 
@@ -263,7 +282,7 @@ def dialogbox(i, j, gridmdp, terminals, buttons, _height):
263282
264283	btn_apply  =  ttk .Button (container , text = 'Apply' , command = partial (update_table , i , j , gridmdp , terminals , buttons , reward , term , wall , label_reward , entry_reward , rbtn_term , rbtn_wall ))
265284	btn_apply .grid (row = 5 , column = 0 , sticky = 'nsew' , pady = 5 , padx = 5 )
266- 	btn_reset  =  ttk .Button (container , text = 'Reset' , command = partial (reset_all , _height , i , j , gridmdp , terminals , buttons , label_reward , entry_reward , rbtn_wall , rbtn_term ))
285+ 	btn_reset  =  ttk .Button (container , text = 'Reset' , command = partial (reset_all , _height , i , j , gridmdp , terminals , buttons , reward ,  term ,  wall ,  label_reward , entry_reward , rbtn_wall , rbtn_term ))
267286	btn_reset .grid (row = 5 , column = 1 , sticky = 'nsew' , pady = 5 , padx = 5 )
268287	btn_ok  =  ttk .Button (container , text = 'Ok' , command = dialog .destroy )
269288	btn_ok .grid (row = 5 , column = 2 , sticky = 'nsew' , pady = 5 , padx = 5 )
@@ -595,6 +614,9 @@ def animate_graph(self, i):
595614		if  (self .delta  <  self .epsilon  *  (1  -  self .gamma ) /  self .gamma ) or  (self .iterations  >  60 ) and  self .terminated  ==  False :
596615			self .terminated  =  True 
597616			display (self .grid_to_show , self ._height , self ._width )
617+ 
618+ 			pi  =  best_policy (self .sequential_decision_environment , value_iteration (self .sequential_decision_environment , .01 ))
619+ 			display_best_policy (self .sequential_decision_environment .to_arrows (pi ), self ._height , self ._width )
598620
599621		ax  =  fig .gca ()
600622		ax .xaxis .set_major_locator (MaxNLocator (integer = True ))
0 commit comments