16
16
import neat
17
17
import visualize
18
18
19
- NUM_CORES = 8
19
+ NUM_CORES = multiprocessing . cpu_count ()
20
20
21
21
env = gym .make ('LunarLander-v2' )
22
22
@@ -86,21 +86,22 @@ def __init__(self, num_workers):
86
86
def simulate (self , nets ):
87
87
scores = []
88
88
for genome , net in nets :
89
- observation = env .reset ()
89
+ observation_init_vals , observation_init_info = env .reset ()
90
90
step = 0
91
91
data = []
92
92
while 1 :
93
93
step += 1
94
94
if step < 200 and random .random () < 0.2 :
95
95
action = env .action_space .sample ()
96
96
else :
97
- output = net .activate (observation )
97
+ output = net .activate (observation_init_vals )
98
98
action = np .argmax (output )
99
99
100
- observation , reward , done , info = env .step (action )
100
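+ # Note: the single `done` flag was split into `terminated` and `truncated` (gym >= 0.26); the fourth value unpacked below is the truncation flag.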
101
+ observation , reward , terminated , done , info = env .step (action )
101
102
data .append (np .hstack ((observation , action , reward )))
102
103
103
- if done :
104
+ if terminated :
104
105
break
105
106
106
107
data = np .array (data )
@@ -202,7 +203,7 @@ def run():
202
203
solved = True
203
204
best_scores = []
204
205
for k in range (100 ):
205
- observation = env .reset ()
206
+ observation_init_vals , observation_init_info = env .reset ()
206
207
score = 0
207
208
step = 0
208
209
while 1 :
@@ -211,14 +212,15 @@ def run():
211
212
# determine the best action given the current state.
212
213
votes = np .zeros ((4 ,))
213
214
for n in best_networks :
214
- output = n .activate (observation )
215
+ output = n .activate (observation_init_vals )
215
216
votes [np .argmax (output )] += 1
216
217
217
218
best_action = np .argmax (votes )
218
- observation , reward , done , info = env .step (best_action )
219
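+ # Note: the single `done` flag was split into `terminated` and `truncated` (gym >= 0.26); the fourth value unpacked below is the truncation flag.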
220
+ observation , reward , terminated , done , info = env .step (best_action )
219
221
score += reward
220
222
env .render ()
221
- if done :
223
+ if terminated :
222
224
break
223
225
224
226
ec .episode_score .append (score )
0 commit comments