Commit 37bc8bb

Merge pull request #252 from Warosaurus/update_openai_lander_example

Update OpenAI Lander example

2 parents 4928381 + 36dcd31


examples/openai-lander/evolve.py

Lines changed: 11 additions & 9 deletions
@@ -16,7 +16,7 @@
 import neat
 import visualize
 
-NUM_CORES = 8
+NUM_CORES = multiprocessing.cpu_count()
 
 env = gym.make('LunarLander-v2')
 
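NUM_CORES now scales with the host machine instead of being hard-coded to 8. The new line relies on the multiprocessing module being imported; a minimal sketch of the import block this hunk assumes (only import neat and import visualize are visible in the diff, the rest is inferred from names used elsewhere in evolve.py):

# Sketch of the assumed imports at the top of evolve.py; the diff shows
# only the last two, so the others are an inference, not the commit's text.
import multiprocessing
import random

import gym
import numpy as np

import neat
import visualize

NUM_CORES = multiprocessing.cpu_count()  # one worker per available core

env = gym.make('LunarLander-v2')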
@@ -86,21 +86,22 @@ def __init__(self, num_workers):
     def simulate(self, nets):
         scores = []
         for genome, net in nets:
-            observation = env.reset()
+            observation_init_vals, observation_init_info = env.reset()
             step = 0
             data = []
             while 1:
                 step += 1
                 if step < 200 and random.random() < 0.2:
                     action = env.action_space.sample()
                 else:
-                    output = net.activate(observation)
+                    output = net.activate(observation_init_vals)
                     action = np.argmax(output)
 
-                observation, reward, done, info = env.step(action)
+                # Note: done has been deprecated.
+                observation, reward, terminated, done, info = env.step(action)
                 data.append(np.hstack((observation, action, reward)))
 
-                if done:
+                if terminated:
                     break
 
             data = np.array(data)
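For reference, the API change this hunk tracks is that in Gym >= 0.26 (and Gymnasium), env.reset() returns an (observation, info) tuple and env.step() returns a five-tuple whose third and fourth elements are conventionally named terminated and truncated. A minimal sketch of such an episode loop under that API, assuming a net object with an activate() method as in the example (run_episode, epsilon, and random_steps are illustrative names, not the commit's code):

import random

import gym
import numpy as np

def run_episode(env, net, epsilon=0.2, random_steps=200):
    # Gym >= 0.26: reset() returns (observation, info).
    observation, info = env.reset()
    data = []
    step = 0
    while True:
        step += 1
        if step < random_steps and random.random() < epsilon:
            action = env.action_space.sample()
        else:
            # Activate on the current observation each step.
            output = net.activate(observation)
            action = np.argmax(output)

        # Gym >= 0.26: step() returns (obs, reward, terminated, truncated, info).
        observation, reward, terminated, truncated, info = env.step(action)
        data.append(np.hstack((observation, action, reward)))
        if terminated or truncated:
            break
    return np.array(data)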
@@ -202,7 +203,7 @@ def run():
             solved = True
             best_scores = []
             for k in range(100):
-                observation = env.reset()
+                observation_init_vals, observation_init_info = env.reset()
                 score = 0
                 step = 0
                 while 1:
@@ -211,14 +212,15 @@ def run():
                     # determine the best action given the current state.
                     votes = np.zeros((4,))
                     for n in best_networks:
-                        output = n.activate(observation)
+                        output = n.activate(observation_init_vals)
                         votes[np.argmax(output)] += 1
 
                     best_action = np.argmax(votes)
-                    observation, reward, done, info = env.step(best_action)
+                    # Note: done has been deprecated.
+                    observation, reward, terminated, done, info = env.step(best_action)
                     score += reward
                     env.render()
-                    if done:
+                    if terminated:
                         break
 
                 ec.episode_score.append(score)
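The surrounding run() code picks each action by majority vote across the best networks over LunarLander's four discrete actions. A small self-contained sketch of that voting step, assuming each element of best_networks exposes activate() as in neat-python (the vote_action helper is hypothetical, not part of the commit):

import numpy as np

def vote_action(best_networks, observation):
    # Each network proposes an action via argmax over its outputs;
    # the most-voted action wins (ties resolve to the lowest index).
    votes = np.zeros((4,))  # LunarLander has 4 discrete actions
    for n in best_networks:
        output = n.activate(observation)
        votes[np.argmax(output)] += 1
    return int(np.argmax(votes))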
