Commit 37bc8bb

Merge pull request #252 from Warosaurus/update_openai_lander_example

Update OpenAI Lander example

2 parents 4928381 + 36dcd31


examples/openai-lander/evolve.py

Lines changed: 11 additions & 9 deletions
@@ -16,7 +16,7 @@
 import neat
 import visualize
 
-NUM_CORES = 8
+NUM_CORES = multiprocessing.cpu_count()
 
 env = gym.make('LunarLander-v2')
 
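NUM_CORES now scales with the host machine instead of being hard-coded to 8. The new line relies on the multiprocessing module being imported; a minimal sketch of the import block this hunk assumes (only import neat and import visualize are visible in the diff, the rest is inferred from names used elsewhere in evolve.py):

# Sketch of the assumed imports at the top of evolve.py; the diff shows
# only the last two, so the others are an inference, not the commit's text.
import multiprocessing
import random

import gym
import numpy as np

import neat
import visualize

NUM_CORES = multiprocessing.cpu_count()  # one worker per available core

env = gym.make('LunarLander-v2')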
@@ -86,21 +86,22 @@ def __init__(self, num_workers):
     def simulate(self, nets):
         scores = []
         for genome, net in nets:
-            observation = env.reset()
+            observation_init_vals, observation_init_info = env.reset()
             step = 0
             data = []
             while 1:
                 step += 1
                 if step < 200 and random.random() < 0.2:
                     action = env.action_space.sample()
                 else:
-                    output = net.activate(observation)
+                    output = net.activate(observation_init_vals)
                     action = np.argmax(output)
 
-                observation, reward, done, info = env.step(action)
+                # Note: done has been deprecated.
+                observation, reward, terminated, done, info = env.step(action)
                 data.append(np.hstack((observation, action, reward)))
 
-                if done:
+                if terminated:
                     break
 
             data = np.array(data)
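For reference, the API change this hunk tracks is that in Gym >= 0.26 (and Gymnasium), env.reset() returns an (observation, info) tuple and env.step() returns a five-tuple whose third and fourth elements are conventionally named terminated and truncated. A minimal sketch of such an episode loop under that API, assuming a net object with an activate() method as in the example (run_episode, epsilon, and random_steps are illustrative names, not the commit's code):

import random

import gym
import numpy as np

def run_episode(env, net, epsilon=0.2, random_steps=200):
    # Gym >= 0.26: reset() returns (observation, info).
    observation, info = env.reset()
    data = []
    step = 0
    while True:
        step += 1
        if step < random_steps and random.random() < epsilon:
            action = env.action_space.sample()
        else:
            # Activate on the current observation each step.
            output = net.activate(observation)
            action = np.argmax(output)

        # Gym >= 0.26: step() returns (obs, reward, terminated, truncated, info).
        observation, reward, terminated, truncated, info = env.step(action)
        data.append(np.hstack((observation, action, reward)))
        if terminated or truncated:
            break
    return np.array(data)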
@@ -202,7 +203,7 @@ def run():
             solved = True
             best_scores = []
             for k in range(100):
-                observation = env.reset()
+                observation_init_vals, observation_init_info = env.reset()
                 score = 0
                 step = 0
                 while 1:
@@ -211,14 +212,15 @@ def run():
                     # determine the best action given the current state.
                     votes = np.zeros((4,))
                     for n in best_networks:
-                        output = n.activate(observation)
+                        output = n.activate(observation_init_vals)
                         votes[np.argmax(output)] += 1
 
                     best_action = np.argmax(votes)
-                    observation, reward, done, info = env.step(best_action)
+                    # Note: done has been deprecated.
+                    observation, reward, terminated, done, info = env.step(best_action)
                     score += reward
                     env.render()
-                    if done:
+                    if terminated:
                         break
 
                 ec.episode_score.append(score)
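The surrounding run() code picks each action by majority vote across the best networks over LunarLander's four discrete actions. A small self-contained sketch of that voting step, assuming each element of best_networks exposes activate() as in neat-python (the vote_action helper is hypothetical, not part of the commit):

import numpy as np

def vote_action(best_networks, observation):
    # Each network proposes an action via argmax over its outputs;
    # the most-voted action wins (ties resolve to the lowest index).
    votes = np.zeros((4,))  # LunarLander has 4 discrete actions
    for n in best_networks:
        output = n.activate(observation)
        votes[np.argmax(output)] += 1
    return int(np.argmax(votes))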
