import pytest

from rl import *
from mdp import sequential_decision_environment


# Actions in the 4x3 grid world, written as (dx, dy) displacements.
north = (0, 1)
south = (0, -1)
west = (-1, 0)
east = (1, 0)

# Fixed policy for the passive agents to evaluate: the optimal policy for
# the 4x3 world (cf. AIMA Fig. 17.2), with None at the two terminal states.
policy = {
    (0, 2): east,  (1, 2): east,  (2, 2): east,   (3, 2): None,
    (0, 1): north,                (2, 1): north,  (3, 1): None,
    (0, 0): north, (1, 0): west,  (2, 0): west,   (3, 0): west,
}
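
# A quick visual sanity check (a sketch, not part of the original tests):
# GridMDP in mdp.py can render a policy as a grid of arrow characters,
# which makes it easy to compare the table above against the book figure:
#
#     print(sequential_decision_environment.to_arrows(policy))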


def test_PassiveADPAgent():
    agent = PassiveADPAgent(policy, sequential_decision_environment)
    for i in range(75):
        run_single_trial(agent, sequential_decision_environment)

    # The agent's estimates vary from run to run, so only check that the
    # learned utilities are in the right ballpark.
    assert agent.U[(0, 0)] > 0.15  # In reality around 0.3.
    assert agent.U[(0, 1)] > 0.15  # In reality around 0.4.
    assert agent.U[(1, 0)] > 0     # In reality around 0.2.
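
# The "in reality around ..." values quoted in these tests can be reproduced
# with exact value iteration on the same MDP (a sketch, assuming
# value_iteration(mdp, epsilon) from mdp.py):
#
#     from mdp import value_iteration
#     U = value_iteration(sequential_decision_environment, .01)
#     # U[(0, 0)], U[(0, 1)], U[(1, 0)] give the exact utilities.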


def test_PassiveTDAgent():
    # The decaying learning rate alpha starts at 1 (n=1) and falls off as a
    # state is visited more often, which TD needs in order to converge.
    agent = PassiveTDAgent(policy, sequential_decision_environment,
                           alpha=lambda n: 60./(59 + n))
    for i in range(200):
        run_single_trial(agent, sequential_decision_environment)

    # The agent's estimates vary from run to run, so only check that the
    # learned utilities are in the right ballpark.
    assert agent.U[(0, 0)] > 0.15  # In reality around 0.3.
    assert agent.U[(0, 1)] > 0.15  # In reality around 0.35.
    assert agent.U[(1, 0)] > 0.15  # In reality around 0.25.


def test_QLearning():
    # Ne and Rplus parametrize optimistic exploration: actions tried fewer
    # than Ne times are assumed to be worth the optimistic reward Rplus.
    q_agent = QLearningAgent(sequential_decision_environment, Ne=5, Rplus=2,
                             alpha=lambda n: 60./(59 + n))

    for i in range(200):
        run_single_trial(q_agent, sequential_decision_environment)

    # The agent's estimates vary from run to run, so only check that the
    # learned Q-values are in the right ballpark.
    assert q_agent.Q[((0, 1), (0, 1))] >= -0.5  # In reality around 0.1.
    assert q_agent.Q[((1, 0), (0, -1))] <= 0.5  # In reality around -0.1.
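
# Hypothetical follow-up (not part of the original suite): Q maps
# (state, action) pairs to values, so a greedy policy can be read off the
# learned table by keeping the best-valued action seen for each state.
def greedy_policy_from_Q(Q):
    """Return {state: best_action} for a Q-table keyed by (state, action)."""
    best = {}
    for (state, action), value in Q.items():
        if state not in best or value > best[state][1]:
            best[state] = (action, value)
    return {state: action for state, (action, _) in best.items()}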