Commit 766bb68a authored by leswing's avatar leswing
Browse files

Lower learning rate of a3c test for robustness

parent f52d003e
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -57,7 +57,7 @@ class TestA3C(unittest.TestCase):
        TestPolicy(),
        max_rollout_length=50,
        optimizer=dc.models.tensorgraph.TFWrapper(
-            tf.train.AdamOptimizer, learning_rate=0.005))
+            tf.train.AdamOptimizer, learning_rate=0.0001))
    a3c.fit(100000)

    # It should have learned that the expected value is very close to zero, and that the best
@@ -81,4 +81,5 @@ class TestA3C(unittest.TestCase):
    new_a3c = dc.rl.A3C(env, TestPolicy(), model_dir=a3c._graph.model_dir)
    new_a3c.fit(0, restore=True)
    action_prob2, value2 = new_a3c.predict([[0]])
+    print(value2, value)
    assert value2 == value