Commit bd089f6c authored by Bharath Ramsundar, committed by GitHub
Browse files

Merge pull request #640 from peastman/scale

Changed scaling of A3C loss function
parents 32d6cc88 e1a7c224
Loading
Loading
Loading
Loading
+3 −3
Original line number Diff line number Diff line
@@ -26,9 +26,9 @@ class A3CLoss(Layer):
    ]
    prob = prob + np.finfo(np.float32).eps
    log_prob = tf.log(prob)
-    policy_loss = -tf.reduce_sum(advantage * tf.reduce_sum(action * log_prob))
-    value_loss = tf.reduce_sum(tf.square(reward - value))
-    entropy = -tf.reduce_sum(prob * log_prob)
+    policy_loss = -tf.reduce_mean(advantage * tf.reduce_sum(action * log_prob))
+    value_loss = tf.reduce_mean(tf.square(reward - value))
+    entropy = -tf.reduce_mean(tf.reduce_sum(prob * log_prob, axis=1))
    self.out_tensor = policy_loss + self.value_weight * value_loss - self.entropy_weight * entropy
    return self.out_tensor

+2 −2
Original line number Diff line number Diff line
@@ -58,9 +58,9 @@ class TestA3C(unittest.TestCase):
    a3c = dc.rl.A3C(
        env,
        TestPolicy(),
-        max_rollout_length=50,
+        max_rollout_length=20,
         optimizer=dc.models.tensorgraph.TFWrapper(
-            tf.train.AdamOptimizer, learning_rate=0.0001))
+            tf.train.AdamOptimizer, learning_rate=0.001))
    a3c.fit(100000)

    # It should have learned that the expected value is very close to zero, and that the best