Commit f1a46112 authored by peastman's avatar peastman
Browse files

Minor fixes to comments

parent d787bc0d
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -504,7 +504,7 @@ class _Worker(object):
          1] += self.a3c.discount_factor * self.a3c.advantage_lambda * advantages[
              j]

-    # Record the actions, computing to one-hot if necessary.
+    # Record the actions, converting to one-hot if necessary.

    actions_matrix = []
    if self.a3c.continuous:
+1 −1
Original line number Diff line number Diff line
@@ -260,7 +260,7 @@ class TestA3C(unittest.TestCase):
        self._terminated = (self.count == 10)
        return reward

-    # A simple policy with two hidden layers.
+    # A simple policy with no hidden layers.

    class TestPolicy(dc.rl.Policy):