Commit 1d1878ed authored by peastman's avatar peastman
Browse files

Began converting A3C to KerasModel

parent af54fae1
Loading
Loading
Loading
Loading
+4 −2
Original line number Diff line number Diff line
@@ -224,11 +224,11 @@ class KerasModel(Model):
    if tf.executing_eagerly():
      return
    self._label_placeholders = [
        tf.placeholder(dtype=tf.as_dtype(t), shape=x.shape)
        tf.placeholder(dtype=tf.as_dtype(t), shape=(None,) + x.shape[1:])
        for x, t in zip(example_batch[1], self._label_dtypes)
    ]
    self._weights_placeholders = [
        tf.placeholder(dtype=tf.as_dtype(t), shape=x.shape)
        tf.placeholder(dtype=tf.as_dtype(t), shape=(None,) + x.shape[1:])
        for x, t in zip(example_batch[2], self._weights_dtypes)
    ]
    self._loss_tensor = self._loss_fn(
@@ -937,6 +937,7 @@ class KerasModel(Model):
      checkpoint will be chosen automatically.  Call get_checkpoints() to get a
      list of all available checkpoints.
    """
    self._ensure_built()
    if checkpoint is None:
      checkpoint = tf.train.latest_checkpoint(self.model_dir)
    if checkpoint is None:
@@ -972,6 +973,7 @@ class _StandardLoss(object):
        shape = tuple(w.shape.as_list())
      else:
        shape = w.shape
      shape = tuple(-1 if x is None else x for x in shape)
      w = tf.reshape(w, shape + (1,) * (len(losses.shape) - len(w.shape)))
    loss = losses * w
    return tf.reduce_mean(loss) + sum(self.model.losses)
+8 −0
Original line number Diff line number Diff line
@@ -63,6 +63,7 @@ class Environment(object):
        self._state_dtype = numpy.float32
    else:
      self._state_dtype = state_dtype
    print(self._state_dtype)

  @property
  def state(self):
@@ -185,6 +186,13 @@ class Policy(object):
  or even on different computers.
  """

  def create_model(self, **kwargs):
    raise NotImplemented("Subclasses must implement this")

  @property
  def output_names(self):
    raise NotImplemented("Subclasses must implement this")

  def create_layers(self, state, **kwargs):
    """Create the TensorGraph Layers that define the policy.

+193 −174

File changed.

Preview size limit exceeded, changes collapsed.

+49 −18
Original line number Diff line number Diff line
@@ -48,12 +48,22 @@ class TestA3C(unittest.TestCase):

    class TestPolicy(dc.rl.Policy):

      def create_layers(self, state, **kwargs):
        action = Variable(np.ones(env.n_actions))
        output = SoftMax(
            in_layers=[Reshape(in_layers=[action], shape=(-1, env.n_actions))])
        value = Variable([0.0])
        return {'action_prob': output, 'value': value}
      def create_model(self, **kwargs):

        class TestModel(tf.keras.Model):
          def __init__(self):
            super(TestModel, self).__init__(**kwargs)
            self.action = tf.Variable(np.ones(env.n_actions, np.float32))
            self.value = tf.Variable([0.0], tf.float32)
          def call(self, inputs, **kwargs):
            prob = tf.nn.softmax(tf.reshape(self.action, (-1, env.n_actions)))
            return (prob, self.value)

        return TestModel()

      @property
      def output_names(self):
        return ['action_prob', 'value']

    # Optimize it.

@@ -75,14 +85,14 @@ class TestA3C(unittest.TestCase):
    # Verify that we can create a new A3C object, reload the parameters from the first one, and
    # get the same result.

    new_a3c = dc.rl.A3C(env, TestPolicy(), model_dir=a3c._graph.model_dir)
    new_a3c = dc.rl.A3C(env, TestPolicy(), model_dir=a3c._model.model_dir)
    new_a3c.restore()
    action_prob2, value2 = new_a3c.predict([[0]])
    assert value2 == value

    # Do the same thing, only using the "restore" argument to fit().

    new_a3c = dc.rl.A3C(env, TestPolicy(), model_dir=a3c._graph.model_dir)
    new_a3c = dc.rl.A3C(env, TestPolicy(), model_dir=a3c._model.model_dir)
    new_a3c.fit(0, restore=True)
    action_prob2, value2 = new_a3c.predict([[0]])
    assert value2 == value
@@ -196,6 +206,21 @@ class TestA3C(unittest.TestCase):

    class TestPolicy(dc.rl.Policy):

      def create_model(self, **kwargs):
        state = tf.keras.layers.Input(shape=(4,))
        dense1 = tf.keras.layers.Dense(6, activation=tf.nn.relu)(state)
        dense2 = tf.keras.layers.Dense(6, activation=tf.nn.relu)(dense1)
        output = tf.keras.layers.Dense(
            4,
            activation=tf.nn.softmax,
            use_bias=False)(dense2)
        value = tf.keras.layers.Dense(1)(dense2)
        return tf.keras.Model(inputs=state, outputs=[output, value])

      @property
      def output_names(self):
        return ['action_prob', 'value']

      def create_layers(self, state, **kwargs):

        dense1 = Dense(6, activation_fn=tf.nn.relu, in_layers=state)
@@ -264,16 +289,22 @@ class TestA3C(unittest.TestCase):

    class TestPolicy(dc.rl.Policy):

      def create_layers(self, state, **kwargs):
        action_mean = Dense(
            1, in_layers=state, weights_initializer=tf.zeros_initializer)
        action_std = Constant([10.0])
        value = Dense(1, in_layers=state)
        return {
            'action_mean': action_mean,
            'action_std': action_std,
            'value': value
        }
      def create_model(self, **kwargs):

        class TestModel(tf.keras.Model):
          def __init__(self):
            super(TestModel, self).__init__(**kwargs)
            self.mean = tf.keras.layers.Dense(1, kernel_initializer='zeros')
            self.std = tf.constant([10.0])
            self.value = tf.keras.layers.Dense(1)
          def call(self, inputs, **kwargs):
            return (self.mean(inputs), self.std, self.value(inputs))

        return TestModel()

      @property
      def output_names(self):
        return ['action_mean', 'action_std', 'value']

    # Optimize it.