Commit 14da45b5, authored by Bharath Ramsundar, committed by GitHub
Browse files

Merge pull request #568 from lilleswing/a3c-tictactoe

DeepChem learns to play tictactoe
parents 3f2475b7 44926c08
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -20,7 +20,7 @@ install:
- pip install coveralls
- python setup.py install
script:
- nosetests -a '!slow' --with-timer --with-coverage --cover-package=deepchem -v deepchem --nologcapture
- nosetests --with-flaky -a '!slow' --with-timer --with-coverage --cover-package=deepchem -v deepchem --nologcapture
- find ./deepchem | grep .py$ |xargs python -m doctest -v
- bash devtools/travis-ci/test_format_code.sh
after_success:
+96 −0
Original line number Diff line number Diff line
import copy
import random
import shutil

import numpy as np
import tensorflow as tf

import deepchem as dc
import deepchem.rl.envs.tictactoe
from deepchem.models.tensorgraph.layers import Flatten, Dense, SoftMax, \
    BatchNorm, Squeeze


class TicTacToePolicy(dc.rl.Policy):
  """Policy for tic-tac-toe that outputs action probabilities and a value."""

  def create_layers(self, state, **kwargs):
    """Build the policy layers on top of `state`.

    The state is flattened, passed through three ReLU `Dense` layers of
    64, 32 and 16 channels (each configured with `tf.nn.l2_normalize`,
    `dim=1`, as its normalizer), then batch-normalized. Two linear heads
    branch off the batch-normalized features.

    :param state: input layer(s) handed to `Flatten` as `in_layers`.
    :return: dict with 'action_prob' (softmax over 9 outputs) and 'value'
      (a 1-channel output with dimension 1 squeezed away).
    """
    hidden = Flatten(in_layers=state)
    # Same construction order as writing the three layers out by hand.
    for width in (64, 32, 16):
      hidden = Dense(
          in_layers=[hidden],
          activation_fn=tf.nn.relu,
          normalizer_fn=tf.nn.l2_normalize,
          normalizer_params={"dim": 1},
          out_channels=width)
    trunk = BatchNorm(in_layers=[hidden])
    logits = Dense(in_layers=[trunk], activation_fn=None, out_channels=9)
    value_head = Dense(in_layers=[trunk], activation_fn=None, out_channels=1)
    value_head = Squeeze(squeeze_dims=1, in_layers=[value_head])
    action_prob = SoftMax(in_layers=[logits])
    return {'action_prob': action_prob, 'value': value_head}


def eval_tic_tac_toe(value_weight,
                     num_epoch_rounds=1,
                     games=10**4,
                     rollouts=10**5):
  """
    Train an A3C agent on tic-tac-toe and score it after every round.

    The model directory is wiped first. Each round then builds an A3C
    model on that directory, tries to restore an existing checkpoint,
    calls `fit(rollouts)` and plays `games` evaluation games.

    :param value_weight: forwarded to `dc.rl.A3C` as `value_weight`.
    :param num_epoch_rounds: number of train-then-evaluate rounds.
    :param games: number of evaluation games played after each round.
    :param rollouts: argument passed to `A3C.fit` in each round.
    :return: list with one single-entry dict per round, mapping
      `(round_index + 1) * rollouts` to the mean final reward of that
      round's evaluation games.
    """
  env = deepchem.rl.envs.tictactoe.TicTacToeEnvironment()
  policy = TicTacToePolicy()
  model_dir = "/tmp/tictactoe"
  # Best-effort cleanup: a missing directory is not an error here.
  shutil.rmtree(model_dir, ignore_errors=True)

  avg_rewards = []
  for j in range(num_epoch_rounds):
    a3c = dc.rl.A3C(
        env,
        policy,
        entropy_weight=0.01,
        value_weight=value_weight,
        model_dir=model_dir)
    a3c.optimizer = dc.models.tensorgraph.TFWrapper(
        tf.train.AdamOptimizer, learning_rate=0.01)
    try:
      a3c.restore()
    except Exception:
      # Restoring is optional (e.g. nothing saved yet on the first round).
      # Catch Exception rather than everything so Ctrl-C and SystemExit
      # still propagate.
      print("unable to restore")
    a3c.fit(rollouts)
    rewards = []
    for _ in range(games):
      env.reset()
      # Sentinel recorded if the environment is already terminated
      # right after reset and no move is ever played.
      reward = -float('inf')
      while not env._terminated:
        action = a3c.select_action(env._state)
        reward = env.step(action)
      # Only the reward of the final step of each game is kept.
      rewards.append(reward)
    avg_rewards.append({(j + 1) * rollouts: np.mean(rewards)})
  return avg_rewards


def main():
  """Run three train/evaluate rounds with value_weight 6.0 and print the result."""
  print(eval_tic_tac_toe(6.0, num_epoch_rounds=3))


# Call main() only when this file is executed directly, not when imported.
if __name__ == "__main__":
  main()
+15 −0
Original line number Diff line number Diff line
@@ -287,6 +287,21 @@ class Reshape(Layer):
    return out_tensor


class Squeeze(Layer):
  """Layer that applies `tf.squeeze` to its first input tensor."""

  def __init__(self, squeeze_dims, **kwargs):
    """Store `squeeze_dims`; remaining keyword arguments go to `Layer`."""
    self.squeeze_dims = squeeze_dims
    super(Squeeze, self).__init__(**kwargs)

  def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
    """Squeeze the first input tensor over `self.squeeze_dims`.

    The result is also stored on `self.out_tensor` when `set_tensors`
    is true, and is returned in either case.
    """
    first_input = self._get_input_tensors(in_layers)[0]
    squeezed = tf.squeeze(first_input, squeeze_dims=self.squeeze_dims)
    if set_tensors:
      self.out_tensor = squeezed
    return squeezed


class Transpose(Layer):

  def __init__(self, perm, **kwargs):
+9 −1
Original line number Diff line number Diff line
@@ -9,7 +9,7 @@ from tensorflow.python.framework import test_util
from deepchem.feat.mol_graphs import ConvMol
from deepchem.feat.mol_graphs import MultiConvMol
from deepchem.feat.graph_features import ConvMolFeaturizer
from deepchem.models.tensorgraph.layers import Conv1D
from deepchem.models.tensorgraph.layers import Conv1D, Squeeze
from deepchem.models.tensorgraph.layers import Dense
from deepchem.models.tensorgraph.layers import Flatten
from deepchem.models.tensorgraph.layers import Reshape
@@ -521,3 +521,11 @@ class TestLayers(test_util.TensorFlowTestCase):
      result = out_tensor.eval()
      assert result.shape == (1, 6, 1)
      assert np.array_equal(value1.reshape((1, 6, 1)) + value2, result)

  def test_squeeze_inputs(self):
    """Test that Squeeze drops the size-1 dimension and keeps the values."""
    value1 = np.random.uniform(size=(2, 1)).astype(np.float32)
    # The session is only needed as the default session for eval().
    with self.test_session():
      out_tensor = Squeeze(squeeze_dims=1)(tf.constant(value1))
      result = out_tensor.eval()
      assert result.shape == (2,)
      # Squeezing must not alter the data, only the shape.
      assert np.array_equal(value1.reshape((2,)), result)
+11 −8
Original line number Diff line number Diff line
@@ -3,6 +3,7 @@ import unittest
import numpy as np
import os
from nose.tools import assert_true
from flaky import flaky

import deepchem as dc
from deepchem.data import NumpyDataset
@@ -30,13 +31,14 @@ class TestTensorGraph(unittest.TestCase):
    label = Label(shape=(None, 2))
    smce = SoftMaxCrossEntropy(in_layers=[label, dense])
    loss = ReduceMean(in_layers=[smce])
    tg = dc.models.TensorGraph(learning_rate=0.1)
    tg = dc.models.TensorGraph(learning_rate=0.01)
    tg.add_output(output)
    tg.set_loss(loss)
    tg.fit(dataset, nb_epoch=1000)
    prediction = np.squeeze(tg.predict_proba_on_batch(X))
    assert_true(np.all(np.isclose(prediction, y, atol=0.4)))

  @flaky
  def test_multi_task_classifier(self):
    n_data_points = 20
    n_features = 2
@@ -66,7 +68,7 @@ class TestTensorGraph(unittest.TestCase):

    total_loss = ReduceMean(in_layers=entropies)

    tg = dc.models.TensorGraph(learning_rate=0.1)
    tg = dc.models.TensorGraph(learning_rate=0.01)
    for output in outputs:
      tg.add_output(output)
    tg.set_loss(total_loss)
@@ -90,7 +92,7 @@ class TestTensorGraph(unittest.TestCase):
    dense = Dense(out_channels=1, in_layers=[features])
    label = Label(shape=(None, 1))
    loss = ReduceSquareDifference(in_layers=[dense, label])
    tg = dc.models.TensorGraph(learning_rate=0.1)
    tg = dc.models.TensorGraph(learning_rate=0.01)
    tg.add_output(dense)
    tg.set_loss(loss)
    tg.fit(dataset, nb_epoch=1000)
@@ -125,7 +127,7 @@ class TestTensorGraph(unittest.TestCase):

    total_loss = ReduceMean(in_layers=losses)

    tg = dc.models.TensorGraph(learning_rate=0.1)
    tg = dc.models.TensorGraph(learning_rate=0.01)
    for output in outputs:
      tg.add_output(output)
    tg.set_loss(total_loss)
@@ -139,6 +141,7 @@ class TestTensorGraph(unittest.TestCase):
      y_pred = prediction[:, i, :]
      assert_true(np.all(np.isclose(y_pred, y_real, atol=1.5)))

  @flaky
  def test_no_queue(self):
    n_data_points = 20
    n_features = 2
@@ -151,7 +154,7 @@ class TestTensorGraph(unittest.TestCase):
    label = Label(shape=(None, 2))
    smce = SoftMaxCrossEntropy(in_layers=[label, dense])
    loss = ReduceMean(in_layers=[smce])
    tg = dc.models.TensorGraph(learning_rate=0.1, use_queue=False)
    tg = dc.models.TensorGraph(learning_rate=0.01, use_queue=False)
    tg.add_output(output)
    tg.set_loss(loss)
    tg.fit(dataset, nb_epoch=1000)
@@ -173,7 +176,7 @@ class TestTensorGraph(unittest.TestCase):
    tg = dc.models.TensorGraph(
        tensorboard=True,
        tensorboard_log_frequency=1,
        learning_rate=0.1,
        learning_rate=0.01,
        model_dir='/tmp/tensorgraph')
    tg.add_output(output)
    tg.set_loss(loss)
@@ -197,7 +200,7 @@ class TestTensorGraph(unittest.TestCase):
    label = Label(shape=(None, 2))
    smce = SoftMaxCrossEntropy(in_layers=[label, dense])
    loss = ReduceMean(in_layers=[smce])
    tg = dc.models.TensorGraph(learning_rate=0.1)
    tg = dc.models.TensorGraph(learning_rate=0.01)
    tg.add_output(output)
    tg.set_loss(loss)
    tg.fit(dataset, nb_epoch=1)
@@ -237,7 +240,7 @@ class TestTensorGraph(unittest.TestCase):

    total_loss = ReduceMean(in_layers=[smce])

    tg = dc.models.TensorGraph(learning_rate=0.1)
    tg = dc.models.TensorGraph(learning_rate=0.01)
    for output in outputs:
      tg.add_output(output)
    tg.set_loss(total_loss)
Loading