Merge pull request #568 from lilleswing/a3c-tictactoe (14da45b5) · Commits · 钟慕尧 / deepchem

.travis.yml

+1 −1

Original line number	Diff line number	Diff line
		@@ -20,7 +20,7 @@ install:
		- pip install coveralls
		- python setup.py install
		script:
		- nosetests -a '!slow' --with-timer --with-coverage --cover-package=deepchem -v deepchem --nologcapture
		- nosetests --with-flaky -a '!slow' --with-timer --with-coverage --cover-package=deepchem -v deepchem --nologcapture
		- find ./deepchem \| grep .py$ \|xargs python -m doctest -v
		- bash devtools/travis-ci/test_format_code.sh
		after_success:

contrib/rl/tictactoe.py

0 → 100644

+96 −0

Original line number	Diff line number	Diff line
		import copy
		import random
		import shutil

		import numpy as np
		import tensorflow as tf

		import deepchem as dc
		import deepchem.rl.envs.tictactoe
		from deepchem.models.tensorgraph.layers import Flatten, Dense, SoftMax, \
		BatchNorm, Squeeze


		class TicTacToePolicy(dc.rl.Policy):
		  """Policy network for tic-tac-toe producing action probabilities and a value.

		  The flattened state passes through three ReLU Dense layers (64, 32 and 16
		  units, each configured with tf.nn.l2_normalize on dim 1 as normalizer) and
		  a BatchNorm layer. Two linear heads follow: a 9-unit head fed to SoftMax,
		  and a 1-unit head squeezed along dim 1.
		  """

		  def create_layers(self, state, **kwargs):
		    """Build the network on top of `state`.

		    Returns a dict holding the 'action_prob' and 'value' output layers.
		    """
		    hidden = Flatten(in_layers=state)
		    # Same three hidden layers as before, created in the same order.
		    for width in (64, 32, 16):
		      hidden = Dense(
		          in_layers=[hidden],
		          activation_fn=tf.nn.relu,
		          normalizer_fn=tf.nn.l2_normalize,
		          normalizer_params={"dim": 1},
		          out_channels=width)
		    trunk = BatchNorm(in_layers=[hidden])
		    logits = Dense(in_layers=[trunk], activation_fn=None, out_channels=9)
		    value_head = Dense(in_layers=[trunk], activation_fn=None, out_channels=1)
		    value_head = Squeeze(squeeze_dims=1, in_layers=[value_head])
		    action_prob = SoftMax(in_layers=[logits])
		    return {'action_prob': action_prob, 'value': value_head}


		def eval_tic_tac_toe(value_weight,
		                     num_epoch_rounds=1,
		                     games=10**4,
		                     rollouts=10**5):
		  """Train an A3C agent on tic-tac-toe and report its average reward.

		  The model directory /tmp/tictactoe is removed up front. Each round then
		  builds an A3C model on that directory, attempts `a3c.restore()`, calls
		  `a3c.fit(rollouts)` and plays `games` evaluation games.

		  :param value_weight: forwarded to dc.rl.A3C as `value_weight`.
		  :param num_epoch_rounds: number of train-then-evaluate rounds.
		  :param games: number of evaluation games played per round.
		  :param rollouts: argument passed to `a3c.fit` in every round.
		  :return: list with one dict per round, mapping `(round + 1) * rollouts`
		    to the mean of the last reward of each game played in that round.
		  """
		  env = deepchem.rl.envs.tictactoe.TicTacToeEnvironment()
		  policy = TicTacToePolicy()
		  model_dir = "/tmp/tictactoe"
		  # Start from a clean directory; a missing directory is not an error.
		  shutil.rmtree(model_dir, ignore_errors=True)

		  avg_rewards = []
		  for j in range(num_epoch_rounds):
		    a3c = dc.rl.A3C(
		        env,
		        policy,
		        entropy_weight=0.01,
		        value_weight=value_weight,
		        model_dir=model_dir)
		    a3c.optimizer = dc.models.tensorgraph.TFWrapper(
		        tf.train.AdamOptimizer, learning_rate=0.01)
		    try:
		      a3c.restore()
		    except Exception:
		      # Best effort: presumably nothing has been saved yet on the first
		      # round. Catching Exception (not a bare except) keeps Ctrl-C and
		      # SystemExit working.
		      print("unable to restore")
		    a3c.fit(rollouts)
		    rewards = []
		    for _ in range(games):
		      env.reset()
		      # Stays -inf only if the environment is already terminated after reset.
		      reward = -float('inf')
		      while not env._terminated:
		        action = a3c.select_action(env._state)
		        reward = env.step(action)
		      # Only the reward of the final step of each game is recorded.
		      rewards.append(reward)
		    avg_rewards.append({(j + 1) * rollouts: np.mean(rewards)})
		  return avg_rewards


		def main():
		  """Run three train/evaluate rounds with a value weight of 6.0 and print the scores."""
		  print(eval_tic_tac_toe(6.0, num_epoch_rounds=3))


		# Only run the demo when this file is executed as a script.
		if __name__ == "__main__":
		  main()

deepchem/models/tensorgraph/layers.py

+15 −0

Original line number	Diff line number	Diff line
		@@ -287,6 +287,21 @@ class Reshape(Layer):
		return out_tensor


		class Squeeze(Layer):
		  """Layer that applies tf.squeeze to its first input tensor."""

		  def __init__(self, squeeze_dims, **kwargs):
		    """Remember `squeeze_dims` and pass the remaining kwargs on to Layer."""
		    self.squeeze_dims = squeeze_dims
		    super(Squeeze, self).__init__(**kwargs)

		  def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
		    """Squeeze the first input tensor along `self.squeeze_dims`.

		    The result is returned and, when `set_tensors` is true, also stored on
		    `self.out_tensor`.
		    """
		    source = self._get_input_tensors(in_layers)[0]
		    squeezed = tf.squeeze(source, squeeze_dims=self.squeeze_dims)
		    if set_tensors:
		      self.out_tensor = squeezed
		    return squeezed


		class Transpose(Layer):

		def __init__(self, perm, **kwargs):

deepchem/models/tensorgraph/tests/test_layers.py

+9 −1

Original line number	Diff line number	Diff line
		@@ -9,7 +9,7 @@ from tensorflow.python.framework import test_util
		from deepchem.feat.mol_graphs import ConvMol
		from deepchem.feat.mol_graphs import MultiConvMol
		from deepchem.feat.graph_features import ConvMolFeaturizer
		from deepchem.models.tensorgraph.layers import Conv1D
		from deepchem.models.tensorgraph.layers import Conv1D, Squeeze
		from deepchem.models.tensorgraph.layers import Dense
		from deepchem.models.tensorgraph.layers import Flatten
		from deepchem.models.tensorgraph.layers import Reshape
		@@ -521,3 +521,11 @@ class TestLayers(test_util.TensorFlowTestCase):
		result = out_tensor.eval()
		assert result.shape == (1, 6, 1)
		assert np.array_equal(value1.reshape((1, 6, 1)) + value2, result)

		def test_squeeze_inputs(self):
		  """Test that Squeeze drops the size-1 dimension and keeps the values."""
		  value1 = np.random.uniform(size=(2, 1)).astype(np.float32)
		  # The session object itself is not needed; the context only provides the
		  # default session used by `eval()`.
		  with self.test_session():
		    out_tensor = Squeeze(squeeze_dims=1)(tf.constant(value1))
		    result = out_tensor.eval()
		    assert result.shape == (2,)
		    # Squeezing must not alter the data, only the shape.
		    assert np.array_equal(value1.reshape((2,)), result)

deepchem/models/tensorgraph/tests/test_tensor_graph.py

+11 −8

Original line number	Diff line number	Diff line
		@@ -3,6 +3,7 @@ import unittest
		import numpy as np
		import os
		from nose.tools import assert_true
		from flaky import flaky

		import deepchem as dc
		from deepchem.data import NumpyDataset
		@@ -30,13 +31,14 @@ class TestTensorGraph(unittest.TestCase):
		label = Label(shape=(None, 2))
		smce = SoftMaxCrossEntropy(in_layers=[label, dense])
		loss = ReduceMean(in_layers=[smce])
		tg = dc.models.TensorGraph(learning_rate=0.1)
		tg = dc.models.TensorGraph(learning_rate=0.01)
		tg.add_output(output)
		tg.set_loss(loss)
		tg.fit(dataset, nb_epoch=1000)
		prediction = np.squeeze(tg.predict_proba_on_batch(X))
		assert_true(np.all(np.isclose(prediction, y, atol=0.4)))

		@flaky
		def test_multi_task_classifier(self):
		n_data_points = 20
		n_features = 2
		@@ -66,7 +68,7 @@ class TestTensorGraph(unittest.TestCase):

		total_loss = ReduceMean(in_layers=entropies)

		tg = dc.models.TensorGraph(learning_rate=0.1)
		tg = dc.models.TensorGraph(learning_rate=0.01)
		for output in outputs:
		tg.add_output(output)
		tg.set_loss(total_loss)
		@@ -90,7 +92,7 @@ class TestTensorGraph(unittest.TestCase):
		dense = Dense(out_channels=1, in_layers=[features])
		label = Label(shape=(None, 1))
		loss = ReduceSquareDifference(in_layers=[dense, label])
		tg = dc.models.TensorGraph(learning_rate=0.1)
		tg = dc.models.TensorGraph(learning_rate=0.01)
		tg.add_output(dense)
		tg.set_loss(loss)
		tg.fit(dataset, nb_epoch=1000)
		@@ -125,7 +127,7 @@ class TestTensorGraph(unittest.TestCase):

		total_loss = ReduceMean(in_layers=losses)

		tg = dc.models.TensorGraph(learning_rate=0.1)
		tg = dc.models.TensorGraph(learning_rate=0.01)
		for output in outputs:
		tg.add_output(output)
		tg.set_loss(total_loss)
		@@ -139,6 +141,7 @@ class TestTensorGraph(unittest.TestCase):
		y_pred = prediction[:, i, :]
		assert_true(np.all(np.isclose(y_pred, y_real, atol=1.5)))

		@flaky
		def test_no_queue(self):
		n_data_points = 20
		n_features = 2
		@@ -151,7 +154,7 @@ class TestTensorGraph(unittest.TestCase):
		label = Label(shape=(None, 2))
		smce = SoftMaxCrossEntropy(in_layers=[label, dense])
		loss = ReduceMean(in_layers=[smce])
		tg = dc.models.TensorGraph(learning_rate=0.1, use_queue=False)
		tg = dc.models.TensorGraph(learning_rate=0.01, use_queue=False)
		tg.add_output(output)
		tg.set_loss(loss)
		tg.fit(dataset, nb_epoch=1000)
		@@ -173,7 +176,7 @@ class TestTensorGraph(unittest.TestCase):
		tg = dc.models.TensorGraph(
		tensorboard=True,
		tensorboard_log_frequency=1,
		learning_rate=0.1,
		learning_rate=0.01,
		model_dir='/tmp/tensorgraph')
		tg.add_output(output)
		tg.set_loss(loss)
		@@ -197,7 +200,7 @@ class TestTensorGraph(unittest.TestCase):
		label = Label(shape=(None, 2))
		smce = SoftMaxCrossEntropy(in_layers=[label, dense])
		loss = ReduceMean(in_layers=[smce])
		tg = dc.models.TensorGraph(learning_rate=0.1)
		tg = dc.models.TensorGraph(learning_rate=0.01)
		tg.add_output(output)
		tg.set_loss(loss)
		tg.fit(dataset, nb_epoch=1)
		@@ -237,7 +240,7 @@ class TestTensorGraph(unittest.TestCase):

		total_loss = ReduceMean(in_layers=[smce])

		tg = dc.models.TensorGraph(learning_rate=0.1)
		tg = dc.models.TensorGraph(learning_rate=0.01)
		for output in outputs:
		tg.add_output(output)
		tg.set_loss(total_loss)

Admin message