Merge pull request #461 from peastman/checkpoint (f30dd679) · Commits · 钟慕尧 / deepchem

deepchem/models/sequential.py

+6 −2

Original line number	Diff line number	Diff line
		@@ -133,7 +133,8 @@ class Sequential(Model):
		max_checkpoints_to_keep=5,
		log_every_N_batches=50,
		learning_rate=.001,
		batch_size=50):
		batch_size=50,
		checkpoint_interval=10):
		"""Trains the model for a fixed number of epochs.

		TODO(rbharath): This is mostly copied from TensorflowGraphModel. Should
		@@ -151,6 +152,8 @@ class Sequential(Model):
		1 for progress bar logging, 2 for one log line per epoch.
		initial_epoch: epoch at which to start training
		(useful for resuming a previous training run)
		checkpoint_interval: int
		Frequency at which to write checkpoints, measured in epochs
		"""
		############################################################## TIMING
		time1 = time.time()
		@@ -180,6 +183,7 @@ class Sequential(Model):
		y_pred = np.squeeze(np.array(output))
		y_b = y_b.flatten()
		n_batches += 1
		if epoch % checkpoint_interval == checkpoint_interval - 1:
		saver.save(sess, self._save_path, global_step=epoch)
		avg_loss = float(avg_loss) / n_batches
		print('Ending epoch %d: Average loss %g' % (epoch, avg_loss))

deepchem/models/tensorflow_models/__init__.py

+116 −77

Original line number	Diff line number	Diff line
		@@ -23,6 +23,7 @@ from deepchem.utils.evaluate import Evaluator
		from deepchem.data import pad_features
		from tensorflow.contrib.layers.python.layers import batch_norm


		def softmax(x):
		"""Simple numpy softmax implementation
		"""
		@@ -42,9 +43,12 @@ def softmax(x):
		x /= row_sum.reshape(x.shape[:2] + (1,))
		return x


		class TensorflowGraph(object):
		"""Simple class that holds information needed to run Tensorflow graph."""
		def __init__(self, graph, session, name_scopes, output, labels, weights, loss):

		def __init__(self, graph, session, name_scopes, output, labels, weights,
		loss):
		self.graph = graph
		self.session = session
		self.name_scopes = name_scopes
		@@ -57,7 +61,8 @@ class TensorflowGraph(object):
		def get_placeholder_scope(graph, name_scopes):
		"""Gets placeholder scope."""
		placeholder_root = "placeholders"
		return TensorflowGraph.shared_name_scope(placeholder_root, graph, name_scopes)
		return TensorflowGraph.shared_name_scope(placeholder_root, graph,
		name_scopes)

		@staticmethod
		def shared_name_scope(name, graph, name_scopes):
		@@ -84,6 +89,7 @@ class TensorflowGraph(object):
		feed_dict['{}/{}:0'.format(placeholder_root, name)] = value
		return feed_dict


		class TensorflowGraphModel(Model):
		"""Parent class for deepchem Tensorflow models.

		@@ -107,11 +113,25 @@ class TensorflowGraphModel(Model):
		logdir: Directory for output files.
		"""

		def __init__(self, n_tasks, n_features, logdir=None, layer_sizes=[1000],
		weight_init_stddevs=[.02], bias_init_consts=[1.], penalty=0.0,
		penalty_type="l2", dropouts=[0.5], learning_rate=.001,
		momentum=.9, optimizer="adam", batch_size=50, n_classes=2,
		pad_batches=False, verbose=True, seed=None, **kwargs):
		def __init__(self,
		n_tasks,
		n_features,
		logdir=None,
		layer_sizes=[1000],
		weight_init_stddevs=[.02],
		bias_init_consts=[1.],
		penalty=0.0,
		penalty_type="l2",
		dropouts=[0.5],
		learning_rate=.001,
		momentum=.9,
		optimizer="adam",
		batch_size=50,
		n_classes=2,
		pad_batches=False,
		verbose=True,
		seed=None,
		**kwargs):
		"""Constructs the computational graph.

		This function constructs the computational graph for the model. It relies
		@@ -228,7 +248,8 @@ class TensorflowGraphModel(Model):
		else:
		loss = None
		output = self.add_output_ops(graph, output) # add softmax heads
		return TensorflowGraph(graph=graph,
		return TensorflowGraph(
		graph=graph,
		session=shared_session,
		name_scopes=name_scopes,
		output=output,
		@@ -245,8 +266,8 @@ class TensorflowGraphModel(Model):
		with TensorflowGraph.shared_name_scope('costs', graph, name_scopes):
		for task in range(self.n_tasks):
		task_str = str(task).zfill(len(str(self.n_tasks)))
		with TensorflowGraph.shared_name_scope(
		'cost_{}'.format(task_str), graph, name_scopes):
		with TensorflowGraph.shared_name_scope('cost_{}'.format(task_str),
		graph, name_scopes):
		with tf.name_scope('weighted'):
		weighted_cost = self.cost(output[task], labels[task],
		weights[task])
		@@ -257,12 +278,13 @@ class TensorflowGraphModel(Model):
		# non-zero weight examples in the batch. Also, instead of using
		# tf.reduce_mean (which can put ops on the CPU) we explicitly
		# calculate with div/sum so it stays on the GPU.
		gradient_cost = tf.div(tf.reduce_sum(weighted_cost),
		self.batch_size)
		gradient_cost = tf.div(
		tf.reduce_sum(weighted_cost), self.batch_size)
		gradient_costs.append(gradient_cost)

		# aggregated costs
		with TensorflowGraph.shared_name_scope('aggregated', graph, name_scopes):
		with TensorflowGraph.shared_name_scope('aggregated', graph,
		name_scopes):
		with tf.name_scope('gradient'):
		loss = tf.add_n(gradient_costs)

		@@ -273,8 +295,13 @@ class TensorflowGraphModel(Model):

		return loss

		def fit(self, dataset, nb_epoch=10, max_checkpoints_to_keep=5,
		log_every_N_batches=50, **kwargs):
		def fit(self,
		dataset,
		nb_epoch=10,
		max_checkpoints_to_keep=5,
		log_every_N_batches=50,
		checkpoint_interval=10,
		**kwargs):
		"""Fit the model.

		Parameters
		@@ -288,6 +315,8 @@ class TensorflowGraphModel(Model):
		log_every_N_batches: int
		Report every N batches. Useful for training on very large datasets,
		where epochs can take a long time to finish.
		checkpoint_interval: int
		Frequency at which to write checkpoints, measured in epochs

		Raises
		------
		@@ -299,8 +328,8 @@ class TensorflowGraphModel(Model):
		############################################################## TIMING
		log("Training for %d epochs" % nb_epoch, self.verbose)
		with self.train_graph.graph.as_default():
		train_op = self.get_training_op(
		self.train_graph.graph, self.train_graph.loss)
		train_op = self.get_training_op(self.train_graph.graph,
		self.train_graph.loss)
		with self._get_shared_session(train=True) as sess:
		sess.run(tf.global_variables_initializer())
		saver = tf.train.Saver(max_to_keep=max_checkpoints_to_keep)
		@@ -312,13 +341,15 @@ class TensorflowGraphModel(Model):
		# Turns out there are valid cases where we don't want pad-batches
		# on by default.
		#dataset.iterbatches(batch_size, pad_batches=True)):
		dataset.iterbatches(self.batch_size, pad_batches=self.pad_batches)):
		dataset.iterbatches(
		self.batch_size, pad_batches=self.pad_batches)):
		if ind % log_every_N_batches == 0:
		log("On batch %d" % ind, self.verbose)
		# Run training op.
		feed_dict = self.construct_feed_dict(X_b, y_b, w_b, ids_b)
		fetches = self.train_graph.output + [
		train_op, self.train_graph.loss]
		train_op, self.train_graph.loss
		]
		fetched_values = sess.run(fetches, feed_dict=feed_dict)
		output = fetched_values[:len(self.train_graph.output)]
		loss = fetched_values[-1]
		@@ -326,15 +357,16 @@ class TensorflowGraphModel(Model):
		y_pred = np.squeeze(np.array(output))
		y_b = y_b.flatten()
		n_batches += 1
		if epoch % checkpoint_interval == checkpoint_interval - 1:
		saver.save(sess, self._save_path, global_step=epoch)
		avg_loss = float(avg_loss) / n_batches
		log('Ending epoch %d: Average loss %g' % (epoch, avg_loss), self.verbose)
		log('Ending epoch %d: Average loss %g' % (epoch, avg_loss),
		self.verbose)
		# Always save a final checkpoint when complete.
		saver.save(sess, self._save_path, global_step=epoch + 1)
		############################################################## TIMING
		time2 = time.time()
		print("TIMING: model fitting took %0.3f s" % (time2-time1),
		self.verbose)
		print("TIMING: model fitting took %0.3f s" % (time2 - time1), self.verbose)
		############################################################## TIMING

		def add_output_ops(self, graph, output):
		@@ -365,7 +397,6 @@ class TensorflowGraphModel(Model):
		"""
		raise NotImplementedError('Must be overridden by concrete subclass')


		def add_label_placeholders(self, graph, name_scopes):
		"""Add Placeholders for labels for each task.

		@@ -389,12 +420,14 @@ class TensorflowGraphModel(Model):
		feeding and fetching the same tensor.
		"""
		weights = []
		placeholder_scope = TensorflowGraph.get_placeholder_scope(graph, name_scopes)
		placeholder_scope = TensorflowGraph.get_placeholder_scope(graph,
		name_scopes)
		with placeholder_scope:
		for task in range(self.n_tasks):
		weights.append(tf.identity(
		tf.placeholder(tf.float32, shape=[None],
		name='weights_%d' % task)))
		weights.append(
		tf.identity(
		tf.placeholder(
		tf.float32, shape=[None], name='weights_%d' % task)))
		return weights

		def cost(self, output, labels, weights):
		@@ -421,7 +454,8 @@ class TensorflowGraphModel(Model):
		A training op.
		"""
		with graph.as_default():
		opt = model_ops.optimizer(self.optimizer, self.learning_rate, self.momentum)
		opt = model_ops.optimizer(self.optimizer, self.learning_rate,
		self.momentum)
		return opt.minimize(loss, name='train')

		def _get_shared_session(self, train):
		@@ -450,8 +484,7 @@ class TensorflowGraphModel(Model):
		last_checkpoint = self._find_last_checkpoint()
		# TODO(rbharath): Is setting train=False right here?
		saver = tf.train.Saver()
		saver.restore(self._get_shared_session(train=False),
		last_checkpoint)
		saver.restore(self._get_shared_session(train=False), last_checkpoint)
		self._restored_model = True

		def predict(self, dataset, transformers=[]):
		@@ -550,6 +583,7 @@ class TensorflowGraphModel(Model):
		pass
		return os.path.join(self.logdir, last_checkpoint)


		class TensorflowClassifier(TensorflowGraphModel):
		"""Classification model.

		@@ -557,6 +591,7 @@ class TensorflowClassifier(TensorflowGraphModel):
		output: logits op(s) used for computing classification loss and predicted
		class probabilities for each task.
		"""

		def get_task_type(self):
		return "classification"

		@@ -573,8 +608,8 @@ class TensorflowClassifier(TensorflowGraphModel):
		A tensor with shape batch_size containing the weighted cost for each
		example.
		"""
		return tf.mul(tf.nn.softmax_cross_entropy_with_logits(logits, labels),
		weights)
		return tf.mul(
		tf.nn.softmax_cross_entropy_with_logits(logits, labels), weights)

		def add_label_placeholders(self, graph, name_scopes):
		"""Add Placeholders for labels for each task.
		@@ -585,15 +620,19 @@ class TensorflowClassifier(TensorflowGraphModel):
		Placeholders are wrapped in identity ops to avoid the error caused by
		feeding and fetching the same tensor.
		"""
		placeholder_scope = TensorflowGraph.get_placeholder_scope(graph, name_scopes)
		placeholder_scope = TensorflowGraph.get_placeholder_scope(graph,
		name_scopes)
		with graph.as_default():
		batch_size = self.batch_size
		n_classes = self.n_classes
		labels = []
		with placeholder_scope:
		for task in range(self.n_tasks):
		labels.append(tf.identity(
		tf.placeholder(tf.float32, shape=[None, n_classes],
		labels.append(
		tf.identity(
		tf.placeholder(
		tf.float32,
		shape=[None, n_classes],
		name='labels_%d' % task)))
		return labels

		@@ -639,14 +678,12 @@ class TensorflowClassifier(TensorflowGraphModel):
		elif batch_output.ndim == 2:
		batch_output = batch_output.transpose((1, 0))
		else:
		raise ValueError(
		'Unrecognized rank combination for output: %s' %
		raise ValueError('Unrecognized rank combination for output: %s' %
		(batch_output.shape,))
		output.append(batch_output)

		outputs = np.array(from_one_hot(
		np.squeeze(np.concatenate(output)), axis=-1))

		outputs = np.array(
		from_one_hot(np.squeeze(np.concatenate(output)), axis=-1))

		outputs = np.copy(outputs)
		outputs = np.reshape(outputs, (len(X), n_tasks))
		@@ -689,8 +726,7 @@ class TensorflowClassifier(TensorflowGraphModel):
		elif batch_outputs.ndim == 2:
		batch_outputs = batch_outputs.transpose((1, 0))
		else:
		raise ValueError(
		'Unrecognized rank combination for output: %s ' %
		raise ValueError('Unrecognized rank combination for output: %s ' %
		(batch_outputs.shape,))

		# Note that softmax is already applied in construct_graph
		@@ -698,6 +734,7 @@ class TensorflowClassifier(TensorflowGraphModel):

		return np.copy(outputs)


		class TensorflowRegressor(TensorflowGraphModel):
		"""Regression model.

		@@ -705,6 +742,7 @@ class TensorflowRegressor(TensorflowGraphModel):
		output: Op(s) used for computing regression loss and predicted regression
		outputs for each task.
		"""

		def get_task_type(self):
		return "regressor"

		@@ -735,15 +773,17 @@ class TensorflowRegressor(TensorflowGraphModel):
		Placeholders are wrapped in identity ops to avoid the error caused by
		feeding and fetching the same tensor.
		"""
		placeholder_scope = TensorflowGraph.get_placeholder_scope(graph, name_scopes)
		placeholder_scope = TensorflowGraph.get_placeholder_scope(graph,
		name_scopes)
		with graph.as_default():
		batch_size = self.batch_size
		labels = []
		with placeholder_scope:
		for task in range(self.n_tasks):
		labels.append(tf.identity(
		tf.placeholder(tf.float32, shape=[None],
		name='labels_%d' % task)))
		labels.append(
		tf.identity(
		tf.placeholder(
		tf.float32, shape=[None], name='labels_%d' % task)))
		return labels

		def predict_on_batch(self, X):
		@@ -793,8 +833,7 @@ class TensorflowRegressor(TensorflowGraphModel):
		n_samples = len(X)
		batch_outputs = batch_outputs.reshape((n_samples, n_tasks))
		else:
		raise ValueError(
		'Unrecognized rank combination for output: %s' %
		raise ValueError('Unrecognized rank combination for output: %s' %
		(batch_outputs.shape))
		# Prune away any padding that was added
		batch_outputs = batch_outputs[:n_samples]

deepchem/models/tensorflow_models/fcnet.py

+5 −1

Original line number	Diff line number	Diff line
		@@ -308,6 +308,7 @@ class TensorflowMultiTaskFitTransformRegressor(TensorflowMultiTaskRegressor):
		nb_epoch=10,
		max_checkpoints_to_keep=5,
		log_every_N_batches=50,
		checkpoint_interval=10,
		**kwargs):
		"""Perform fit transformations on each minibatch. Fit the model.

		@@ -322,6 +323,8 @@ class TensorflowMultiTaskFitTransformRegressor(TensorflowMultiTaskRegressor):
		log_every_N_batches: int
		Report every N batches. Useful for training on very large datasets,
		where epochs can take a long time to finish.
		checkpoint_interval: int
		Frequency at which to write checkpoints, measured in epochs

		Raises
		------
		@@ -361,6 +364,7 @@ class TensorflowMultiTaskFitTransformRegressor(TensorflowMultiTaskRegressor):
		y_pred = np.squeeze(np.array(output))
		y_b = y_b.flatten()
		n_batches += 1
		if epoch % checkpoint_interval == checkpoint_interval - 1:
		saver.save(sess, self._save_path, global_step=epoch)
		avg_loss = float(avg_loss) / n_batches
		log('Ending epoch %d: Average loss %g' % (epoch, avg_loss),

deepchem/models/tensorflow_models/progressive_joint.py

+84 −71

File changed.

Preview size limit exceeded, changes collapsed.

deepchem/models/tensorflow_models/progressive_multitask.py

+125 −96

File changed.

Preview size limit exceeded, changes collapsed.

Admin message