Use queue in TensorflowGraphModel (ed7782f0) · Commits · 钟慕尧 / deepchem

deepchem/models/tensorflow_models/IRV.py

+19 −4

Original line number	Diff line number	Diff line
		@@ -95,20 +95,35 @@ class TensorflowMultiTaskIRVClassifier(TensorflowLogisticRegression):
		with graph.as_default():
		output = []
		with placeholder_scope:
		self.features = tf.placeholder(
		mol_features = tf.placeholder(
		tf.float32, shape=[None, self.n_features], name='mol_features')
		with tf.name_scope('variable'):
		V = tf.Variable(tf.constant([0.01, 1.]), name="vote", dtype=tf.float32)
		W = tf.Variable(tf.constant([1., 1.]), name="w", dtype=tf.float32)
		b = tf.Variable(tf.constant([0.01]), name="b", dtype=tf.float32)
		b2 = tf.Variable(tf.constant([0.01]), name="b2", dtype=tf.float32)

		label_placeholders = self.add_label_placeholders(graph, name_scopes)
		weight_placeholders = self.add_example_weight_placeholders(graph, name_scopes)
		if training:
		graph.queue = tf.FIFOQueue(capacity=5, dtypes=[tf.float32]*(len(label_placeholders)+len(weight_placeholders)+1))
		graph.enqueue = graph.queue.enqueue([mol_features]+label_placeholders+weight_placeholders)
		queue_outputs = graph.queue.dequeue()
		labels = queue_outputs[1:len(label_placeholders)+1]
		weights = queue_outputs[len(label_placeholders)+1:]
		features = queue_outputs[0]
		else:
		labels = label_placeholders
		weights = weight_placeholders
		features = mol_features

		for count in range(self.n_tasks):
		similarity = self.features[:, 2 * K * count:(2 * K * count + K)]
		similarity = features[:, 2 * K * count:(2 * K * count + K)]
		ys = tf.to_int32(
		self.features[:, (2 * K * count + K):2 * K * (count + 1)])
		features[:, (2 * K * count + K):2 * K * (count + 1)])
		R = b + W[0] * similarity + W[1] * tf.constant(
		np.arange(K) + 1, dtype=tf.float32)
		R = tf.sigmoid(R)
		z = tf.reduce_sum(R * tf.gather(V, ys), axis=1) + b2
		output.append(tf.reshape(z, shape=[-1, 1]))
		return output
		return (output, labels, weights)

deepchem/models/tensorflow_models/init.py

+45 −24

Original line number	Diff line number	Diff line
		@@ -13,6 +13,7 @@ import numpy as np
		import pandas as pd
		import tensorflow as tf
		import tempfile
		import threading
		from deepchem.models import Model
		from deepchem.metrics import from_one_hot
		from deepchem.nn import model_ops
		@@ -239,9 +240,7 @@ class TensorflowGraphModel(Model):
		with graph.as_default():
		if seed is not None:
		tf.set_random_seed(seed)
		output = self.build(graph, name_scopes, training)
		labels = self.add_label_placeholders(graph, name_scopes)
		weights = self.add_example_weight_placeholders(graph, name_scopes)
		(output, labels, weights) = self.build(graph, name_scopes, training)

		if training:
		loss = self.add_training_cost(graph, name_scopes, output, labels, weights)
		@@ -295,6 +294,7 @@ class TensorflowGraphModel(Model):

		return loss


		def fit(self,
		dataset,
		nb_epoch=10,
		@@ -335,33 +335,54 @@ class TensorflowGraphModel(Model):
		saver = tf.train.Saver(max_to_keep=max_checkpoints_to_keep)
		# Save an initial checkpoint.
		saver.save(sess, self._save_path, global_step=0)

		# Define the code that runs on a separate thread to feed data into the queue.
		def enqueue(sess, dataset, nb_epoch, epoch_end_indices):
		index = 0
		for epoch in range(nb_epoch):
		avg_loss, n_batches = 0., 0
		for ind, (X_b, y_b, w_b, ids_b) in enumerate(
		# Turns out there are valid cases where we don't want pad-batches
		# on by default.
		#dataset.iterbatches(batch_size, pad_batches=True)):
		dataset.iterbatches(
		self.batch_size, pad_batches=self.pad_batches)):
		if ind % log_every_N_batches == 0:
		log("On batch %d" % ind, self.verbose)
		# Run training op.
		for X_b, y_b, w_b, ids_b in dataset.iterbatches(self.batch_size, pad_batches=self.pad_batches):
		feed_dict = self.construct_feed_dict(X_b, y_b, w_b, ids_b)
		sess.run(self.train_graph.graph.enqueue, feed_dict=feed_dict)
		index += 1
		epoch_end_indices.append(index)
		sess.run(self.train_graph.graph.queue.close())

		epoch_end_indices = []
		enqueue_thread = threading.Thread(target=enqueue, args=[sess, dataset, nb_epoch, epoch_end_indices])
		enqueue_thread.daemon = True
		enqueue_thread.start()

		# Main training loop.
		try:
		epoch = 0
		index = 0
		index_in_epoch = 0
		avg_loss = 0.0
		while True:
		if index_in_epoch % log_every_N_batches == 0:
		log("On batch %d" % index_in_epoch, self.verbose)
		# Run training op.
		fetches = self.train_graph.output + [
		train_op, self.train_graph.loss
		]
		fetched_values = sess.run(fetches, feed_dict=feed_dict)
		output = fetched_values[:len(self.train_graph.output)]
		fetched_values = sess.run(fetches)
		loss = fetched_values[-1]
		avg_loss += loss
		y_pred = np.squeeze(np.array(output))
		y_b = y_b.flatten()
		n_batches += 1
		index += 1
		index_in_epoch += 1
		if len(epoch_end_indices) > 0 and index >= epoch_end_indices[0]:
		# We have reached the end of an epoch.
		if epoch % checkpoint_interval == checkpoint_interval - 1:
		saver.save(sess, self._save_path, global_step=epoch)
		avg_loss = float(avg_loss) / n_batches
		avg_loss = float(avg_loss) / index_in_epoch
		log('Ending epoch %d: Average loss %g' % (epoch, avg_loss),
		self.verbose)
		index_in_epoch = 0
		avg_loss = 0.0
		del epoch_end_indices[0]
		except tf.errors.OutOfRangeError:
		# We have reached the end of the data.
		pass
		# Always save a final checkpoint when complete.
		saver.save(sess, self._save_path, global_step=epoch + 1)
		############################################################## TIMING

deepchem/models/tensorflow_models/fcnet.py

+32 −6

Original line number	Diff line number	Diff line
		@@ -33,7 +33,7 @@ class TensorflowMultiTaskClassifier(TensorflowClassifier):
		n_features = self.n_features
		with graph.as_default():
		with placeholder_scope:
		self.mol_features = tf.placeholder(
		mol_features = tf.placeholder(
		tf.float32, shape=[None, n_features], name='mol_features')

		layer_sizes = self.layer_sizes
		@@ -50,7 +50,20 @@ class TensorflowMultiTaskClassifier(TensorflowClassifier):
		n_layers = lengths_set.pop()
		assert n_layers > 0, 'Must have some layers defined.'

		prev_layer = self.mol_features
		label_placeholders = self.add_label_placeholders(graph, name_scopes)
		weight_placeholders = self.add_example_weight_placeholders(graph, name_scopes)
		if training:
		graph.queue = tf.FIFOQueue(capacity=5, dtypes=[tf.float32]*(len(label_placeholders)+len(weight_placeholders)+1))
		graph.enqueue = graph.queue.enqueue([mol_features]+label_placeholders+weight_placeholders)
		queue_outputs = graph.queue.dequeue()
		labels = queue_outputs[1:len(label_placeholders)+1]
		weights = queue_outputs[len(label_placeholders)+1:]
		prev_layer = queue_outputs[0]
		else:
		labels = label_placeholders
		weights = weight_placeholders
		prev_layer = mol_features

		prev_layer_size = n_features
		for i in range(n_layers):
		layer = tf.nn.relu(
		@@ -67,7 +80,7 @@ class TensorflowMultiTaskClassifier(TensorflowClassifier):
		prev_layer_size = layer_sizes[i]

		output = model_ops.multitask_logits(layer, self.n_tasks)
		return output
		return (output, labels, weights)

		def construct_feed_dict(self, X_b, y_b=None, w_b=None, ids_b=None):
		"""Construct a feed dictionary from minibatch data.
		@@ -112,7 +125,7 @@ class TensorflowMultiTaskRegressor(TensorflowRegressor):
		name_scopes)
		with graph.as_default():
		with placeholder_scope:
		self.mol_features = tf.placeholder(
		mol_features = tf.placeholder(
		tf.float32, shape=[None, n_features], name='mol_features')

		layer_sizes = self.layer_sizes
		@@ -129,7 +142,20 @@ class TensorflowMultiTaskRegressor(TensorflowRegressor):
		n_layers = lengths_set.pop()
		assert n_layers > 0, 'Must have some layers defined.'

		prev_layer = self.mol_features
		label_placeholders = self.add_label_placeholders(graph, name_scopes)
		weight_placeholders = self.add_example_weight_placeholders(graph, name_scopes)
		if training:
		graph.queue = tf.FIFOQueue(capacity=5, dtypes=[tf.float32]*(len(label_placeholders)+len(weight_placeholders)+1))
		graph.enqueue = graph.queue.enqueue([mol_features]+label_placeholders+weight_placeholders)
		queue_outputs = graph.queue.dequeue()
		labels = queue_outputs[1:len(label_placeholders)+1]
		weights = queue_outputs[len(label_placeholders)+1:]
		prev_layer = queue_outputs[0]
		else:
		labels = label_placeholders
		weights = weight_placeholders
		prev_layer = mol_features

		prev_layer_size = n_features
		for i in range(n_layers):
		layer = tf.nn.relu(
		@@ -157,7 +183,7 @@ class TensorflowMultiTaskRegressor(TensorflowRegressor):
		stddev=weight_init_stddevs[i]),
		bias_init=tf.constant(value=bias_init_consts[i], shape=[1
		]))))
		return output
		return (output, labels, weights)

		def construct_feed_dict(self, X_b, y_b=None, w_b=None, ids_b=None):
		"""Construct a feed dictionary from minibatch data.

deepchem/nn/model_ops.py

+0 −3

Original line number	Diff line number	Diff line
		@@ -827,9 +827,6 @@ def fully_connected_layer(tensor,
		#print("tensor")
		#print(tensor)
		###################################################### DEBUG
		if len(tensor.get_shape()) != 2:
		raise ValueError('Dense layer input must be 2D, not %dD' %
		len(tensor.get_shape()))
		if weight_init is None:
		num_features = tensor.get_shape()[-1].value
		weight_init = tf.truncated_normal([num_features, size], stddev=0.01)

Admin message