Merge pull request #1083 from rbharath/one_shot (6a442b74) · Commits · 钟慕尧 / deepchem

contrib/one_shot_models/graph_models.py

0 → 100644

+334 −0

Original line number	Diff line number	Diff line
		"""
		Convenience classes for assembling graph models.
		"""
		from __future__ import print_function
		from __future__ import division
		from __future__ import unicode_literals

		__author__ = "Han Altae-Tran and Bharath Ramsundar"
		__copyright__ = "Copyright 2016, Stanford University"
		__license__ = "MIT"

		import warnings
		import tensorflow as tf
		from deepchem.nn.layers import GraphGather
		from deepchem.models.tf_new_models.graph_topology import GraphTopology, DTNNGraphTopology, DAGGraphTopology, WeaveGraphTopology, AlternateWeaveGraphTopology


		class SequentialGraph(object):
		"""An analog of Keras Sequential class for Graph data.

		Like the Sequential class from Keras, but automatically passes topology
		placeholders from GraphTopology to each graph layer (from layers) added
		to the network. Non graph layers don't get the extra placeholders.
		"""

		def __init__(self, n_feat):
		"""
		Parameters
		----------
		n_feat: int
		Number of features per atom.
		"""
		warnings.warn("SequentialGraph is deprecated. "
		"Will be removed in DeepChem 1.4.", DeprecationWarning)
		self.graph = tf.Graph()
		with self.graph.as_default():
		self.graph_topology = GraphTopology(n_feat)
		self.output = self.graph_topology.get_atom_features_placeholder()
		# Keep track of the layers
		self.layers = []

		def add(self, layer):
		"""Adds a new layer to model."""
		with self.graph.as_default():
		# For graphical layers, add connectivity placeholders
		if type(layer).__name__ in ['GraphConv', 'GraphGather', 'GraphPool']:
		if (len(self.layers) > 0 and hasattr(self.layers[-1], "__name__")):
		assert self.layers[-1].__name__ != "GraphGather", \
		'Cannot use GraphConv or GraphGather layers after a GraphGather'

		self.output = layer([self.output] +
		self.graph_topology.get_topology_placeholders())
		else:
		self.output = layer(self.output)

		# Add layer to the layer list
		self.layers.append(layer)

		def get_graph_topology(self):
		return self.graph_topology

		def get_num_output_features(self):
		"""Gets the output shape of the featurization layers of the network"""
		return self.layers[-1].output_shape[1]

		def return_outputs(self):
		return self.output

		def return_inputs(self):
		return self.graph_topology.get_input_placeholders()

		def get_layer(self, layer_id):
		return self.layers[layer_id]


		class SequentialDTNNGraph(SequentialGraph):
		"""An analog of Keras Sequential class for Coulomb Matrix data.

		automatically generates and passes topology placeholders to each layer.
		"""

		def __init__(self, n_distance=100, distance_min=-1., distance_max=18.):
		"""
		Parameters
		----------
		n_distance: int, optional
		granularity of distance matrix
		step size will be (distance_max-distance_min)/n_distance
		distance_min: float, optional
		minimum distance of atom pairs, default = -1 Angstorm
		distance_max: float, optional
		maximum distance of atom pairs, default = 18 Angstorm
		"""
		warnings.warn("SequentialDTNNGraph is deprecated. "
		"Will be removed in DeepChem 1.4.", DeprecationWarning)
		self.graph = tf.Graph()
		with self.graph.as_default():
		self.graph_topology = DTNNGraphTopology(
		n_distance, distance_min=distance_min, distance_max=distance_max)
		self.output = self.graph_topology.get_atom_number_placeholder()
		# Keep track of the layers
		self.layers = []

		def add(self, layer):
		"""Adds a new layer to model."""
		with self.graph.as_default():
		if type(layer).__name__ in ['DTNNStep']:
		self.output = layer([self.output] +
		self.graph_topology.get_topology_placeholders())
		elif type(layer).__name__ in ['DTNNGather']:
		self.output = layer(
		[self.output, self.graph_topology.atom_membership_placeholder])
		else:
		self.output = layer(self.output)
		self.layers.append(layer)


		class SequentialDAGGraph(SequentialGraph):
		"""SequentialGraph for DAG models
		"""

		def __init__(self, n_atom_feat=75, max_atoms=50):
		"""
		Parameters
		----------
		n_atom_feat: int, optional
		Number of features per atom.
		max_atoms: int, optional
		Maximum number of atoms in a molecule, should be defined based on dataset
		"""
		warnings.warn("SequentialDAGGraph is deprecated. "
		"Will be removed in DeepChem 1.4.", DeprecationWarning)
		self.graph = tf.Graph()
		with self.graph.as_default():
		self.graph_topology = DAGGraphTopology(
		n_atom_feat=n_atom_feat, max_atoms=max_atoms)
		self.output = self.graph_topology.get_atom_features_placeholder()
		self.layers = []

		def add(self, layer):
		"""Adds a new layer to model."""
		with self.graph.as_default():
		if type(layer).__name__ in ['DAGLayer']:
		self.output = layer([self.output] +
		self.graph_topology.get_topology_placeholders())
		elif type(layer).__name__ in ['DAGGather']:
		self.output = layer(
		[self.output, self.graph_topology.membership_placeholder])
		else:
		self.output = layer(self.output)
		self.layers.append(layer)


		class SequentialWeaveGraph(SequentialGraph):
		"""SequentialGraph for Weave models
		"""

		def __init__(self, max_atoms=50, n_atom_feat=75, n_pair_feat=14):
		"""
		Parameters
		----------
		max_atoms: int, optional
		Maximum number of atoms in a molecule, should be defined based on dataset
		n_atom_feat: int, optional
		Number of features per atom.
		n_pair_feat: int, optional
		Number of features per pair of atoms.
		"""
		warnings.warn("SequentialWeaveGraph is deprecated. "
		"Will be removed in DeepChem 1.4.", DeprecationWarning)
		self.graph = tf.Graph()
		self.max_atoms = max_atoms
		self.n_atom_feat = n_atom_feat
		self.n_pair_feat = n_pair_feat
		with self.graph.as_default():
		self.graph_topology = WeaveGraphTopology(self.max_atoms, self.n_atom_feat,
		self.n_pair_feat)
		self.output = self.graph_topology.get_atom_features_placeholder()
		self.output_P = self.graph_topology.get_pair_features_placeholder()
		self.layers = []

		def add(self, layer):
		"""Adds a new layer to model."""
		with self.graph.as_default():
		if type(layer).__name__ in ['WeaveLayer']:
		self.output, self.output_P = layer([
		self.output, self.output_P
		] + self.graph_topology.get_topology_placeholders())
		elif type(layer).__name__ in ['WeaveConcat']:
		self.output = layer(
		[self.output, self.graph_topology.atom_mask_placeholder])
		elif type(layer).__name__ in ['WeaveGather']:
		self.output = layer(
		[self.output, self.graph_topology.membership_placeholder])
		else:
		self.output = layer(self.output)
		self.layers.append(layer)


		class AlternateSequentialWeaveGraph(SequentialGraph):
		"""Alternate implementation of SequentialGraph for Weave models
		"""

		def __init__(self, batch_size, max_atoms=50, n_atom_feat=75, n_pair_feat=14):
		"""
		Parameters
		----------
		batch_size: int
		number of molecules in a batch
		max_atoms: int, optional
		Maximum number of atoms in a molecule, should be defined based on dataset
		n_atom_feat: int, optional
		Number of features per atom.
		n_pair_feat: int, optional
		Number of features per pair of atoms.
		"""
		warnings.warn("AlternateSequentialWeaveGraph is deprecated. "
		"Will be removed in DeepChem 1.4.", DeprecationWarning)
		self.graph = tf.Graph()
		self.batch_size = batch_size
		self.max_atoms = max_atoms
		self.n_atom_feat = n_atom_feat
		self.n_pair_feat = n_pair_feat
		with self.graph.as_default():
		self.graph_topology = AlternateWeaveGraphTopology(
		self.batch_size, self.max_atoms, self.n_atom_feat, self.n_pair_feat)
		self.output = self.graph_topology.get_atom_features_placeholder()
		self.output_P = self.graph_topology.get_pair_features_placeholder()
		self.layers = []

		def add(self, layer):
		"""Adds a new layer to model."""
		with self.graph.as_default():
		if type(layer).__name__ in ['AlternateWeaveLayer']:
		self.output, self.output_P = layer([
		self.output, self.output_P
		] + self.graph_topology.get_topology_placeholders())
		elif type(layer).__name__ in ['AlternateWeaveGather']:
		self.output = layer(
		[self.output, self.graph_topology.atom_split_placeholder])
		else:
		self.output = layer(self.output)
		self.layers.append(layer)


		class SequentialSupportGraph(object):
		"""An analog of Keras Sequential model for test/support models."""

		def __init__(self, n_feat):
		"""
		Parameters
		----------
		n_feat: int
		Number of atomic features.
		"""
		warnings.warn("SequentialSupportWeaveGraph is deprecated. "
		"Will be removed in DeepChem 1.4.", DeprecationWarning)
		self.graph = tf.Graph()
		with self.graph.as_default():
		# Create graph topology and x
		self.test_graph_topology = GraphTopology(n_feat, name='test')
		self.support_graph_topology = GraphTopology(n_feat, name='support')
		self.test = self.test_graph_topology.get_atom_features_placeholder()
		self.support = self.support_graph_topology.get_atom_features_placeholder()

		# Keep track of the layers
		self.layers = []
		# Whether or not we have used the GraphGather layer yet
		self.bool_pre_gather = True

		def add(self, layer):
		"""Adds a layer to both test/support stacks.

		Note that the layer transformation is performed independently on the
		test/support tensors.
		"""
		with self.graph.as_default():
		self.layers.append(layer)

		# Update new value of x
		if type(layer).__name__ in ['GraphConv', 'GraphGather', 'GraphPool']:
		assert self.bool_pre_gather, "Cannot apply graphical layers after gather."

		self.test = layer([self.test] + self.test_graph_topology.topology)
		self.support = layer([self.support] +
		self.support_graph_topology.topology)
		else:
		self.test = layer(self.test)
		self.support = layer(self.support)

		if type(layer).__name__ == 'GraphGather':
		self.bool_pre_gather = False # Set flag to stop adding topology

		def add_test(self, layer):
		"""Adds a layer to test."""
		with self.graph.as_default():
		self.layers.append(layer)

		# Update new value of x
		if type(layer).__name__ in ['GraphConv', 'GraphPool', 'GraphGather']:
		self.test = layer([self.test] + self.test_graph_topology.topology)
		else:
		self.test = layer(self.test)

		def add_support(self, layer):
		"""Adds a layer to support."""
		with self.graph.as_default():
		self.layers.append(layer)

		# Update new value of x
		if type(layer).__name__ in ['GraphConv', 'GraphPool', 'GraphGather']:
		self.support = layer([self.support] +
		self.support_graph_topology.topology)
		else:
		self.support = layer(self.support)

		def join(self, layer):
		"""Joins test and support to a two input two output layer"""
		with self.graph.as_default():
		self.layers.append(layer)
		self.test, self.support = layer([self.test, self.support])

		def get_test_output(self):
		return self.test

		def get_support_output(self):
		return self.support

		def return_outputs(self):
		return [self.test] + [self.support]

		def return_inputs(self):
		return (self.test_graph_topology.get_inputs() +
		self.support_graph_topology.get_inputs())

contrib/one_shot_models/graph_topology.py

0 → 100644

+604 −0

File added.

Preview size limit exceeded, changes collapsed.

contrib/one_shot_models/multitask_classifier.py

0 → 100644

+286 −0

Original line number	Diff line number	Diff line
		"""
		Implements a multitask graph convolutional classifier.
		"""
		from __future__ import print_function
		from __future__ import division
		from __future__ import unicode_literals

		__author__ = "Han Altae-Tran and Bharath Ramsundar"
		__copyright__ = "Copyright 2016, Stanford University"
		__license__ = "MIT"

		import warnings
		import os
		import sys
		import numpy as np
		import tensorflow as tf
		import sklearn.metrics
		import tempfile
		from deepchem.data import pad_features
		from deepchem.utils.save import log
		from deepchem.models import Model
		from deepchem.nn.copy import Input
		from deepchem.nn.copy import Dense
		from deepchem.nn import model_ops
		# TODO(rbharath): Find a way to get rid of this import?
		from deepchem.models.tf_new_models.graph_topology import merge_dicts


		def get_loss_fn(final_loss):
		# Obtain appropriate loss function
		if final_loss == 'L2':

		def loss_fn(x, t):
		diff = tf.subtract(x, t)
		return tf.reduce_sum(tf.square(diff), 0)
		elif final_loss == 'weighted_L2':

		def loss_fn(x, t, w):
		diff = tf.subtract(x, t)
		weighted_diff = tf.multiply(diff, w)
		return tf.reduce_sum(tf.square(weighted_diff), 0)
		elif final_loss == 'L1':

		def loss_fn(x, t):
		diff = tf.subtract(x, t)
		return tf.reduce_sum(tf.abs(diff), 0)
		elif final_loss == 'huber':

		def loss_fn(x, t):
		diff = tf.subtract(x, t)
		return tf.reduce_sum(
		tf.minimum(0.5 * tf.square(diff),
		huber_d * (tf.abs(diff) - 0.5 * huber_d)), 0)
		elif final_loss == 'cross_entropy':

		def loss_fn(x, t, w):
		costs = tf.nn.sigmoid_cross_entropy_with_logits(logits=x, labels=t)
		weighted_costs = tf.multiply(costs, w)
		return tf.reduce_sum(weighted_costs)
		elif final_loss == 'hinge':

		def loss_fn(x, t, w):
		t = tf.multiply(2.0, t) - 1
		costs = tf.maximum(0.0, 1.0 - tf.multiply(t, x))
		weighted_costs = tf.multiply(costs, w)
		return tf.reduce_sum(weighted_costs)

		return loss_fn


		class MultitaskGraphClassifier(Model):

		def __init__(self,
		model,
		n_tasks,
		n_feat,
		logdir=None,
		batch_size=50,
		final_loss='cross_entropy',
		learning_rate=.001,
		optimizer_type="adam",
		learning_rate_decay_time=1000,
		beta1=.9,
		beta2=.999,
		pad_batches=True,
		verbose=True):

		warnings.warn("MultitaskGraphClassifier is deprecated. "
		"Will be removed in DeepChem 1.4.", DeprecationWarning)
		super(MultitaskGraphClassifier, self).__init__(
		model_dir=logdir, verbose=verbose)
		self.n_tasks = n_tasks
		self.final_loss = final_loss
		self.model = model
		self.sess = tf.Session(graph=self.model.graph)

		with self.model.graph.as_default():
		# Extract model info
		self.batch_size = batch_size
		self.pad_batches = pad_batches
		# Get graph topology for x
		self.graph_topology = self.model.get_graph_topology()
		self.feat_dim = n_feat

		# Raw logit outputs
		self.logits = self.build()
		self.loss_op = self.add_training_loss(self.final_loss, self.logits)
		self.outputs = self.add_softmax(self.logits)

		self.learning_rate = learning_rate
		self.T = learning_rate_decay_time
		self.optimizer_type = optimizer_type

		self.optimizer_beta1 = beta1
		self.optimizer_beta2 = beta2

		# Set epsilon
		self.epsilon = 1e-7
		self.add_optimizer()

		# Initialize
		self.init_fn = tf.global_variables_initializer()
		self.sess.run(self.init_fn)

		# Path to save checkpoint files, which matches the
		# replicated supervisor's default path.
		self._save_path = os.path.join(self.model_dir, 'model.ckpt')

		def build(self):
		# Create target inputs
		self.label_placeholder = tf.placeholder(
		dtype='bool', shape=(None, self.n_tasks), name="label_placeholder")
		self.weight_placeholder = tf.placeholder(
		dtype='float32', shape=(None, self.n_tasks), name="weight_placholder")

		feat = self.model.return_outputs()
		################################################################ DEBUG
		#print("multitask classifier")
		#print("feat")
		#print(feat)
		################################################################ DEBUG
		output = model_ops.multitask_logits(feat, self.n_tasks)
		return output

		def add_optimizer(self):
		if self.optimizer_type == "adam":
		self.optimizer = tf.train.AdamOptimizer(
		self.learning_rate,
		beta1=self.optimizer_beta1,
		beta2=self.optimizer_beta2,
		epsilon=self.epsilon)
		else:
		raise ValueError("Optimizer type not recognized.")

		# Get train function
		self.train_op = self.optimizer.minimize(self.loss_op)

		def construct_feed_dict(self, X_b, y_b=None, w_b=None, training=True):
		"""Get initial information about task normalization"""
		# TODO(rbharath): I believe this is total amount of data
		n_samples = len(X_b)
		if y_b is None:
		y_b = np.zeros((n_samples, self.n_tasks))
		if w_b is None:
		w_b = np.zeros((n_samples, self.n_tasks))
		targets_dict = {self.label_placeholder: y_b, self.weight_placeholder: w_b}

		# Get graph information
		atoms_dict = self.graph_topology.batch_to_feed_dict(X_b)

		# TODO (hraut->rhbarath): num_datapoints should be a vector, with ith element being
		# the number of labeled data points in target_i. This is to normalize each task
		# num_dat_dict = {self.num_datapoints_placeholder : self.}

		# Get other optimizer information
		# TODO(rbharath): Figure out how to handle phase appropriately
		feed_dict = merge_dicts([targets_dict, atoms_dict])
		return feed_dict

		def add_training_loss(self, final_loss, logits):
		"""Computes loss using logits."""
		loss_fn = get_loss_fn(final_loss) # Get loss function
		task_losses = []
		# label_placeholder of shape (batch_size, n_tasks). Split into n_tasks
		# tensors of shape (batch_size,)
		task_labels = tf.split(
		axis=1, num_or_size_splits=self.n_tasks, value=self.label_placeholder)
		task_weights = tf.split(
		axis=1, num_or_size_splits=self.n_tasks, value=self.weight_placeholder)
		for task in range(self.n_tasks):
		task_label_vector = task_labels[task]
		task_weight_vector = task_weights[task]
		# Convert the labels into one-hot vector encodings.
		one_hot_labels = tf.to_float(
		tf.one_hot(tf.to_int32(tf.squeeze(task_label_vector)), 2))
		# Since we use tf.nn.softmax_cross_entropy_with_logits note that we pass in
		# un-softmaxed logits rather than softmax outputs.
		task_loss = loss_fn(logits[task], one_hot_labels, task_weight_vector)
		task_losses.append(task_loss)
		# It's ok to divide by just the batch_size rather than the number of nonzero
		# examples (effect averages out)
		total_loss = tf.add_n(task_losses)
		total_loss = tf.div(total_loss, self.batch_size)
		return total_loss

		def add_softmax(self, outputs):
		"""Replace logits with softmax outputs."""
		softmax = []
		with tf.name_scope('inference'):
		for i, logits in enumerate(outputs):
		softmax.append(tf.nn.softmax(logits, name='softmax_%d' % i))
		return softmax

		def fit(self,
		dataset,
		nb_epoch=10,
		max_checkpoints_to_keep=5,
		log_every_N_batches=50,
		checkpoint_interval=10,
		**kwargs):
		# Perform the optimization
		log("Training for %d epochs" % nb_epoch, self.verbose)

		# TODO(rbharath): Disabling saving for now to try to debug.
		for epoch in range(nb_epoch):
		log("Starting epoch %d" % epoch, self.verbose)
		for batch_num, (X_b, y_b, w_b, ids_b) in enumerate(
		dataset.iterbatches(self.batch_size, pad_batches=self.pad_batches)):
		if batch_num % log_every_N_batches == 0:
		log("On batch %d" % batch_num, self.verbose)
		self.sess.run(
		self.train_op, feed_dict=self.construct_feed_dict(X_b, y_b, w_b))

		def save(self):
		"""
		No-op since this model doesn't currently support saving...
		"""
		pass

		def predict(self, dataset, transformers=[], **kwargs):
		"""Wraps predict to set batch_size/padding."""
		return super(MultitaskGraphClassifier, self).predict(
		dataset, transformers, batch_size=self.batch_size)

		def predict_proba(self, dataset, transformers=[], n_classes=2, **kwargs):
		"""Wraps predict_proba to set batch_size/padding."""
		return super(MultitaskGraphClassifier, self).predict_proba(
		dataset, transformers, n_classes=n_classes, batch_size=self.batch_size)

		def predict_on_batch(self, X):
		"""Return model output for the provided input.
		"""
		if self.pad_batches:
		X = pad_features(self.batch_size, X)
		# run eval data through the model
		n_tasks = self.n_tasks
		with self.sess.as_default():
		feed_dict = self.construct_feed_dict(X)
		# Shape (n_samples, n_tasks)
		batch_outputs = self.sess.run(self.outputs, feed_dict=feed_dict)

		n_samples = len(X)
		outputs = np.zeros((n_samples, self.n_tasks))
		for task, output in enumerate(batch_outputs):
		outputs[:, task] = np.argmax(output, axis=1)
		return outputs

		def predict_proba_on_batch(self, X, n_classes=2):
		"""Returns class probabilities on batch"""
		# run eval data through the model
		if self.pad_batches:
		X = pad_features(self.batch_size, X)
		n_tasks = self.n_tasks
		with self.sess.as_default():
		feed_dict = self.construct_feed_dict(X)
		batch_outputs = self.sess.run(self.outputs, feed_dict=feed_dict)

		n_samples = len(X)
		outputs = np.zeros((n_samples, self.n_tasks, n_classes))
		for task, output in enumerate(batch_outputs):
		outputs[:, task, :] = output
		return outputs

		def get_num_tasks(self):
		"""Needed to use Model.predict() from superclass."""
		return self.n_tasks

contrib/one_shot_models/multitask_regressor.py

0 → 100644

+236 −0

File added.

Preview size limit exceeded, changes collapsed.

contrib/one_shot_models/support_classifier.py

0 → 100644

+388 −0

File added.

Preview size limit exceeded, changes collapsed.

Admin message