Commit db2e51ad authored by miaecle's avatar miaecle
Browse files

pytorch multitask models

parent 3bd2083a
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -22,3 +22,6 @@ from deepchem.models.tensorflow_models.lr import TensorflowLogisticRegression
from deepchem.models.tensorflow_models.progressive_multitask import ProgressiveMultitaskRegressor
from deepchem.models.tensorflow_models.progressive_joint import ProgressiveJointRegressor
from deepchem.models.tensorflow_models.IRV import TensorflowMultiTaskIRVClassifier

from deepchem.models.torch_models.torch_multitask_classification import TorchMultitaskClassification
from deepchem.models.torch_models.torch_multitask_regression import TorchMultitaskRegression
+234 −0
Original line number Diff line number Diff line
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 13 22:31:24 2017

@author: Zhenqin Wu
"""

import torch
import time
import numpy as np
from deepchem.trans import undo_transforms
from deepchem.utils.save import log
from deepchem.models import Model

class TorchMultitaskModel(Model):
  """Base class for PyTorch multitask models.

  Stores hyperparameters, then drives training (``fit``) and prediction
  (``predict`` / ``predict_proba``) around hooks that concrete subclasses
  implement: ``build``, ``forward``, ``cost``, ``predict_on_batch`` and
  ``predict_proba_on_batch``.
  """

  def __init__(self,
               layer_sizes=[1000],
               weight_init_stddevs=[.02],
               bias_init_consts=[1.],
               penalty=0.0,
               penalty_type="l2",
               dropouts=[0.5],
               learning_rate=.001,
               momentum=.9,
               optimizer="adam",
               batch_size=50,
               pad_batches=False,
               verbose=True,
               seed=None,
               **kwargs):
    """Constructs the model.

    Saves hyperparameters, then delegates parameter creation to the
    subclass `build` hook and creates the training optimizer.

    Parameters
    ----------
    layer_sizes: list
      List of hidden-layer sizes.
    weight_init_stddevs: list
      List of standard deviations for weights (sampled from zero-mean
      gaussians). One for each layer.
    bias_init_consts: list
      List of bias initializations. One for each layer.
    penalty: float
      Amount of penalty applied (only an l2-style penalty is actually
      implemented in add_training_cost).
    penalty_type: str
      Either "l2" or "l1". NOTE(review): currently stored but ignored;
      add_training_cost always applies an l2 penalty.
    dropouts: list
      List of dropout amounts. One for each layer.
    learning_rate: float
      Learning rate for model.
    momentum: float
      Momentum. Only used by the "rmsprop" optimizer.
    optimizer: str
      Name of the optimizer: "adam", "adagrad", "rmsprop" or "sgd".
    batch_size: int
      Size of minibatches for training.
    pad_batches: bool
      Whether to pad the final minibatch of each epoch up to batch_size.
    verbose: bool
      Perform logging.
    seed: int
      If not None, intended as a random seed. NOTE(review): currently
      stored but never used to seed torch/numpy — confirm intent.
    """
    # Save hyperparameters
    self.layer_sizes = layer_sizes
    self.weight_init_stddevs = weight_init_stddevs
    self.bias_init_consts = bias_init_consts
    self.penalty = penalty
    self.penalty_type = penalty_type
    self.dropouts = dropouts
    self.learning_rate = learning_rate
    self.momentum = momentum
    self.optimizer = optimizer
    self.batch_size = batch_size
    self.pad_batches = pad_batches
    self.verbose = verbose
    self.seed = seed

    self.build()
    # self.optimizer starts out as the optimizer *name* (a str);
    # get_training_op() reads that name, and its result replaces the
    # string with the torch.optim optimizer object used by fit().
    self.optimizer = self.get_training_op()

  def add_training_cost(self, outputs, labels, weights):
    """Total weighted training loss over all tasks, plus weight decay.

    Parameters
    ----------
    outputs: list
      Per-task output Variables produced by forward().
    labels: torch.autograd.Variable
      Label tensor of shape (batch_size, n_tasks).
    weights: torch.autograd.Variable
      Example weights of shape (batch_size, n_tasks).

    Returns
    -------
    Scalar Variable holding the total loss.
    """
    weighted_costs = []  # weighted cost for each task
    for task in range(self.n_tasks):
      weighted_costs.append(
          self.cost(outputs[task], labels[:, task], weights[:, task]))
    # Sum the scalar per-task losses directly; torch.cat over scalar
    # losses is fragile (newer torch versions reject 0-d concatenation).
    loss = sum(weighted_costs)
    # Weight decay. NOTE: the attribute keeps the existing (misspelled)
    # name because subclass build() implementations set it.
    if self.penalty > 0.0:
      for variable in self.regularizaed_variables:
        loss += self.penalty * 0.5 * variable.mul(variable).sum()
    return loss

  def get_training_op(self):
    """Get training op for applying gradients to variables.

    Subclasses that need to do anything fancy with gradients should override
    this method.

    Returns
    -------
    A torch.optim optimizer over self.trainables.

    Raises
    ------
    NotImplementedError
      If self.optimizer names an unsupported optimizer.
    """
    if self.optimizer == "adam":
      train_op = torch.optim.Adam(self.trainables, lr=self.learning_rate)
    elif self.optimizer == 'adagrad':
      train_op = torch.optim.Adagrad(self.trainables, lr=self.learning_rate)
    elif self.optimizer == 'rmsprop':
      train_op = torch.optim.RMSprop(
          self.trainables, lr=self.learning_rate, momentum=self.momentum)
    elif self.optimizer == 'sgd':
      train_op = torch.optim.SGD(self.trainables, lr=self.learning_rate)
    else:
      raise NotImplementedError('Unsupported optimizer %s' % self.optimizer)
    return train_op

  def fit(self,
          dataset,
          nb_epoch=10,
          max_checkpoints_to_keep=5,
          log_every_N_batches=50,
          checkpoint_interval=10,
          **kwargs):
    """Fit the model.

    Parameters
    ----------
    dataset: dc.data.Dataset
      Dataset object holding training data.
    nb_epoch: int
      Number of training epochs.
    max_checkpoints_to_keep: int
      Maximum number of checkpoints to keep. NOTE(review): checkpointing
      is not implemented here; this argument is currently unused.
    log_every_N_batches: int
      Report every N batches. Useful for training on very large datasets,
      where epochs can take long time to finish.
    checkpoint_interval: int
      Frequency at which to write checkpoints, measured in epochs.
      NOTE(review): currently unused (see above).
    """
    time1 = time.time()
    log("Training for %d epochs" % nb_epoch, self.verbose)
    for epoch in range(nb_epoch):
      avg_loss, n_batches = 0., 0
      # There are valid cases where we don't want pad_batches on by
      # default, so honor the constructor's setting here.
      for ind, (X_b, y_b, w_b, ids_b) in enumerate(
          dataset.iterbatches(
              self.batch_size, pad_batches=self.pad_batches)):
        if ind % log_every_N_batches == 0:
          log("On batch %d" % ind, self.verbose)
        # Run training op.
        self.optimizer.zero_grad()
        X_b_input = torch.autograd.Variable(torch.FloatTensor(X_b))
        y_b_input = torch.autograd.Variable(torch.FloatTensor(y_b))
        w_b_input = torch.autograd.Variable(torch.FloatTensor(w_b))
        outputs = self.forward(X_b_input, training=True)
        loss = self.add_training_cost(outputs, y_b_input, w_b_input)
        loss.backward()
        self.optimizer.step()
        # Accumulate a python float, not the Variable: accumulating the
        # Variable itself would keep every batch's autograd graph alive.
        avg_loss += float(loss.data.numpy())
        n_batches += 1
      # Guard against an empty epoch (no batches yielded).
      avg_loss = avg_loss / max(n_batches, 1)
      log('Ending epoch %d: Average loss %g' % (epoch, avg_loss),
          self.verbose)
    time2 = time.time()
    # Was print(msg, self.verbose), which printed the boolean flag as a
    # second positional argument; route through log() like the rest of
    # this class.
    log("TIMING: model fitting took %0.3f s" % (time2 - time1), self.verbose)

  def predict(self, dataset, transformers=[]):
    """
    Uses self to make predictions on provided Dataset object.

    Returns
    -------
    y_pred: numpy ndarray of shape (n_samples,) for single-task models,
      (n_samples, n_tasks) otherwise.
    """
    y_preds = []
    n_tasks = self.n_tasks
    for (X_batch, _, _, ids_batch) in dataset.iterbatches(
        self.batch_size, deterministic=True):
      n_samples = len(X_batch)
      y_pred_batch = self.predict_on_batch(X_batch)
      assert y_pred_batch.shape == (n_samples, n_tasks)
      y_pred_batch = undo_transforms(y_pred_batch, transformers)
      y_preds.append(y_pred_batch)
    y_pred = np.vstack(y_preds)

    # iterbatches may pad the last batch with zero-weight examples; drop
    # any padded rows before reshaping. (predict_proba already truncated
    # this way; predict previously did not, which broke the reshape.)
    n_samples = len(dataset)
    y_pred = y_pred[:n_samples]
    y_pred = np.reshape(y_pred, (n_samples, n_tasks))
    # Special case to handle singletasks.
    if n_tasks == 1:
      y_pred = np.reshape(y_pred, (n_samples,))
    return y_pred

  def predict_proba(self, dataset, transformers=[], n_classes=2):
    """Uses self to predict per-class probabilities on a Dataset.

    Returns
    -------
    y_pred: numpy ndarray of shape (n_samples, n_tasks, n_classes).
    """
    y_preds = []
    n_tasks = self.n_tasks
    for (X_batch, y_batch, w_batch, ids_batch) in dataset.iterbatches(
        self.batch_size, deterministic=True):
      n_samples = len(X_batch)
      y_pred_batch = self.predict_proba_on_batch(X_batch)
      assert y_pred_batch.shape == (n_samples, n_tasks, n_classes)
      y_pred_batch = undo_transforms(y_pred_batch, transformers)
      y_preds.append(y_pred_batch)
    y_pred = np.vstack(y_preds)
    # The iterbatches does padding with zero-weight examples on the last batch.
    # Remove padded examples.
    n_samples = len(dataset)
    y_pred = y_pred[:n_samples]
    y_pred = np.reshape(y_pred, (n_samples, n_tasks, n_classes))
    return y_pred

  def build(self):
    """Create trainable parameters; must set self.trainables."""
    raise NotImplementedError('Must be overridden by concrete subclass')

  def forward(self, X, training=False):
    """Compute per-task outputs for a batch of features."""
    raise NotImplementedError('Must be overridden by concrete subclass')

  def cost(self, logit, label, weight):
    """Weighted scalar loss for one task."""
    raise NotImplementedError('Must be overridden by concrete subclass')

  def predict_on_batch(self, X_batch):
    """Predictions for one batch of features."""
    raise NotImplementedError('Must be overridden by concrete subclass')

  def predict_proba_on_batch(self, X_batch):
    """Per-class probabilities for one batch of features."""
    raise NotImplementedError('Must be overridden by concrete subclass')
 No newline at end of file
+121 −0
Original line number Diff line number Diff line
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 13 22:31:24 2017

@author: Zhenqin Wu
"""

import torch
import numpy as np
from deepchem.metrics import from_one_hot
from deepchem.models.torch_models import TorchMultitaskModel

class TorchMultitaskClassification(TorchMultitaskModel):
  """Fully-connected multitask classifier implemented in PyTorch.

  A shared stack of dense ReLU layers feeds one dense softmax head per
  task, each producing n_classes logits.
  """

  def __init__(self,
               n_tasks,
               n_features,
               n_classes=2,
               **kwargs):
    """Saves model hyperparameters and builds the network.

    Graph construction itself happens in build(), invoked by the base
    class constructor.

    Parameters
    ----------
    n_tasks: int
      Number of tasks.
    n_features: int
      Number of input features.
    n_classes: int
      Number of classes per task.
    """
    # Save hyperparameters
    self.n_tasks = n_tasks
    self.n_features = n_features
    self.n_classes = n_classes
    super(TorchMultitaskClassification, self).__init__(**kwargs)

  def build(self):
    """Constructs the network parameters as specified in its config.

    Creates the shared-layer weights/biases (W_list/b_list) and one
    (prev_layer_size, n_classes) head per task (task_W_list/task_b_list),
    and registers them in self.trainables / self.regularizaed_variables.
    """
    layer_sizes = self.layer_sizes
    weight_init_stddevs = self.weight_init_stddevs
    bias_init_consts = self.bias_init_consts
    dropouts = self.dropouts
    # All per-layer hyperparameter lists must agree in length.
    lengths_set = {
        len(layer_sizes),
        len(weight_init_stddevs),
        len(bias_init_consts),
        len(dropouts),
    }
    assert len(lengths_set) == 1, 'All layer params must have same length.'
    n_layers = lengths_set.pop()
    assert n_layers > 0, 'Must have some layers defined.'

    prev_layer_size = self.n_features
    self.W_list = []
    self.b_list = []
    for i in range(n_layers):
      W_init = np.random.normal(0, weight_init_stddevs[i],
                                (prev_layer_size, layer_sizes[i]))
      W_init = torch.FloatTensor(W_init)
      self.W_list.append(torch.autograd.Variable(W_init, requires_grad=True))
      b_init = np.full((layer_sizes[i],), bias_init_consts[i])
      b_init = torch.FloatTensor(b_init)
      self.b_list.append(torch.autograd.Variable(b_init, requires_grad=True))
      prev_layer_size = layer_sizes[i]

    # Per-task output heads; initialized from the last layer's settings.
    self.task_W_list = []
    self.task_b_list = []
    for i in range(self.n_tasks):
      W_init = np.random.normal(0, weight_init_stddevs[-1],
                                (prev_layer_size, self.n_classes))
      W_init = torch.FloatTensor(W_init)
      self.task_W_list.append(
          torch.autograd.Variable(W_init, requires_grad=True))
      b_init = np.full((self.n_classes,), bias_init_consts[-1])
      b_init = torch.FloatTensor(b_init)
      self.task_b_list.append(
          torch.autograd.Variable(b_init, requires_grad=True))
    self.trainables = self.W_list + self.b_list + \
        self.task_W_list + self.task_b_list
    # NOTE: misspelled name kept for compatibility with the base class,
    # which reads this attribute when applying weight decay.
    self.regularizaed_variables = self.W_list + self.task_W_list

  def forward(self, X, training=False):
    """Runs the shared layers, then each task head.

    Returns a list with one (batch, n_classes) Variable per task:
    raw logits during training, softmax probabilities otherwise.
    """
    for i, W in enumerate(self.W_list):
      X = X.mm(W)
      X += self.b_list[i].unsqueeze(0).expand_as(X)
      X = torch.nn.functional.relu(X)
      if training:
        X = torch.nn.functional.dropout(
            X, p=self.dropouts[i], training=True)
    outputs = []
    for i, W in enumerate(self.task_W_list):
      output = X.mm(W)
      output += self.task_b_list[i].unsqueeze(0).expand_as(output)
      if not training:
        output = torch.nn.functional.softmax(output)
      outputs.append(output)
    return outputs

  def cost(self, logit, label, weight):
    """Weighted cross-entropy for one task, averaged over the batch.

    Vectorized replacement for the original per-example Python loop,
    which passed 1-D logit rows to F.cross_entropy (the API expects a
    (N, C) batch) and concatenated scalar losses. Per-example loss is
    -log_softmax(logit)[label], identical to per-row cross-entropy.
    """
    log_probs = torch.nn.functional.log_softmax(logit)
    per_example = -log_probs.gather(1, label.long().view(-1, 1)).view(-1)
    return per_example.mul(weight).mean()

  def predict_on_batch(self, X_batch):
    """Class predictions (argmax over classes) for one feature batch."""
    X_batch = torch.autograd.Variable(torch.FloatTensor(X_batch))
    outputs = self.forward(X_batch, training=False)
    # Stack task outputs into a (batch, n_tasks, n_classes) array.
    y_pred_batch = torch.stack(outputs, 1).data.numpy()
    # Argmax over the class axis (axis 2).
    y_pred_batch = from_one_hot(y_pred_batch, 2)
    return y_pred_batch

  def predict_proba_on_batch(self, X_batch):
    """Per-class probabilities for one feature batch."""
    X_batch = torch.autograd.Variable(torch.FloatTensor(X_batch))
    outputs = self.forward(X_batch, training=False)
    y_pred_batch = torch.stack(outputs, 1).data.numpy()
    return y_pred_batch
 No newline at end of file
+114 −0
Original line number Diff line number Diff line
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 13 22:31:24 2017

@author: Zhenqin Wu
"""

import torch
import numpy as np
from deepchem.models.torch_models import TorchMultitaskModel

class TorchMultitaskRegression(TorchMultitaskModel):
  """Fully-connected multitask regressor implemented in PyTorch.

  A shared stack of dense ReLU layers feeds one single-output dense head
  per task.
  """

  def __init__(self,
               n_tasks,
               n_features,
               **kwargs):
    """Saves model hyperparameters and builds the network.

    Graph construction itself happens in build(), invoked by the base
    class constructor.

    Parameters
    ----------
    n_tasks: int
      Number of tasks.
    n_features: int
      Number of input features.
    """
    # Save hyperparameters
    self.n_tasks = n_tasks
    self.n_features = n_features
    super(TorchMultitaskRegression, self).__init__(**kwargs)

  def build(self):
    """Constructs the network parameters as specified in its config.

    Creates the shared-layer weights/biases (W_list/b_list) and one
    (prev_layer_size, 1) head per task (task_W_list/task_b_list), and
    registers them in self.trainables / self.regularizaed_variables.
    """
    layer_sizes = self.layer_sizes
    weight_init_stddevs = self.weight_init_stddevs
    bias_init_consts = self.bias_init_consts
    dropouts = self.dropouts
    # All per-layer hyperparameter lists must agree in length.
    lengths_set = {
        len(layer_sizes),
        len(weight_init_stddevs),
        len(bias_init_consts),
        len(dropouts),
    }
    assert len(lengths_set) == 1, 'All layer params must have same length.'
    n_layers = lengths_set.pop()
    assert n_layers > 0, 'Must have some layers defined.'

    prev_layer_size = self.n_features
    self.W_list = []
    self.b_list = []
    for i in range(n_layers):
      W_init = np.random.normal(0, weight_init_stddevs[i],
                                (prev_layer_size, layer_sizes[i]))
      W_init = torch.FloatTensor(W_init)
      self.W_list.append(torch.autograd.Variable(W_init, requires_grad=True))
      b_init = np.full((layer_sizes[i],), bias_init_consts[i])
      b_init = torch.FloatTensor(b_init)
      self.b_list.append(torch.autograd.Variable(b_init, requires_grad=True))
      prev_layer_size = layer_sizes[i]

    # Per-task scalar output heads; initialized from the last layer's
    # settings.
    self.task_W_list = []
    self.task_b_list = []
    for i in range(self.n_tasks):
      W_init = np.random.normal(0, weight_init_stddevs[-1],
                                (prev_layer_size, 1))
      W_init = torch.FloatTensor(W_init)
      self.task_W_list.append(
          torch.autograd.Variable(W_init, requires_grad=True))
      b_init = np.full((1,), bias_init_consts[-1])
      b_init = torch.FloatTensor(b_init)
      self.task_b_list.append(
          torch.autograd.Variable(b_init, requires_grad=True))
    self.trainables = self.W_list + self.b_list + \
        self.task_W_list + self.task_b_list
    # NOTE: misspelled name kept for compatibility with the base class,
    # which reads this attribute when applying weight decay.
    self.regularizaed_variables = self.W_list + self.task_W_list

  def forward(self, X, training=False):
    """Runs the shared layers, then each task head.

    Returns a list with one (batch, 1) Variable per task.
    """
    for i, W in enumerate(self.W_list):
      X = X.mm(W)
      X += self.b_list[i].unsqueeze(0).expand_as(X)
      X = torch.nn.functional.relu(X)
      if training:
        X = torch.nn.functional.dropout(
            X, p=self.dropouts[i], training=True)
    outputs = []
    for i, W in enumerate(self.task_W_list):
      output = X.mm(W)
      output += self.task_b_list[i].unsqueeze(0).expand_as(output)
      outputs.append(output)
    return outputs

  def cost(self, logit, label, weight):
    """Weighted squared error for one task, averaged over the batch.

    Vectorized replacement for the original per-example Python loop
    (MSELoss on single elements reduces to the plain squared error),
    which also concatenated scalar losses via torch.cat.
    """
    squared_err = (logit.view(-1) - label).pow(2)
    return squared_err.mul(weight).mean()

  def predict_on_batch(self, X_batch):
    """Regression predictions for one feature batch.

    Returns a (batch, n_tasks) numpy array.
    """
    X_batch = torch.autograd.Variable(torch.FloatTensor(X_batch))
    outputs = self.forward(X_batch, training=False)
    # Stack task outputs into (batch, n_tasks, 1), then drop the
    # trailing singleton axis.
    y_pred_batch = torch.stack(outputs, 1).data.numpy()
    y_pred_batch = np.squeeze(y_pred_batch, axis=2)
    return y_pred_batch

  def predict_proba_on_batch(self, X_batch):
    """Not supported for regression models."""
    raise NotImplementedError('Regression models cannot predict probability')
 No newline at end of file