SAScore module (b675fbae) · Commits · 钟慕尧 / deepchem

deepchem/models/__init__.py

+1 −0

Original line number	Diff line number	Diff line
		@@ -19,6 +19,7 @@ from deepchem.models.tensorgraph.robust_multitask import RobustMultitaskRegresso
		from deepchem.models.tensorgraph.progressive_multitask import ProgressiveMultitaskRegressor, ProgressiveMultitaskClassifier
		from deepchem.models.tensorgraph.models.graph_models import WeaveModel, DTNNTensorGraph, DAGTensorGraph, GraphConvModel, MPNNTensorGraph
		from deepchem.models.tensorgraph.models.symmetry_function_regression import BPSymmetryFunctionRegression, ANIRegression
		from deepchem.models.tensorgraph.models.scscore import ScScoreModel

		from deepchem.models.tensorgraph.models.seqtoseq import SeqToSeq
		from deepchem.models.tensorgraph.models.gan import GAN, WGAN

deepchem/models/tensorgraph/layers.py

+2 −2

Original line number	Diff line number	Diff line
		@@ -4325,7 +4325,7 @@ class GraphCNN(Layer):
		return result


		class Hingeloss(Layer):
		class HingeLoss(Layer):
		"""This layer computes the hinge loss on inputs:[labels,logits]
		labels: The values of this tensor is expected to be 1.0 or 0.0. The shape should be the same as logits.
		logits: Holds the log probabilities for labels, a float tensor.
		@@ -4333,7 +4333,7 @@ class Hingeloss(Layer):
		"""

		def __init__(self, in_layers=None, **kwargs):
		super(Hingeloss, self).__init__(in_layers, **kwargs)
		super(HingeLoss, self).__init__(in_layers, **kwargs)
		try:
		self._shape = self.in_layers[1].shape
		except:

deepchem/models/tensorgraph/models/scscore.py

0 → 100644

+104 −0

Original line number	Diff line number	Diff line
		import collections

		import numpy as np
		import six
		import tensorflow as tf

		from deepchem.data import NumpyDataset
		from deepchem.feat.graph_features import ConvMolFeaturizer
		from deepchem.feat.mol_graphs import ConvMol
		from deepchem.metrics import to_one_hot
		from deepchem.models.tensorgraph.graph_layers import WeaveGather, \
		DTNNEmbedding, DTNNStep, DTNNGather, DAGLayer, \
		DAGGather, DTNNExtract, MessagePassing, SetGather
		from deepchem.models.tensorgraph.graph_layers import WeaveLayerFactory
		from deepchem.models.tensorgraph.layers import Dense, SoftMax, \
		SoftMaxCrossEntropy, GraphConv, BatchNorm, HingeLoss, Sigmoid, \
		GraphPool, GraphGather, WeightedError, Dropout, BatchNormalization, Stack, Flatten, GraphCNN, GraphCNNPool, ReduceSum
		from deepchem.models.tensorgraph.layers import L2Loss, Label, Weights, Feature
		from deepchem.models.tensorgraph.tensor_graph import TensorGraph
		from deepchem.trans import undo_transforms
		from deepchem.feat import CircularFingerprint


		class ScScoreModel(TensorGraph):
		    """Pairwise scoring model built from two weight-sharing dense towers.

		    Each input sample holds the feature vectors of two molecules.  Both
		    vectors pass through the same stack of ``Dense`` layers (the second
		    tower is created with ``.shared``) and a shared single-unit readout.
		    The model is trained with a hinge loss on the difference of the two
		    raw readouts; the reported outputs are ``sigmoid(readout) * 4 + 1``,
		    i.e. one score in the open interval (1, 5) per molecule of the pair.
		    """

		    def __init__(self,
		                 n_features,
		                 layer_sizes=(300, 300, 300),
		                 dropouts=0.0,
		                 **kwargs):
		        """Store the hyper-parameters and build the graph.

		        Parameters
		        ----------
		        n_features: int
		            Length of the feature vector of a single molecule.
		        layer_sizes: sequence of int
		            Width of each hidden ``Dense`` layer, in order.
		        dropouts: float
		            Dropout value handed to every ``Dropout`` layer; dropout layers
		            are only added when this is greater than 0.
		        **kwargs
		            Forwarded unchanged to ``TensorGraph.__init__``.
		        """
		        self.n_features = n_features
		        # Copy into a fresh list so instances never share (or mutate) the
		        # default value or a list owned by the caller.
		        self.layer_sizes = list(layer_sizes)
		        self.dropout = dropouts
		        super(ScScoreModel, self).__init__(**kwargs)
		        self.build_graph()

		    def build_graph(self):
		        """Create the two shared towers, the outputs and the weighted loss."""
		        self.m1_features = Feature(shape=(None, self.n_features))
		        self.m2_features = Feature(shape=(None, self.n_features))
		        prev_layer1 = self.m1_features
		        prev_layer2 = self.m2_features
		        for layer_size in self.layer_sizes:
		            prev_layer1 = Dense(
		                out_channels=layer_size,
		                in_layers=[prev_layer1],
		                activation_fn=tf.nn.relu)
		            # The second tower reuses the variables of the first one.
		            prev_layer2 = prev_layer1.shared([prev_layer2])
		            if self.dropout > 0.0:
		                prev_layer1 = Dropout(self.dropout, in_layers=[prev_layer1])
		                prev_layer2 = Dropout(self.dropout, in_layers=[prev_layer2])

		        readout_m1 = Dense(
		            out_channels=1, in_layers=[prev_layer1], activation_fn=None)
		        readout_m2 = readout_m1.shared([prev_layer2])
		        # Squash each raw readout into (1, 5) for the reported scores.
		        self.add_output(Sigmoid(readout_m1) * 4 + 1)
		        self.add_output(Sigmoid(readout_m2) * 4 + 1)

		        # The loss is computed on the raw (pre-sigmoid) readouts.
		        difference = readout_m1 - readout_m2
		        label = Label(shape=(None, 1))
		        loss = HingeLoss(in_layers=[label, difference])
		        self.my_task_weights = Weights(shape=(None, 1))
		        loss = WeightedError(in_layers=[loss, self.my_task_weights])
		        self.set_loss(loss)

		    def default_generator(self,
		                          dataset,
		                          epochs=1,
		                          predict=False,
		                          deterministic=True,
		                          pad_batches=True):
		        """Yield one feed dict per batch of ``dataset``.

		        ``X_b[:, 0]`` feeds the first molecule input and ``X_b[:, 1]`` the
		        second.  Labels and weights are only fed when they are present and
		        ``predict`` is False.
		        """
		        for epoch in range(epochs):
		            for (X_b, y_b, w_b, ids_b) in dataset.iterbatches(
		                    batch_size=self.batch_size,
		                    deterministic=deterministic,
		                    pad_batches=pad_batches):
		                feed_dict = dict()
		                feed_dict[self.m1_features] = X_b[:, 0]
		                feed_dict[self.m2_features] = X_b[:, 1]
		                if y_b is not None and not predict:
		                    feed_dict[self.labels[0]] = y_b
		                if w_b is not None and not predict:
		                    feed_dict[self.my_task_weights] = w_b
		                yield feed_dict

		    def predict_mols(self, mols):
		        """Return one score per molecule in ``mols``.

		        The molecules are featurized with ``CircularFingerprint`` (size
		        ``n_features``, radius 3, chiral) and the same features are placed
		        in both pair slots; the first column of the first model output is
		        returned.
		        """
		        featurizer = CircularFingerprint(
		            size=self.n_features, radius=3, chiral=True)
		        features = np.expand_dims(featurizer.featurize(mols), axis=1)
		        # Duplicate along axis 1 so both towers see the same molecule.
		        features = np.concatenate([features, features], axis=1)
		        ds = NumpyDataset(features, None, None, None)
		        return self.predict(ds)[0][:, 0]

		    def create_estimator_inputs(self, feature_columns, weight_column,
		                                features, labels, mode):
		        """Map Estimator inputs onto this model's input layers.

		        The first two entries of ``feature_columns`` are bound to the first
		        and second molecule inputs respectively.  When ``weight_column`` is
		        given it is bound to the weights layer, and when ``labels`` is given
		        it is bound to the label layer.  ``mode`` is not used.
		        """
		        tensors = {}
		        for layer, column in zip([self.m1_features, self.m2_features],
		                                 feature_columns):
		            tensors[layer] = tf.feature_column.input_layer(features, [column])
		        if weight_column is not None:
		            # Previously the weight column was silently dropped.
		            tensors[self.my_task_weights] = tf.feature_column.input_layer(
		                features, [weight_column])
		        if labels is not None:
		            # NOTE(review): labels are cast to int32 although the HingeLoss
		            # layer docstring expects values of 1.0 or 0.0 -- confirm that
		            # the integer cast is intended.
		            tensors[self.labels[0]] = tf.cast(labels, tf.int32)
		        return tensors

deepchem/models/tensorgraph/models/test_sascore.py

0 → 100644

+40 −0

Original line number	Diff line number	Diff line
		import unittest

		import numpy as np

		import deepchem
		from deepchem.data import NumpyDataset
		from deepchem.models import GraphConvModel
		from deepchem.models import TensorGraph
		from deepchem.molnet.load_function.delaney_datasets import load_delaney
		from deepchem.models.tensorgraph.layers import ReduceSum, L2Loss
		from deepchem.models import WeaveModel
		from deepchem.feat import ConvMolFeaturizer


		class TestSaScoreModel(unittest.TestCase):
		    """Save/load round-trip check for ScScoreModel."""

		    def test_save_load(self):
		        """Predictions must be identical before and after a save/reload."""
		        n_samples, n_features, n_tasks = 10, 3, 1

		        # Random pair features (two molecules per sample) with all-zero labels.
		        np.random.seed(123)
		        pair_features = np.random.rand(n_samples, 2, n_features)
		        zero_labels = np.zeros((n_samples, n_tasks))
		        dataset = deepchem.data.NumpyDataset(pair_features, zero_labels)

		        model = deepchem.models.ScScoreModel(n_features, dropouts=0)
		        model.fit(dataset, nb_epoch=1)
		        before_reload = model.predict(dataset)

		        # Persist the trained model, then restore it from its directory.
		        model.save()
		        model = TensorGraph.load_from_dir(model.model_dir)
		        after_reload = model.predict(dataset)

		        for expected, actual in zip(before_reload, after_reload):
		            self.assertTrue(np.all(expected == actual))

deepchem/models/tensorgraph/tests/test_estimators.py

+50 −0

Original line number	Diff line number	Diff line
		@@ -277,3 +277,53 @@ class TestEstimators(unittest.TestCase):

		results = estimator.evaluate(input_fn=lambda: input_fn(1))
		assert results['accuracy'] > 0.9

		def test_scscore(self):
		    """Build an Estimator from a ScScoreModel, train it and check the loss."""
		    n_samples, n_features, n_tasks = 10, 3, 1

		    # Random pair features (two molecules per sample) with all-zero labels.
		    np.random.seed(123)
		    pair_features = np.random.rand(n_samples, 2, n_features)
		    zero_labels = np.zeros((n_samples, n_tasks))
		    dataset = dc.data.NumpyDataset(pair_features, zero_labels)

		    def input_fn(epochs):
		        # One batch holds the whole dataset; split each pair into two inputs.
		        iterator = dataset.make_iterator(batch_size=n_samples, epochs=epochs)
		        batch_x, batch_y, batch_w = iterator.get_next()
		        inputs = {
		            'x1': batch_x[:, 0],
		            'x2': batch_x[:, 1],
		            'weights': batch_w
		        }
		        return inputs, batch_y

		    # The TensorGraph model under test.
		    model = dc.models.ScScoreModel(n_features, dropouts=0)

		    # One numeric feature column per molecule of the pair.
		    first_col = tf.feature_column.numeric_column('x1', shape=(n_features,))
		    second_col = tf.feature_column.numeric_column('x2', shape=(n_features,))

		    def rounded_accuracy(labels, predictions, weights):
		        return tf.metrics.accuracy(labels, tf.round(predictions), weights)

		    estimator = model.make_estimator(
		        feature_columns=[first_col, second_col],
		        metrics={'accuracy': rounded_accuracy})

		    # Train for 100 epochs, then evaluate on a single pass over the data.
		    estimator.train(input_fn=lambda: input_fn(100))
		    results = estimator.evaluate(input_fn=lambda: input_fn(1))
		    print(results)
		    assert results['loss'] < 1e-4
		    # TODO(LESWING) Discuss with peastman.
		    # The output here is a human-readable score of 1-5 per molecule,
		    # not a class probability, so the accuracy check stays disabled:
		    # assert results['accuracy'] > 0.9

Admin message