Merge pull request #2153 from deepchem/weavefix (91796d3d) · Commits · 钟慕尧 / deepchem

deepchem/models/graph_models.py

+70 −50

Original line number	Diff line number	Diff line
		@@ -4,7 +4,7 @@ import deepchem as dc
		import numpy as np
		import tensorflow as tf

		from typing import List, Union, Tuple, Iterable, Dict
		from typing import List, Union, Tuple, Iterable, Dict, Optional
		from deepchem.utils.typing import OneOrMany, LossFn, KerasActivationFn
		from deepchem.data import Dataset, NumpyDataset, pad_features
		from deepchem.feat.graph_features import ConvMolFeaturizer
		@@ -78,7 +78,8 @@ class WeaveModel(KerasModel):

		"""

		def __init__(self,
		def __init__(
		self,
		n_tasks: int,
		n_atom_feat: OneOrMany[int] = 75,
		n_pair_feat: OneOrMany[int] = 14,
		@@ -86,11 +87,13 @@ class WeaveModel(KerasModel):
		n_graph_feat: int = 128,
		n_weave: int = 2,
		fully_connected_layer_sizes: List[int] = [2000, 100],
		weight_init_stddevs: OneOrMany[float] = [0.01, 0.04],
		bias_init_consts: OneOrMany[float] = [0.5, 3.0],
		conv_weight_init_stddevs: OneOrMany[float] = 0.03,
		weight_init_stddevs: OneOrMany[float] = 0.01,
		bias_init_consts: OneOrMany[float] = 0.0,
		weight_decay_penalty: float = 0.0,
		weight_decay_penalty_type: str = "l2",
		dropouts: OneOrMany[float] = 0.25,
		final_conv_activation_fn: Optional[KerasActivationFn] = tf.nn.tanh,
		activation_fns: OneOrMany[KerasActivationFn] = tf.nn.relu,
		batch_normalize: bool = True,
		batch_normalize_kwargs: Dict = {
		@@ -108,40 +111,49 @@ class WeaveModel(KerasModel):
		----------
		n_tasks: int
		Number of tasks
		n_atom_feat: int, optional
		Number of features per atom.
		n_pair_feat: int, optional
		n_atom_feat: int, optional (default 75)
		Number of features per atom. Note this is 75 by default and should be 78
		if chirality is used by `WeaveFeaturizer`.
		n_pair_feat: int, optional (default 14)
		Number of features per pair of atoms.
		n_hidden: int, optional
		n_hidden: int, optional (default 50)
		Number of units(convolution depths) in corresponding hidden layer
		n_graph_feat: int, optional
		n_graph_feat: int, optional (default 128)
		Number of output features for each molecule(graph)
		n_weave: int, optional
		n_weave: int, optional (default 2)
		The number of weave layers in this model.
		fully_connected_layer_sizes: list
		fully_connected_layer_sizes: list (default `[2000, 100]`)
		The size of each dense layer in the network. The length of
		this list determines the number of layers.
		weight_init_stddevs: list or float
		conv_weight_init_stddevs: list or float (default 0.03)
		The standard deviation of the distribution to use for weight
		initialization of each convolutional layer. The length of this lisst
		should equal `n_weave`. Alternatively, this may be a single value instead
		of a list, in which case the same value is used for each layer.
		weight_init_stddevs: list or float (default 0.01)
		The standard deviation of the distribution to use for weight
		initialization of each layer. The length of this list should
		equal len(layer_sizes). Alternatively this may be a single
		value instead of a list, in which case the same value is used
		for every layer.
		bias_init_consts: list or float
		The value to initialize the biases in each layer to. The
		initialization of each fully connected layer. The length of this list
		should equal len(layer_sizes). Alternatively this may be a single value
		instead of a list, in which case the same value is used for every layer.
		bias_init_consts: list or float (default 0.0)
		The value to initialize the biases in each fully connected layer. The
		length of this list should equal len(layer_sizes).
		Alternatively this may be a single value instead of a list, in
		which case the same value is used for every layer.
		weight_decay_penalty: float
		weight_decay_penalty: float (default 0.0)
		The magnitude of the weight decay penalty to use
		weight_decay_penalty_type: str
		weight_decay_penalty_type: str (default "l2")
		The type of penalty to use for weight decay, either 'l1' or 'l2'
		dropouts: list or float
		The dropout probablity to use for each layer. The length of this list
		dropouts: list or float (default 0.25)
		The dropout probablity to use for each fully connected layer. The length of this list
		should equal len(layer_sizes). Alternatively this may be a single value
		instead of a list, in which case the same value is used for every layer.
		activation_fns: list or object
		The Tensorflow activation function to apply to each layer. The length
		final_conv_activation_fn: Optional[KerasActivationFn] (default `tf.nn.tanh`)
		The Tensorflow activation funcntion to apply to the final
		convolution at the end of the weave convolutions. If `None`, then no
		activate is applied (hence linear).
		activation_fns: list or object (default `tf.nn.relu`)
		The Tensorflow activation function to apply to each fully connected layer. The length
		of this list should equal len(layer_sizes). Alternatively this may be a
		single value instead of a list, in which case the same value is used for
		every layer.
		@@ -159,10 +171,12 @@ class WeaveModel(KerasModel):
		compress_post_gaussian_expansion: bool, optional (default False)
		If True, compress the results of the Gaussian expansion back to the
		original dimensions of the input.
		mode: str
		mode: str (default "classification")
		Either "classification" or "regression" for type of model.
		n_classes: int
		n_classes: int (default 2)
		Number of classes to predict (only used in classification mode)
		batch_size: int (default 100)
		Batch size used by this model for training.
		"""
		if mode not in ['classification', 'regression']:
		raise ValueError("mode must be either 'classification' or 'regression'")
		@@ -172,6 +186,8 @@ class WeaveModel(KerasModel):
		if not isinstance(n_pair_feat, collections.Sequence):
		n_pair_feat = [n_pair_feat] * n_weave
		n_layers = len(fully_connected_layer_sizes)
		if not isinstance(conv_weight_init_stddevs, collections.Sequence):
		conv_weight_init_stddevs = [conv_weight_init_stddevs] * n_weave
		if not isinstance(weight_init_stddevs, collections.Sequence):
		weight_init_stddevs = [weight_init_stddevs] * n_layers
		if not isinstance(bias_init_consts, collections.Sequence):
		@@ -217,12 +233,16 @@ class WeaveModel(KerasModel):
		n_pair_input_feat=n_pair,
		n_atom_output_feat=n_atom_next,
		n_pair_output_feat=n_pair_next,
		init=tf.keras.initializers.TruncatedNormal(
		stddev=conv_weight_init_stddevs[ind]),
		batch_normalize=batch_normalize)(inputs)
		inputs = [weave_layer_ind_A, weave_layer_ind_P, pair_split, atom_to_pair]
		# Final atom-layer convolution. Note this differs slightly from the paper
		# since we use a tanh activation. This seems necessary for numerical
		# since we use a tanh activation as default. This seems necessary for numerical
		# stability.
		dense1 = Dense(self.n_graph_feat, activation=tf.nn.tanh)(weave_layer_ind_A)
		dense1 = Dense(
		self.n_graph_feat,
		activation=final_conv_activation_fn)(weave_layer_ind_A)
		if batch_normalize:
		dense1 = BatchNormalization(**batch_normalize_kwargs)(dense1)
		weave_gather = layers.WeaveGather(

deepchem/models/tests/test_layers.py

+2 −1

Original line number	Diff line number	Diff line
		@@ -108,7 +108,8 @@ def test_weave_layer():
		mols = [Chem.MolFromSmiles(s) for s in raw_smiles]
		featurizer = dc.feat.WeaveFeaturizer()
		mols = featurizer.featurize(mols)
		weave = layers.WeaveLayer()
		weave = layers.WeaveLayer(
		init=tf.keras.initializers.TruncatedNormal(stddev=0.03))
		atom_feat = []
		pair_feat = []
		atom_to_pair = []

deepchem/models/tests/test_overfit.py

+6 −18

Original line number	Diff line number	Diff line
		@@ -737,23 +737,16 @@ def test_weave_singletask_classification_overfit():

		classification_metric = dc.metrics.Metric(dc.metrics.accuracy_score)

		n_atom_feat = 75
		n_pair_feat = 14
		n_feat = 128
		batch_size = 10

		model = dc.models.WeaveModel(
		n_tasks,
		n_atom_feat=n_atom_feat,
		n_pair_feat=n_pair_feat,
		n_graph_feat=n_feat,
		batch_size=batch_size,
		learning_rate=0.001,
		use_queue=False,
		learning_rate=0.0003,
		dropout=0.0,
		mode="classification")

		# Fit trained model
		model.fit(dataset, nb_epoch=20)
		model.fit(dataset, nb_epoch=100)

		# Eval model on train
		scores = model.evaluate(dataset, [classification_metric])
		@@ -761,6 +754,7 @@ def test_weave_singletask_classification_overfit():
		assert scores[classification_metric.name] > .65


		@pytest.mark.slow
		def test_weave_singletask_regression_overfit():
		"""Test weave model overfits tiny data."""
		np.random.seed(123)
		@@ -779,19 +773,13 @@ def test_weave_singletask_regression_overfit():
		regression_metric = dc.metrics.Metric(
		dc.metrics.pearson_r2_score, task_averager=np.mean)

		n_atom_feat = 75
		n_pair_feat = 14
		n_feat = 128
		batch_size = 10

		model = dc.models.WeaveModel(
		n_tasks,
		n_atom_feat=n_atom_feat,
		n_pair_feat=n_pair_feat,
		n_graph_feat=n_feat,
		batch_size=batch_size,
		learning_rate=0.001,
		use_queue=False,
		learning_rate=0.0003,
		dropout=0.0,
		mode="regression")

		# Fit trained model

deepchem/models/tests/test_weave_models.py

+15 −18

Original line number	Diff line number	Diff line
		@@ -13,8 +13,10 @@ from deepchem.feat import ConvMolFeaturizer
		from flaky import flaky


		def get_dataset(mode='classification', featurizer='GraphConv', num_tasks=2):
		data_points = 20
		def get_dataset(mode='classification',
		featurizer='GraphConv',
		num_tasks=2,
		data_points=20):
		if mode == 'classification':
		tasks, all_dataset, transformers = load_bace_classification(
		featurizer, reload=False)
		@@ -121,22 +123,18 @@ def test_compute_features_on_distance_1():
		@flaky
		@pytest.mark.slow
		def test_weave_model():
		tasks, dataset, transformers, metric = get_dataset('classification', 'Weave')
		tasks, dataset, transformers, metric = get_dataset(
		'classification', 'Weave', data_points=10)

		batch_size = 20
		batch_size = 10
		model = WeaveModel(
		len(tasks),
		batch_size=batch_size,
		mode='classification',
		fully_connected_layer_sizes=[2000, 1000],
		batch_normalize=True,
		batch_normalize_kwargs={
		"fused": False,
		"trainable": True,
		"renorm": True
		},
		learning_rage=0.0005)
		model.fit(dataset, nb_epoch=200)
		final_conv_activation_fn=None,
		dropouts=0,
		learning_rage=0.0003)
		model.fit(dataset, nb_epoch=100)
		scores = model.evaluate(dataset, [metric], transformers)
		assert scores['mean-roc_auc_score'] >= 0.9

		@@ -147,18 +145,17 @@ def test_weave_regression_model():
		import tensorflow as tf
		tf.random.set_seed(123)
		np.random.seed(123)
		tasks, dataset, transformers, metric = get_dataset('regression', 'Weave')
		tasks, dataset, transformers, metric = get_dataset(
		'regression', 'Weave', data_points=10)

		batch_size = 10
		model = WeaveModel(
		len(tasks),
		batch_size=batch_size,
		mode='regression',
		batch_normalize=False,
		fully_connected_layer_sizes=[],
		dropouts=0,
		learning_rate=0.0005)
		model.fit(dataset, nb_epoch=200)
		learning_rate=0.00003)
		model.fit(dataset, nb_epoch=400)
		scores = model.evaluate(dataset, [metric], transformers)
		assert scores['mean_absolute_error'] < 0.1

Admin message