Commit 7849d99c authored by Joseph Gomes's avatar Joseph Gomes
Browse files

Update GDB7 tf model script and benchmark

parent 42a2370c
Loading
Loading
Loading
Loading
+4 −4
Original line number Diff line number Diff line
@@ -263,14 +263,14 @@ Scaffold splitting
|chembl          |MT-NN regression    |Index       |0.443         |0.427         |
|                |MT-NN regression    |Random      |0.464         |0.434         |
|                |MT-NN regression    |Scaffold    |0.484         |0.361         |
|gdb7            |MT-NN regression    |Index       |0.961         |0.011         |
|                |MT-NN regression    |Random      |0.742         |0.732         |
|gdb7            |MT-NN regression    |Index       |0.994         |0.010         |
|                |MT-NN regression    |Random      |0.860         |0.773         |
|kaggle          |MT-NN regression    |User-defined|0.748         |0.452         |

|Dataset         |Model               |Splitting   |Train score/MAE(kcal/mol)|Valid score/MAE(kcal/mol)|
|----------------|--------------------|------------|-------------------------|-------------------------|
|gdb7            |MT-NN regression    |Index       |44.5                     |185.6                    |
|                |MT-NN regression    |Random      |86.1                     |92.2                     |
|gdb7            |MT-NN regression    |Index       |18.3                     |172.0                    |
|                |MT-NN regression    |Random      |44.3                     |59.1                     |

* General features

+10 −8
Original line number Diff line number Diff line
@@ -11,18 +11,20 @@ import numpy as np
from gdb7_datasets import load_gdb7

# Seed NumPy's global RNG so repeated runs of this script are comparable.
np.random.seed(123)

# Dataset configuration. `num_atoms` sizes the Coulomb-matrix featurizer and
# is reused by the fit transformer below so the two cannot drift apart.
split = "random"
num_atoms = 23
gdb7_tasks, datasets, transformers = load_gdb7(
    featurizer=dc.feat.CoulombMatrix(num_atoms), split=split)
train_dataset, valid_dataset, test_dataset = datasets

#fit_transformers = [dc.trans.CoulombRandomizationFitTransformer(), dc.trans.NormalizationFitTransformer()]
# The fit transformer is constructed from the training features only.
X = train_dataset.X
fit_transformers = [dc.trans.CoulombFitTransformer(X, num_atoms)]

# Metrics reported after training: mean absolute error and Pearson R^2.
# This is a list, so it is passed to `model.evaluate` as-is (not re-wrapped).
regression_metric = [
    dc.metrics.Metric(dc.metrics.mean_absolute_error, mode="regression"),
    dc.metrics.Metric(dc.metrics.pearson_r2_score, mode="regression"),
]

# Hidden-layer widths; each layer's initial weight stddev is 1/sqrt(width).
layer_sizes = [2000, 800, 800, 1000]

# NOTE(review): `n_features` is still the literal 23 rather than `num_atoms`;
# presumably the two should track each other -- confirm against the model API.
model = dc.models.TensorflowMultiTaskFitTransformRegressor(
    n_tasks=len(gdb7_tasks), n_features=23,
    learning_rate=.0002, momentum=.8, batch_size=512,
    weight_init_stddevs=[1 / np.sqrt(width) for width in layer_sizes],
    bias_init_consts=[0., 0., 0., 0.], layer_sizes=layer_sizes,
    dropouts=[0.1, 0.1, 0.1, 0.1], fit_transformers=fit_transformers,
    n_random_samples=10, seed=123)
@@ -31,10 +33,10 @@ model = dc.models.TensorflowMultiTaskFitTransformRegressor(
# Train for 50 epochs on the training split, then persist the model.
model.fit(train_dataset, nb_epoch=50)
model.save()

# `regression_metric` is already a list of metrics, so it is passed directly;
# wrapping it in another list would hand `evaluate` a nested list.
train_scores = model.evaluate(train_dataset, regression_metric, transformers)
print("Train scores [kcal/mol]")
print(train_scores)

valid_scores = model.evaluate(valid_dataset, regression_metric, transformers)
print("Validation scores [kcal/mol]")
print(valid_scores)