Commit e83f2f67 authored by Joseph Gomes
Browse files

Update gdb7 tf example with fit_transformers

parent 1f775058
Loading
Loading
Loading
Loading
+7 −2
Original line number Diff line number Diff line
@@ -10,7 +10,9 @@ import tensorflow as tf

from deepchem.nn import model_ops
from deepchem.metrics import from_one_hot
from deepchem.utils.save import log
from deepchem.models.tensorflow_models import TensorflowGraph
from deepchem.models.tensorflow_models import TensorflowGraphModel
from deepchem.models.tensorflow_models import TensorflowClassifier
from deepchem.models.tensorflow_models import TensorflowRegressor
from deepchem.metrics import to_one_hot
@@ -187,7 +189,7 @@ class TensorflowMultiTaskRegressor(TensorflowRegressor):
            (self.batch_size,)) 
    return TensorflowGraph.get_feed_dict(orig_dict)

class TensorflowMultiTaskFitTransformRegressor(TensorflowRegressor):
class TensorflowMultiTaskFitTransformRegressor(TensorflowMultiTaskRegressor):
  """Implements a TensorflowMultiTaskRegressor that performs on-the-fly transformation during fit/predict"""

  def __init__(self, n_tasks, n_features, logdir=None, layer_sizes=[1000],
@@ -249,6 +251,8 @@ class TensorflowMultiTaskFitTransformRegressor(TensorflowRegressor):
              dataset.iterbatches(self.batch_size, pad_batches=pad_batches)):
            if ind % log_every_N_batches == 0:
              log("On batch %d" % ind, self.verbose)
	    for transformer in self.fit_transformers:
	      X_b = transformer.X_transform(X_b)	
            # Run training op.
            feed_dict = self.construct_feed_dict(X_b, y_b, w_b, ids_b)
            fetches = self.train_graph.output + [
@@ -291,10 +295,11 @@ class TensorflowMultiTaskFitTransformRegressor(TensorflowRegressor):
      AssertionError: If model is not in evaluation mode.
      ValueError: If output and labels are not both 3D or both 2D.
    """
    for transformer in self.fit_transformers:
      X = transformer.X_transform(X)
    len_unpadded = len(X)
    if pad_batch:
      X = pad_features(self.batch_size, X)
    
    if not self._restored_model:
      self.restore()
    with self.eval_graph.graph.as_default():
+5 −4
Original line number Diff line number Diff line
@@ -33,7 +33,8 @@ featurizer = dc.data.SDFLoader(tasks, smiles_field=smiles_field, mol_field=mol_f
dataset = featurizer.featurize(input_file, data_dir)
random_splitter = dc.splits.RandomSplitter()
train_dataset, test_dataset = random_splitter.train_test_split(dataset, train_dir, test_dir)
transformers = [dc.trans.NormalizationTransformer(transform_X=True, dataset=train_dataset), dc.trans.NormalizationTransformer(transform_y=True, dataset=train_dataset)]
transformers = [dc.trans.NormalizationTransformer(transform_y=True, dataset=train_dataset)]
fit_transformers = []

for transformer in transformers:
    train_dataset = transformer.transform(train_dataset)
@@ -41,14 +42,14 @@ for transformer in transformers:
    test_dataset = transformer.transform(test_dataset)

regression_metric = dc.metrics.Metric(dc.metrics.mean_absolute_error, mode="regression")
model = dc.models.TensorflowMultiTaskRegressor(n_tasks=len(tasks), n_features=23, logdir=model_dir,
model = dc.models.tensorflow_models.fcnet.TensorflowMultiTaskFitTransformRegressor(n_tasks=len(tasks), n_features=23, logdir=model_dir,
                                    learning_rate=.001, momentum=.8, batch_size=512,
                                    weight_init_stddevs=[1/np.sqrt(2000),1/np.sqrt(800),1/np.sqrt(800),1/np.sqrt(1000)],
                                    bias_init_consts=[0.,0.,0.,0.], layer_sizes=[2000,800,800,1000], 
                                    dropouts=[0.1,0.1,0.1,0.1])
                                    dropouts=[0.1,0.1,0.1,0.1], fit_transformers=fit_transformers)

# Fit trained model
model.fit(train_dataset)
model.fit(train_dataset, nb_epoch=10)
model.save()

train_evaluator = dc.utils.evaluate.Evaluator(model, train_dataset, transformers)