Unverified Commit c43482fd authored by Bharath Ramsundar's avatar Bharath Ramsundar Committed by GitHub
Browse files

Merge pull request #1475 from VIGS25/make-estimator-dtnn-model

#1142: Make estimator DTNN model
parents 8b630252 f2856a61
Loading
Loading
Loading
Loading
+69 −32
Original line number Diff line number Diff line
@@ -311,24 +311,13 @@ class DTNNModel(TensorGraph):
    weighted_loss = ReduceSum(L2Loss(in_layers=[labels, output, weights]))
    self.set_loss(weighted_loss)

  def default_generator(self,
                        dataset,
                        epochs=1,
                        predict=False,
                        deterministic=True,
                        pad_batches=True):
    """TensorGraph style implementation"""
    for epoch in range(epochs):
      for (X_b, y_b, w_b, ids_b) in dataset.iterbatches(
          batch_size=self.batch_size,
          deterministic=deterministic,
          pad_batches=pad_batches):
  def compute_features_on_batch(self, X_b):
    """Computes the values for different Feature Layers on given batch

        feed_dict = dict()
        if y_b is not None:
          feed_dict[self.labels[0]] = y_b
        if w_b is not None:
          feed_dict[self.task_weights[0]] = w_b
    A tf.py_func wrapper is written around this when creating the
    input_fn for tf.Estimator

    """
    distance = []
    atom_membership = []
    distance_membership_i = []
@@ -352,18 +341,66 @@ class DTNNModel(TensorGraph):
      distance_membership_i.append(membership_i + start)
      distance_membership_j.append(membership_j + start)
      start = start + num_atoms[im]
        feed_dict[self.atom_number] = np.concatenate(atom_number)
        distance = np.concatenate(distance, 0)
        feed_dict[self.distance] = np.exp(

    atom_number = np.concatenate(atom_number).astype(np.int32)
    distance = np.concatenate(distance, axis=0)
    gaussian_dist = np.exp(
        -np.square(distance - self.steps) / (2 * self.step_size**2))
        feed_dict[self.distance_membership_i] = np.concatenate(
            distance_membership_i)
        feed_dict[self.distance_membership_j] = np.concatenate(
            distance_membership_j)
        feed_dict[self.atom_membership] = np.concatenate(atom_membership)
    gaussian_dist = gaussian_dist.astype(np.float32)
    atom_mem = np.concatenate(atom_membership).astype(np.int32)
    dist_mem_i = np.concatenate(distance_membership_i).astype(np.int32)
    dist_mem_j = np.concatenate(distance_membership_j).astype(np.int32)

    features = [atom_number, gaussian_dist, dist_mem_i, dist_mem_j, atom_mem]

    return features

  def default_generator(self,
                        dataset,
                        epochs=1,
                        predict=False,
                        deterministic=True,
                        pad_batches=True):
    """Yield one feed dict per batch, iterating over `dataset` `epochs` times.

    Each feed dict maps input layers of this model to the values for one
    batch. Labels and task weights are only included when the batch
    supplies them (i.e. they are not None). The remaining five entries are
    taken, in order, from the list returned by
    `compute_features_on_batch(X_b)`: atom numbers, distances, distance
    membership i, distance membership j and atom membership.

    Note: `predict` is accepted but not consulted by this generator.
    """
    for _epoch in range(epochs):
      batches = dataset.iterbatches(
          batch_size=self.batch_size,
          deterministic=deterministic,
          pad_batches=pad_batches)
      for X_b, y_b, w_b, _ids_b in batches:
        feed_dict = {}
        if y_b is not None:
          feed_dict[self.labels[0]] = y_b
        if w_b is not None:
          feed_dict[self.task_weights[0]] = w_b

        # Positions follow the return order of compute_features_on_batch.
        batch_features = self.compute_features_on_batch(X_b)
        feed_dict.update({
            self.atom_number: batch_features[0],
            self.distance: batch_features[1],
            self.distance_membership_i: batch_features[2],
            self.distance_membership_j: batch_features[3],
            self.atom_membership: batch_features[4],
        })

        yield feed_dict

  def create_estimator_inputs(self, feature_columns, weight_column, features,
                              labels, mode):
    """Build the mapping from this model's input layers to Estimator tensors.

    `self.features` and `feature_columns` are paired positionally. For each
    pair the column is materialised with `tf.feature_column.input_layer`,
    cast to the column's dtype when the two differ, and flattened to a
    rank-1 tensor when the column declares an empty shape.

    The task-weight layer is added only when `weight_column` is not None,
    and the label layer only when `labels` is not None. `mode` is accepted
    but not used here.

    Returns a dict keyed by layer.
    """
    tensors = {}
    for layer, column in zip(self.features, feature_columns):
      tensor = tf.feature_column.input_layer(features, [column])
      if column.dtype != tensor.dtype:
        tensor = tf.cast(tensor, column.dtype)
      if len(column.shape) == 0:
        # Column declared with an empty shape: keep only the leading
        # dimension of the input_layer output.
        leading_dim = tf.shape(tensor)[0]
        tensor = tf.reshape(tensor, shape=[leading_dim])
      tensors[layer] = tensor

    if weight_column is not None:
      weight_tensor = tf.feature_column.input_layer(features, [weight_column])
      tensors[self.task_weights[0]] = weight_tensor
    if labels is not None:
      tensors[self.labels[0]] = labels

    return tensors


class DAGModel(TensorGraph):

+71 −0
Original line number Diff line number Diff line
@@ -5,6 +5,9 @@ import deepchem as dc
import deepchem.models.tensorgraph.layers as layers
from deepchem.data import NumpyDataset
from deepchem.models.tensorgraph.models.text_cnn import default_dict
from scipy.io import loadmat
from flaky import flaky
import os


class TestEstimators(unittest.TestCase):
@@ -453,3 +456,71 @@ class TestEstimators(unittest.TestCase):
    # Train the model.

    estimator.train(input_fn=lambda: input_fn(100))

  @flaky
  def test_dtnn_regression_model(self):
    """Test creating an estimator for DTNNModel for regression.

    Loads the example DTNN .mat file, trains an Estimator built from a
    DTNNModel on it, and checks that the mean relative error reported by
    `evaluate` on the same data is below 0.1.
    """
    data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "../")
    input_file = os.path.join(data_dir, "models", "example_DTNN.mat")
    mat_contents = loadmat(input_file)

    np.random.seed(123)
    X = mat_contents['X']
    y = mat_contents['T'].astype(np.float32)
    w = np.ones_like(y)
    dataset = dc.data.NumpyDataset(X, y, w, ids=None)
    n_tasks = y.shape[1]
    n_samples = y.shape[0]

    # Names and dtypes are listed in the order in which
    # compute_features_on_batch returns its outputs, so that every lookup
    # below is by name instead of by a hand-maintained index.
    feature_names = [
        'atom_num', 'distance', 'dist_mem_i', 'dist_mem_j', 'atom_mem'
    ]
    dtypes = [tf.int32, tf.float32, tf.int32, tf.int32, tf.int32]
    dtype_of = dict(zip(feature_names, dtypes))

    model = dc.models.DTNNModel(
        n_tasks,
        n_embedding=20,
        n_distance=100,
        learning_rate=1.0,
        mode="regression")

    def mean_relative_error(labels, predictions, weights):
      # `weights` is part of the metric signature but is not used here.
      error = tf.abs(1 - tf.div(labels, predictions))
      error_val, update_op = tf.metrics.mean(error)
      return error_val, update_op

    def input_fn(batch_size, epochs):
      X, y, weights = dataset.make_iterator(
          batch_size=batch_size, epochs=epochs).get_next()
      # The featurization is plain numpy code, so it is wrapped in
      # tf.py_func; Tout must follow the order of the returned list.
      features = tf.py_func(
          model.compute_features_on_batch, inp=[X], Tout=dtypes)

      assert len(features) == len(feature_names)
      feature_dict = dict(zip(feature_names, features))
      feature_dict['weights'] = weights

      return feature_dict, y

    atom_number = tf.feature_column.numeric_column(
        'atom_num', shape=[], dtype=dtype_of['atom_num'])
    distance = tf.feature_column.numeric_column(
        'distance', shape=(model.n_distance,), dtype=dtype_of['distance'])
    dist_mem_i = tf.feature_column.numeric_column(
        'dist_mem_i', shape=[], dtype=dtype_of['dist_mem_i'])
    dist_mem_j = tf.feature_column.numeric_column(
        'dist_mem_j', shape=[], dtype=dtype_of['dist_mem_j'])
    atom_mem = tf.feature_column.numeric_column(
        'atom_mem', shape=[], dtype=dtype_of['atom_mem'])

    weight_col = tf.feature_column.numeric_column('weights', shape=(n_tasks,))
    metrics = {'error': mean_relative_error}

    feature_cols = [atom_number, distance, dist_mem_i, dist_mem_j, atom_mem]
    estimator = model.make_estimator(
        feature_columns=feature_cols, weight_column=weight_col, metrics=metrics)
    estimator.train(input_fn=lambda: input_fn(100, 250))

    results = estimator.evaluate(input_fn=lambda: input_fn(n_samples, 1))
    assert results['error'] < 0.1