Commit ec30df0f authored by Vignesh's avatar Vignesh
Browse files

Added make_estimator for BPSymmetry, ANIRegression

parent f8af3850
Loading
Loading
Loading
Loading
+110 −57
Original line number Diff line number Diff line
@@ -16,7 +16,7 @@ import tensorflow as tf
import deepchem as dc

from deepchem.models.tensorgraph.layers import Dense, Concat, WeightedError, Stack, Layer, ANIFeat, Exp
from deepchem.models.tensorgraph.layers import L2Loss, Label, Weights, Feature, Dropout, WeightDecay
from deepchem.models.tensorgraph.layers import L2Loss, Label, Weights, Feature, Dropout, WeightDecay, ReduceSum, Reshape
from deepchem.models.tensorgraph.tensor_graph import TensorGraph
from deepchem.models.tensorgraph.graph_layers import DTNNEmbedding
from deepchem.models.tensorgraph.symmetry_functions import DistanceMatrix, \
@@ -49,12 +49,18 @@ class BPSymmetryFunctionRegression(TensorGraph):

    super(BPSymmetryFunctionRegression, self).__init__(**kwargs)

    self.build_graph()
    self._build_graph()

  def build_graph(self):
    self.atom_flags = Feature(shape=(None, self.max_atoms, self.max_atoms))
    self.atom_feats = Feature(shape=(None, self.max_atoms, self.n_feat))
    previous_layer = self.atom_feats
  def _build_graph(self):
    self.atom_flags = Feature(shape=(None, self.max_atoms * self.max_atoms))
    self.atom_feats = Feature(shape=(None, self.max_atoms * self.n_feat))

    reshaped_atom_feats = Reshape(
        in_layers=[self.atom_feats], shape=(-1, self.max_atoms, self.n_feat))
    reshaped_atom_flags = Reshape(
        in_layers=[self.atom_flags], shape=(-1, self.max_atoms, self.max_atoms))

    previous_layer = reshaped_atom_feats

    Hiddens = []
    for n_hidden in self.layer_structures:
@@ -65,23 +71,27 @@ class BPSymmetryFunctionRegression(TensorGraph):
      Hiddens.append(Hidden)
      previous_layer = Hiddens[-1]

    costs = []
    self.labels_fd = []
    for task in range(self.n_tasks):
    regression = Dense(
          out_channels=1, activation_fn=None, in_layers=[Hiddens[-1]])
      output = BPGather(self.max_atoms, in_layers=[regression, self.atom_flags])
        out_channels=1 * self.n_tasks,
        activation_fn=None,
        in_layers=[Hiddens[-1]])
    output = BPGather(
        self.max_atoms, in_layers=[regression, reshaped_atom_flags])
    self.add_output(output)

      label = Label(shape=(None, 1))
      self.labels_fd.append(label)
      cost = L2Loss(in_layers=[label, output])
      costs.append(cost)
    label = Label(shape=(None, self.n_tasks, 1))
    loss = ReduceSum(L2Loss(in_layers=[label, output]))
    weights = Weights(shape=(None, self.n_tasks))

    weighted_loss = WeightedError(in_layers=[loss, weights])
    self.set_loss(weighted_loss)

    all_cost = Stack(in_layers=costs, axis=1)
    self.weights = Weights(shape=(None, self.n_tasks))
    loss = WeightedError(in_layers=[all_cost, self.weights])
    self.set_loss(loss)
  def compute_features_on_batch(self, X_b):
    """Split a raw batch into the arrays fed to the graph's inputs.

    Column 0 of X_b marks real atoms (nonzero) versus padding (zero);
    the remaining columns are the per-atom features.

    Returns [atom_feats, atom_flags], where atom_flags[b, i, j] is
    nonzero only when both atom i and atom j of sample b are real.
    """
    presence = np.sign(np.array(X_b[:, :, 0]))
    # Pairwise valid-atom mask: replicate the presence vector along rows
    # and columns, then multiply elementwise.
    row_mask = np.stack([presence] * self.max_atoms, axis=2)
    col_mask = np.stack([presence] * self.max_atoms, axis=1)
    pair_mask = row_mask * col_mask
    # Drop the presence column; everything after it is a feature.
    features = np.array(X_b[:, :, 1:], dtype=np.float32)
    return [features, pair_mask]

  def default_generator(self,
                        dataset,
@@ -99,17 +109,33 @@ class BPSymmetryFunctionRegression(TensorGraph):

        feed_dict = dict()
        if y_b is not None and not predict:
          for index, label in enumerate(self.labels_fd):
            feed_dict[label] = y_b[:, index:index + 1]
          feed_dict[self.labels[0]] = y_b
        if w_b is not None and not predict:
          feed_dict[self.weights] = w_b
          feed_dict[self.task_weights[0]] = w_b

        atom_feats, atom_flags = self.compute_features_on_batch(X_b)
        feed_dict[self.atom_feats] = atom_feats
        feed_dict[self.atom_flags] = atom_flags

        flags = np.sign(np.array(X_b[:, :, 0]))
        feed_dict[self.atom_flags] = np.stack([flags]*self.max_atoms, axis=2)*\
            np.stack([flags]*self.max_atoms, axis=1)
        feed_dict[self.atom_feats] = np.array(X_b[:, :, 1:], dtype=float)
        yield feed_dict

  def create_estimator_inputs(self, feature_columns, weight_column, features,
                              labels, mode):
    """Map Estimator `features`/`labels` tensors onto this graph's inputs.

    Parameters
    ----------
    feature_columns: list of tf.feature_column
      One column per entry in self.features, in the same order.
    weight_column: tf.feature_column or None
      Optional column holding per-task example weights.
    features: dict
      The Estimator input_fn feature dict.
    labels: Tensor or None
      The Estimator labels tensor (None at predict time).
    mode: tf.estimator.ModeKeys
      Unused here; accepted to match the make_estimator callback signature.

    Returns
    -------
    dict mapping graph input layers to tensors.
    """
    tensors = dict()
    for layer, column in zip(self.features, feature_columns):
      feature_col = tf.feature_column.input_layer(features, [column])
      # input_layer always emits float32; cast back when the column
      # declares a different dtype (e.g. int32 atom numbers).
      if feature_col.dtype != column.dtype:
        feature_col = tf.cast(feature_col, column.dtype)
      tensors[layer] = feature_col

    # Bug fix: these two blocks were nested inside the loop above, so
    # they ran redundantly once per feature column and never ran at all
    # for an empty feature_columns list.
    if weight_column is not None:
      tensors[self.task_weights[0]] = tf.feature_column.input_layer(
          features, [weight_column])
    if labels is not None:
      tensors[self.labels[0]] = labels

    return tensors


class ANIRegression(TensorGraph):

@@ -304,11 +330,16 @@ class ANIRegression(TensorGraph):
  def build_graph(self):

    self.atom_numbers = Feature(shape=(None, self.max_atoms), dtype=tf.int32)
    self.atom_flags = Feature(shape=(None, self.max_atoms, self.max_atoms))
    self.atom_feats = Feature(shape=(None, self.max_atoms, 4))
    self.atom_flags = Feature(shape=(None, self.max_atoms * self.max_atoms))
    self.atom_feats = Feature(shape=(None, self.max_atoms * 4))

    reshaped_atom_flags = Reshape(
        in_layers=[self.atom_flags], shape=(-1, self.max_atoms, self.max_atoms))
    reshaped_atom_feats = Reshape(
        in_layers=[self.atom_feats], shape=(-1, self.max_atoms, 4))

    previous_layer = ANIFeat(
        in_layers=self.atom_feats, max_atoms=self.max_atoms)
        in_layers=reshaped_atom_feats, max_atoms=self.max_atoms)

    self.featurized = previous_layer

@@ -323,25 +354,31 @@ class ANIRegression(TensorGraph):
      Hiddens.append(Hidden)
      previous_layer = Hiddens[-1]

    costs = []
    self.labels_fd = []
    for task in range(self.n_tasks):
    regression = Dense(
          out_channels=1, activation_fn=None, in_layers=[Hiddens[-1]])
      output = BPGather(self.max_atoms, in_layers=[regression, self.atom_flags])
        out_channels=1 * self.n_tasks,
        activation_fn=None,
        in_layers=[Hiddens[-1]])
    output = BPGather(
        self.max_atoms, in_layers=[regression, reshaped_atom_flags])
    self.add_output(output)

      label = Label(shape=(None, 1))
      self.labels_fd.append(label)
      cost = L2Loss(in_layers=[label, output])
      costs.append(cost)
    label = Label(shape=(None, self.n_tasks, 1))
    loss = ReduceSum(L2Loss(in_layers=[label, output]))
    weights = Weights(shape=(None, self.n_tasks))

    all_cost = Stack(in_layers=costs, axis=1)
    self.weights = Weights(shape=(None, self.n_tasks))
    loss = WeightedError(in_layers=[all_cost, self.weights])
    weighted_loss = WeightedError(in_layers=[loss, weights])
    if self.exp_loss:
      loss = Exp(in_layers=[loss])
    self.set_loss(loss)
      weighted_loss = Exp(in_layers=[weighted_loss])
    self.set_loss(weighted_loss)

  def compute_features_on_batch(self, X_b):
    """Split a raw batch into the arrays fed to the graph's inputs.

    Column 0 of X_b holds the atomic number (zero means padding); the
    full array, including that column, is passed along as atom_feats.

    Returns [atom_feats, atom_numbers, atom_flags], where
    atom_flags[b, i, j] is nonzero only when atoms i and j of sample b
    are both real atoms.
    """
    presence = np.sign(np.array(X_b[:, :, 0]))
    # Pairwise valid-atom mask built by replicating the presence vector
    # along rows and columns and multiplying elementwise.
    row_mask = np.stack([presence] * self.max_atoms, axis=2)
    col_mask = np.stack([presence] * self.max_atoms, axis=1)
    pair_mask = row_mask * col_mask
    numbers = np.array(X_b[:, :, 0], dtype=np.int32)
    features = np.array(X_b[:, :, :], dtype=np.float32)

    return [features, numbers, pair_mask]

  def default_generator(self,
                        dataset,
@@ -359,18 +396,34 @@ class ANIRegression(TensorGraph):

        feed_dict = dict()
        if y_b is not None and not predict:
          for index, label in enumerate(self.labels_fd):
            feed_dict[label] = y_b[:, index:index + 1]
          feed_dict[self.labels[0]] = y_b
        if w_b is not None and not predict:
          feed_dict[self.weights] = w_b
          feed_dict[self.task_weights[0]] = w_b

        flags = np.sign(np.array(X_b[:, :, 0]))
        feed_dict[self.atom_flags] = np.stack([flags]*self.max_atoms, axis=2)*\
            np.stack([flags]*self.max_atoms, axis=1)
        feed_dict[self.atom_numbers] = np.array(X_b[:, :, 0], dtype=int)
        feed_dict[self.atom_feats] = np.array(X_b[:, :, :], dtype=float)
        atom_feats, atom_numbers, atom_flags = self.compute_features_on_batch(
            X_b)
        feed_dict[self.atom_feats] = atom_feats
        feed_dict[self.atom_numbers] = atom_numbers
        feed_dict[self.atom_flags] = atom_flags
        yield feed_dict

  def create_estimator_inputs(self, feature_columns, weight_column, features,
                              labels, mode):
    """Map Estimator `features`/`labels` tensors onto this graph's inputs.

    Parameters
    ----------
    feature_columns: list of tf.feature_column
      One column per entry in self.features, in the same order.
    weight_column: tf.feature_column or None
      Optional column holding per-task example weights.
    features: dict
      The Estimator input_fn feature dict.
    labels: Tensor or None
      The Estimator labels tensor (None at predict time).
    mode: tf.estimator.ModeKeys
      Unused here; accepted to match the make_estimator callback signature.

    Returns
    -------
    dict mapping graph input layers to tensors.
    """
    tensors = dict()
    for layer, column in zip(self.features, feature_columns):
      feature_col = tf.feature_column.input_layer(features, [column])
      # input_layer always emits float32; cast back when the column
      # declares a different dtype (e.g. int32 atom numbers).
      if feature_col.dtype != column.dtype:
        feature_col = tf.cast(feature_col, column.dtype)
      tensors[layer] = feature_col

    # Bug fix: these two blocks were nested inside the loop above, so
    # they ran redundantly once per feature column and never ran at all
    # for an empty feature_columns list.
    if weight_column is not None:
      tensors[self.task_weights[0]] = tf.feature_column.input_layer(
          features, [weight_column])
    if labels is not None:
      tensors[self.labels[0]] = labels

    return tensors

  def save_numpy(self):
    """
    Save to a portable numpy file. Note that this relies on the names to be consistent
+151 −2
Original line number Diff line number Diff line
@@ -467,9 +467,11 @@ class TestEstimators(unittest.TestCase):
    input_file = os.path.join(current_dir, "example_DTNN.mat")
    dataset = loadmat(input_file)

    num_vals_to_use = 5

    np.random.seed(123)
    X = dataset['X']
    y = dataset['T'].astype(np.float32)
    X = dataset['X'][:num_vals_to_use]
    y = dataset['T'][:num_vals_to_use].astype(np.float32)
    w = np.ones_like(y)
    dataset = dc.data.NumpyDataset(X, y, w, ids=None)
    n_tasks = y.shape[1]
@@ -527,3 +529,150 @@ class TestEstimators(unittest.TestCase):

    results = estimator.evaluate(input_fn=lambda: input_fn(n_samples, 1))
    assert results['error'] < 0.1

  def test_bpsymm_regression_model(self):
    """Test creating an estimator for BPSymmetry Regression model."""
    tasks, dataset, transformers = dc.molnet.load_qm7_from_mat(
        featurizer='BPSymmetryFunction', move_mean=False)

    # Keep the test fast: only a handful of molecules.
    num_samples_to_use = 5
    train, _, _ = dataset
    X = train.X[:num_samples_to_use]
    y = train.y[:num_samples_to_use]
    w = train.w[:num_samples_to_use]
    ids = train.ids[:num_samples_to_use]

    dataset = dc.data.NumpyDataset(X, y, w, ids)

    max_atoms = 23
    batch_size = 16
    layer_structures = [128, 128, 64]

    ANItransformer = dc.trans.ANITransformer(
        max_atoms=max_atoms, atomic_number_differentiated=False)
    dataset = ANItransformer.transform(dataset)
    # The first feature column is the atomic number, which the model
    # strips off, so it sees one fewer feature than the transformer emits.
    n_feat = ANItransformer.get_num_feats() - 1

    model = dc.models.BPSymmetryFunctionRegression(
        len(tasks),
        max_atoms,
        n_feat,
        layer_structures=layer_structures,
        batch_size=batch_size,
        learning_rate=0.001,
        use_queue=False,
        mode="regression")

    metrics = {'error': tf.metrics.mean_absolute_error}

    def input_fn(epochs):
      # Estimator feature columns must be flat vectors; the model's
      # Reshape layers restore the (batch, max_atoms, ...) structure.
      X, y, w = dataset.make_iterator(
          batch_size=batch_size, epochs=epochs).get_next()
      atom_feats, atom_flags = tf.py_func(
          model.compute_features_on_batch, [X], Tout=[tf.float32, tf.float32])
      atom_feats = tf.reshape(
          atom_feats,
          shape=(tf.shape(atom_feats)[0], model.max_atoms * model.n_feat))
      atom_flags = tf.reshape(
          atom_flags,
          shape=(tf.shape(atom_flags)[0], model.max_atoms * model.max_atoms))

      features = dict()
      features['atom_feats'] = atom_feats
      features['atom_flags'] = atom_flags
      features['weights'] = w
      return features, y

    atom_feats = tf.feature_column.numeric_column(
        'atom_feats', shape=(max_atoms * n_feat,), dtype=tf.float32)
    # Fix: shape was written as (max_atoms * max_atoms) — a bare int, not
    # a tuple — inconsistent with the sibling columns; use a 1-tuple.
    atom_flags = tf.feature_column.numeric_column(
        'atom_flags', shape=(max_atoms * max_atoms,), dtype=tf.float32)
    weight_col = tf.feature_column.numeric_column(
        'weights', shape=(len(tasks),), dtype=tf.float32)

    estimator = model.make_estimator(
        feature_columns=[atom_feats, atom_flags],
        weight_column=weight_col,
        metrics=metrics)
    estimator.train(input_fn=lambda: input_fn(100))
    results = estimator.evaluate(input_fn=lambda: input_fn(1))

    assert results['error'] < 0.1

  def test_ani_regression(self):
    """Test creating an estimator for ANI Regression."""

    max_atoms = 4

    # Two tiny molecules, zero-padded to max_atoms rows. Column 0 is the
    # atomic number; columns 1-3 are Cartesian coordinates.
    X = np.array(
        [[
            [1, 5.0, 3.2, 1.1],
            [6, 1.0, 3.4, -1.1],
            [1, 2.3, 3.4, 2.2],
            [0, 0, 0, 0],
        ], [
            [8, 2.0, -1.4, -1.1],
            [7, 6.3, 2.4, 3.2],
            [0, 0, 0, 0],
            [0, 0, 0, 0],
        ]],
        dtype=np.float32)

    y = np.array([[2.0], [1.1]], dtype=np.float32)

    layer_structures = [128, 128, 64]
    atom_number_cases = [1, 6, 7, 8]

    kwargs = {
        "n_tasks": 1,
        "max_atoms": max_atoms,
        "layer_structures": layer_structures,
        "atom_number_cases": atom_number_cases,
        "batch_size": 2,
        "learning_rate": 0.001,
        "use_queue": False,
        "mode": "regression"
    }

    model = dc.models.ANIRegression(**kwargs)
    dataset = dc.data.NumpyDataset(X, y, n_tasks=1)

    metrics = {'error': tf.metrics.mean_absolute_error}

    def input_fn(epochs):
      # Estimator feature columns must be flat vectors; the model's
      # Reshape layers restore the (batch, max_atoms, ...) structure.
      X, y, w = dataset.make_iterator(batch_size=2, epochs=epochs).get_next()
      atom_feats, atom_numbers, atom_flags = tf.py_func(
          model.compute_features_on_batch, [X],
          Tout=[tf.float32, tf.int32, tf.float32])
      atom_feats = tf.reshape(
          atom_feats, shape=(tf.shape(atom_feats)[0], model.max_atoms * 4))
      atom_numbers = tf.reshape(
          atom_numbers, shape=(tf.shape(atom_numbers)[0], model.max_atoms))
      atom_flags = tf.reshape(
          atom_flags,
          shape=(tf.shape(atom_flags)[0], model.max_atoms * model.max_atoms))

      features = dict()
      features['atom_feats'] = atom_feats
      features['atom_numbers'] = atom_numbers
      features['atom_flags'] = atom_flags
      features['weights'] = w
      return features, y

    atom_feats = tf.feature_column.numeric_column(
        'atom_feats', shape=(max_atoms * 4,), dtype=tf.float32)
    atom_numbers = tf.feature_column.numeric_column(
        'atom_numbers', shape=(max_atoms,), dtype=tf.int32)
    # Fix: shape was written as (max_atoms * max_atoms) — a bare int, not
    # a tuple — inconsistent with the sibling columns; use a 1-tuple.
    atom_flags = tf.feature_column.numeric_column(
        'atom_flags', shape=(max_atoms * max_atoms,), dtype=tf.float32)
    weight_col = tf.feature_column.numeric_column(
        'weights', shape=(kwargs["n_tasks"],), dtype=tf.float32)

    estimator = model.make_estimator(
        feature_columns=[atom_feats, atom_numbers, atom_flags],
        weight_column=weight_col,
        metrics=metrics)
    estimator.train(input_fn=lambda: input_fn(100))

    results = estimator.evaluate(input_fn=lambda: input_fn(1))
    assert results['error'] < 0.1