Commit 05789988 authored by Vignesh
Browse files

make_estimator for text_cnn; corrections to DTNNEmbedding; added tests

parent db47d907
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -338,10 +338,13 @@ class DTNNEmbedding(Layer):

    self.build()
    atom_number = in_layers[0].out_tensor
    atom_number = tf.cast(atom_number, dtype=tf.int32)
    atom_features = tf.nn.embedding_lookup(self.embedding_list, atom_number)
    out_tensor = atom_features
    if set_tensors:
      self.variables = self.trainable_weights
      self.out_tensor = atom_features
    return out_tensor

  def none_tensors(self):
    embedding_list = self.embedding_list
+19 −5
Original line number Diff line number Diff line
@@ -207,9 +207,6 @@ class TextCNNModel(TensorGraph):
        self.labels_fd.append(label)
        cost = L2Loss(in_layers=[label, regression])
        costs.append(cost)
    if self.mode == "classification":
      all_cost = Stack(in_layers=costs, axis=1)
    elif self.mode == "regression":
    all_cost = Stack(in_layers=costs, axis=1)
    self.weights = Weights(shape=(None, self.n_tasks))
    loss = WeightedError(in_layers=[all_cost, self.weights])
@@ -245,6 +242,23 @@ class TextCNNModel(TensorGraph):
        feed_dict[self.smiles_seqs] = np.stack(smiles_seqs, axis=0)
        yield feed_dict

  def create_estimator_inputs(self, feature_columns, weight_column, features,
                              labels, mode):
    """Map this model's input layers to tensors built from Estimator inputs.

    Parameters
    ----------
    feature_columns: iterable
      Feature columns, paired in order with the layers in `self.features`.
    weight_column: feature column or None
      When given, its tensor is bound to the first entry of
      `self.task_weights`.
    features: dict
      Passed through to `tf.feature_column.input_layer` for every column.
    labels: tensor or None
      When given, bound to the first entry of `self.labels`.  In
      "classification" mode (`self.mode`) the labels are cast to int32 and
      one-hot encoded with depth 2; otherwise they are used unchanged.
    mode:
      Not read by this method.

    Returns
    -------
    dict mapping layers to the tensors that should feed them.
    """
    tensors = {
        layer: tf.feature_column.input_layer(features, [column])
        for layer, column in zip(self.features, feature_columns)
    }

    if weight_column is not None:
      weight_tensor = tf.feature_column.input_layer(features, [weight_column])
      tensors[self.task_weights[0]] = weight_tensor

    # Nothing more to bind when no labels were supplied.
    if labels is None:
      return tensors

    if self.mode == "classification":
      label_tensor = tf.one_hot(tf.cast(labels, tf.int32), 2)
    else:
      label_tensor = labels
    tensors[self.labels[0]] = label_tensor
    return tensors

  def smiles_to_seq(self, smiles):
    """ Tokenize characters in smiles to integers
    """
@@ -268,7 +282,7 @@ class TextCNNModel(TensorGraph):
    for i in range(self.seq_length - len(seq)):
      # Padding with '_'
      seq.append(self.char_dict['_'])
    return np.array(seq)
    return np.array(seq, dtype=np.int32)

  def predict_on_generator(self, generator, transformers=[], outputs=None):
    out = super(TextCNNModel, self).predict_on_generator(
+92 −0
Original line number Diff line number Diff line
@@ -4,6 +4,7 @@ import tensorflow as tf
import deepchem as dc
import deepchem.models.tensorgraph.layers as layers
from deepchem.data import NumpyDataset
from deepchem.models.tensorgraph.models.text_cnn import default_dict


class TestEstimators(unittest.TestCase):
@@ -279,6 +280,97 @@ class TestEstimators(unittest.TestCase):
    results = estimator.evaluate(input_fn=lambda: input_fn(1))
    assert results['accuracy'] > 0.9

  def test_textcnn_classification(self):
    """Check that a classification TextCNNModel can be run as an Estimator.

    A small model is wrapped with `make_estimator`, trained on five SMILES
    strings whose labels are all zero, and then evaluated.  The test passes
    when the evaluated loss is below 1e-2 and the accuracy is above 0.9.
    """

    n_tasks = 1
    n_samples = 5

    # Build the TensorGraph model under test.
    seq_length = 20
    model = dc.models.TextCNNModel(
        n_tasks=n_tasks,
        char_dict=default_dict,
        seq_length=seq_length,
        kernel_sizes=[5, 5],
        num_filters=[20, 20])

    np.random.seed(123)
    smile_ids = ["CCCCC", "CCC(=O)O", "CCC", "CC(=O)O", "O=C=O"]
    # Encode every SMILES string as an integer sequence.
    encoded = []
    for smile in smile_ids:
      encoded.append(model.smiles_to_seq(smile))
    per_task_shape = (n_samples, n_tasks)
    targets = np.zeros(per_task_shape)
    sample_weights = np.ones(per_task_shape)
    dataset = NumpyDataset(encoded, targets, sample_weights, smile_ids)

    def accuracy(labels, predictions, weights):
      # Reduce labels and predictions to class indices before comparing them.
      true_classes = tf.argmax(labels, axis=2)
      predicted_classes = tf.expand_dims(
          tf.argmax(predictions, axis=1), axis=1)
      return tf.metrics.accuracy(true_classes, predicted_classes, weights)

    def input_fn(epochs):
      iterator = dataset.make_iterator(batch_size=n_samples, epochs=epochs)
      batch_x, batch_y, batch_w = iterator.get_next()
      inputs = {'x': batch_x, 'weights': batch_w}
      return inputs, batch_y

    # Wrap the model in an Estimator.
    x_col = tf.feature_column.numeric_column(
        'x', shape=(seq_length,), dtype=tf.int32)
    weight_col = tf.feature_column.numeric_column('weights', shape=(n_tasks,))
    estimator = model.make_estimator(
        feature_columns=[x_col],
        weight_column=weight_col,
        metrics={'accuracy': accuracy})

    # Fit for 100 epochs, then score on a single pass over the data.
    estimator.train(input_fn=lambda: input_fn(100))
    results = estimator.evaluate(input_fn=lambda: input_fn(1))

    assert results['loss'] < 1e-2
    assert results['accuracy'] > 0.9

  def test_textcnn_regression(self):
    """Test creating an Estimator from TextCNN for regression.

    Builds a regression-mode TextCNNModel, wraps it with `make_estimator`,
    trains it on a handful of SMILES strings whose targets are all zero, and
    checks that the evaluated loss and mean absolute error are small.
    """

    n_tasks = 1
    smile_ids = ["CCCCC", "CCC(=O)O", "CCC", "CC(=O)O", "O=C=O"]
    # Derive the sample count from the inputs so that X, y, w and ids all
    # have the same number of rows.
    n_samples = len(smile_ids)

    # Create a TensorGraph model.
    seq_length = 20
    model = dc.models.TextCNNModel(
        n_tasks=n_tasks,
        char_dict=default_dict,
        seq_length=seq_length,
        kernel_sizes=[5, 5],
        num_filters=[20, 20],
        mode="regression")

    np.random.seed(123)
    X = [model.smiles_to_seq(smile) for smile in smile_ids]
    y = np.zeros((n_samples, n_tasks), dtype=np.float32)
    w = np.ones((n_samples, n_tasks))
    dataset = NumpyDataset(X, y, w, smile_ids)

    def input_fn(epochs):
      # One batch holds the whole dataset.
      x, y, weights = dataset.make_iterator(
          batch_size=n_samples, epochs=epochs).get_next()
      return {'x': x, 'weights': weights}, y

    # Create an estimator from it.
    x_col = tf.feature_column.numeric_column('x', shape=(seq_length,))
    weight_col = tf.feature_column.numeric_column('weights', shape=(n_tasks,))
    metrics = {'error': tf.metrics.mean_absolute_error}
    estimator = model.make_estimator(
        feature_columns=[x_col], weight_column=weight_col, metrics=metrics)

    # Train the model.
    estimator.train(input_fn=lambda: input_fn(100))

    # Evaluate results
    results = estimator.evaluate(input_fn=lambda: input_fn(1))
    assert results['loss'] < 1e-2
    assert results['error'] < 0.1

  def test_scscore(self):
    """Test creating an Estimator from a ScScoreModel."""
    n_samples = 10