Unverified Commit 2ebaec77 authored by Bharath Ramsundar's avatar Bharath Ramsundar Committed by GitHub
Browse files

Merge pull request #1478 from VIGS25/textcnn-estimator

#1142: make_estimator TextCNN - Minor additions
parents c288fd5a 31b835fe
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
@@ -338,7 +338,6 @@ class DTNNEmbedding(Layer):
    self.build()

    atom_number = in_layers[0].out_tensor
    atom_number = tf.cast(atom_number, dtype=tf.int32)
    atom_features = tf.nn.embedding_lookup(self.embedding_list, atom_number)
    out_tensor = atom_features
    if set_tensors:
+26 −3
Original line number Diff line number Diff line
@@ -6,6 +6,7 @@ Created on Thu Sep 28 15:17:50 2017
import numpy as np
import tensorflow as tf
import copy
import six

from deepchem.metrics import to_one_hot, from_one_hot
from deepchem.models.tensorgraph.layers import Dense, Concat, SoftMax, \
@@ -205,6 +206,24 @@ class TextCNNModel(TensorGraph):
    weighted_loss = WeightedError(in_layers=[loss, weights])
    self.set_loss(weighted_loss)

  @staticmethod
  def convert_bytes_to_char(s):
    s = ''.join(chr(b) for b in s)
    return s

  def smiles_to_seq_batch(self, ids_b):
    """Convert a batch of SMILES strings into one stacked np.array of sequences.

    A tf.py_func wrapper is written around this when creating the input_fn
    for make_estimator.

    Parameters
    ----------
    ids_b: sequence of str or bytes
      Batch of SMILES identifiers. tf.py_func delivers strings as bytes on
      Python 3, so bytes entries are decoded to str first.

    Returns
    -------
    np.ndarray
      One row per input SMILES, produced by self.smiles_to_seq and
      vertically stacked.
    """
    # On Python 2.7 bytes and str are analogous, so decoding is only
    # needed on Python 3 (assumes a non-empty batch — TODO confirm callers
    # never pass an empty one).
    if isinstance(ids_b[0], bytes) and not six.PY2:
      ids_b = [
          TextCNNModel.convert_bytes_to_char(smiles) for smiles in ids_b
      ]
    seqs = [self.smiles_to_seq(smiles) for smiles in ids_b]
    return np.vstack(seqs)

  def default_generator(self,
                        dataset,
                        epochs=1,
@@ -230,8 +249,7 @@ class TextCNNModel(TensorGraph):
          feed_dict[self.task_weights[0]] = w_b

        # Transform SMILES sequence to integers
        smiles_seqs = [self.smiles_to_seq(smiles) for smiles in ids_b]
        feed_dict[self.smiles_seqs] = np.vstack(smiles_seqs)
        feed_dict[self.smiles_seqs] = self.smiles_to_seq_batch(ids_b)
        yield feed_dict

  def create_estimator_inputs(self, feature_columns, weight_column, features,
@@ -239,7 +257,12 @@ class TextCNNModel(TensorGraph):
    """Creates tensors for inputs."""
    tensors = dict()
    for layer, column in zip(self.features, feature_columns):
      tensors[layer] = tf.feature_column.input_layer(features, [column])
      feature_col = tf.feature_column.input_layer(features, [column])
      if column.dtype != feature_col.dtype:
        feature_col = tf.cast(feature_col, column.dtype)
      if len(column.shape) < 1:
        feature_col = tf.reshape(feature_col, shape=[tf.shape(feature_col)[0]])
      tensors[layer] = feature_col
    if weight_column is not None:
      tensors[self.task_weights[0]] = tf.feature_column.input_layer(
          features, [weight_column])
+8 −5
Original line number Diff line number Diff line
@@ -299,7 +299,7 @@ class TestEstimators(unittest.TestCase):

    np.random.seed(123)
    smile_ids = ["CCCCC", "CCC(=O)O", "CCC", "CC(=O)O", "O=C=O"]
    X = [model.smiles_to_seq(smile) for smile in smile_ids]
    X = smile_ids
    y = np.zeros((n_samples, n_tasks))
    w = np.ones((n_samples, n_tasks))
    dataset = NumpyDataset(X, y, w, smile_ids)
@@ -310,7 +310,8 @@ class TestEstimators(unittest.TestCase):
    def input_fn(epochs):
      x, y, weights = dataset.make_iterator(
          batch_size=n_samples, epochs=epochs).get_next()
      return {'x': x, 'weights': weights}, y
      smiles_seq = tf.py_func(model.smiles_to_seq_batch, inp=[x], Tout=tf.int32)
      return {'x': smiles_seq, 'weights': weights}, y

    # Create an estimator from it.
    x_col = tf.feature_column.numeric_column(
@@ -345,7 +346,7 @@ class TestEstimators(unittest.TestCase):

    np.random.seed(123)
    smile_ids = ["CCCCC", "CCC(=O)O", "CCC", "CC(=O)O", "O=C=O"]
    X = [model.smiles_to_seq(smile) for smile in smile_ids]
    X = smile_ids
    y = np.zeros((n_samples, n_tasks, 1), dtype=np.float32)
    w = np.ones((n_samples, n_tasks))
    dataset = NumpyDataset(X, y, w, smile_ids)
@@ -353,10 +354,12 @@ class TestEstimators(unittest.TestCase):
    def input_fn(epochs):
      x, y, weights = dataset.make_iterator(
          batch_size=n_samples, epochs=epochs).get_next()
      return {'x': x, 'weights': weights}, y
      smiles_seq = tf.py_func(model.smiles_to_seq_batch, inp=[x], Tout=tf.int32)
      return {'x': smiles_seq, 'weights': weights}, y

    # Create an estimator from it.
    x_col = tf.feature_column.numeric_column('x', shape=(seq_length,))
    x_col = tf.feature_column.numeric_column(
        'x', shape=(seq_length,), dtype=tf.int32)
    weight_col = tf.feature_column.numeric_column('weights', shape=(n_tasks,))
    metrics = {'error': tf.metrics.mean_absolute_error}
    estimator = model.make_estimator(