Commit 04fadd9f authored by Vignesh
Browse files

Added smiles_to_seq conversion to input_fn and corresponding changes in text_cnn

parent 4db6f6f5
Loading
Loading
Loading
Loading
+17 −2
Original line number Diff line number Diff line
@@ -205,6 +205,22 @@ class TextCNNModel(TensorGraph):
    weighted_loss = WeightedError(in_layers=[loss, weights])
    self.set_loss(weighted_loss)

  @staticmethod
  def convert_bytes_to_char(s):
    s = ''.join(chr(b) for b in s)
    return s

  def smiles_to_seq_batch(self, ids_b):
    """Converts a batch of SMILES strings to a stacked np.array of sequences.

    A tf.py_func wrapper is written around this when creating the input_fn for
    make_estimator, which is presumably why bytes entries are accepted in
    addition to str.

    Parameters
    ----------
    ids_b: iterable
      SMILES entries. Each entry may be a str or a bytes object; bytes entries
      are converted with convert_bytes_to_char before being sequenced.

    Returns
    -------
    np.ndarray
      The results of self.smiles_to_seq for every entry, combined with
      np.vstack in input order.

    Raises
    ------
    ValueError
      If ids_b is empty, since np.vstack needs at least one array.
    """
    # Decide per entry instead of inspecting only ids_b[0]: a batch that mixes
    # bytes and str is then handled correctly, and an empty batch no longer
    # fails with an IndexError on the type probe.
    smiles_strs = [
        self.convert_bytes_to_char(smiles)
        if isinstance(smiles, bytes) else smiles for smiles in ids_b
    ]
    return np.vstack([self.smiles_to_seq(smiles) for smiles in smiles_strs])

  def default_generator(self,
                        dataset,
                        epochs=1,
@@ -230,8 +246,7 @@ class TextCNNModel(TensorGraph):
          feed_dict[self.task_weights[0]] = w_b

        # Transform SMILES sequence to integers
        smiles_seqs = [self.smiles_to_seq(smiles) for smiles in ids_b]
        feed_dict[self.smiles_seqs] = np.vstack(smiles_seqs)
        feed_dict[self.smiles_seqs] = self.smiles_to_seq_batch(ids_b)
        yield feed_dict

  def create_estimator_inputs(self, feature_columns, weight_column, features,
+8 −5
Original line number Diff line number Diff line
@@ -296,7 +296,7 @@ class TestEstimators(unittest.TestCase):

    np.random.seed(123)
    smile_ids = ["CCCCC", "CCC(=O)O", "CCC", "CC(=O)O", "O=C=O"]
    X = [model.smiles_to_seq(smile) for smile in smile_ids]
    X = smile_ids
    y = np.zeros((n_samples, n_tasks))
    w = np.ones((n_samples, n_tasks))
    dataset = NumpyDataset(X, y, w, smile_ids)
@@ -307,7 +307,8 @@ class TestEstimators(unittest.TestCase):
    def input_fn(epochs):
      x, y, weights = dataset.make_iterator(
          batch_size=n_samples, epochs=epochs).get_next()
      return {'x': x, 'weights': weights}, y
      smiles_seq = tf.py_func(model.smiles_to_seq_batch, inp=[x], Tout=tf.int32)
      return {'x': smiles_seq, 'weights': weights}, y

    # Create an estimator from it.
    x_col = tf.feature_column.numeric_column(
@@ -342,7 +343,7 @@ class TestEstimators(unittest.TestCase):

    np.random.seed(123)
    smile_ids = ["CCCCC", "CCC(=O)O", "CCC", "CC(=O)O", "O=C=O"]
    X = [model.smiles_to_seq(smile) for smile in smile_ids]
    X = smile_ids
    y = np.zeros((n_samples, n_tasks, 1), dtype=np.float32)
    w = np.ones((n_samples, n_tasks))
    dataset = NumpyDataset(X, y, w, smile_ids)
@@ -350,10 +351,12 @@ class TestEstimators(unittest.TestCase):
    def input_fn(epochs):
      x, y, weights = dataset.make_iterator(
          batch_size=n_samples, epochs=epochs).get_next()
      return {'x': x, 'weights': weights}, y
      smiles_seq = tf.py_func(model.smiles_to_seq_batch, inp=[x], Tout=tf.int32)
      return {'x': smiles_seq, 'weights': weights}, y

    # Create an estimator from it.
    x_col = tf.feature_column.numeric_column('x', shape=(seq_length,))
    x_col = tf.feature_column.numeric_column(
        'x', shape=(seq_length,), dtype=tf.int32)
    weight_col = tf.feature_column.numeric_column('weights', shape=(n_tasks,))
    metrics = {'error': tf.metrics.mean_absolute_error}
    estimator = model.make_estimator(