Unverified Commit c099b76b authored by Karl Leswing's avatar Karl Leswing Committed by GitHub
Browse files

Merge pull request #1042 from miaecle/temp3

MolNet update
parents 9e553fca 29a97407
Loading
Loading
Loading
Loading
+4 −2
Original line number Diff line number Diff line
@@ -789,11 +789,13 @@ class DiskDataset(Dataset):
        else:
          shard_batch_size = batch_size

        num_local_batches = math.ceil(n_shard_samples / shard_batch_size)

        if n_shard_samples == 0:
          cur_shard += 1
          if batch_size is None:
            cur_global_batch += 1
          continue

        num_local_batches = math.ceil(n_shard_samples / shard_batch_size)
        if not deterministic:
          sample_perm = np.random.permutation(n_shard_samples)
        else:
+31 −0
Original line number Diff line number Diff line
@@ -1562,6 +1562,37 @@ class ReduceMean(Layer):
    return out_tensor


class ReduceMax(Layer):
  """Layer that computes the maximum of its input over the given axes.

  If multiple input layers are supplied, they are stacked along a new
  leading axis before the reduction is applied.
  """

  def __init__(self, in_layers=None, axis=None, **kwargs):
    """
    Parameters
    ----------
    in_layers: list of Layer, optional
      Input layers to reduce.
    axis: int or sequence of int, optional
      Axis or axes to take the maximum over. None reduces over all
      axes, producing a scalar.
    """
    if axis is not None and not isinstance(axis, Sequence):
      # Normalize a single int axis into a list for uniform handling below.
      axis = [axis]
    self.axis = axis
    super(ReduceMax, self).__init__(in_layers, **kwargs)
    if axis is None:
      # Full reduction yields a scalar.
      self._shape = tuple()
    else:
      try:
        parent_shape = self.in_layers[0].shape
        # The reduced axes are dropped from the parent's shape.
        self._shape = [
            parent_shape[i] for i in range(len(parent_shape)) if i not in axis
        ]
      except Exception:
        # Shape inference is best-effort: the parent layer may not expose a
        # shape yet. Narrowed from a bare `except:` so that SystemExit and
        # KeyboardInterrupt are not swallowed.
        pass

  def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
    """Build the tf.reduce_max op over the (possibly stacked) inputs."""
    inputs = self._get_input_tensors(in_layers)
    if len(inputs) > 1:
      # Multiple inputs: stack them along a new leading axis first.
      self.out_tensor = tf.stack(inputs)
    else:
      self.out_tensor = inputs[0]

    out_tensor = tf.reduce_max(self.out_tensor, axis=self.axis)
    if set_tensors:
      self.out_tensor = out_tensor
    return out_tensor


class ToFloat(Layer):

  def __init__(self, in_layers=None, **kwargs):
+28 −6
Original line number Diff line number Diff line
@@ -139,7 +139,7 @@ class WeaveTensorGraph(TensorGraph):
          pad_batches=pad_batches):

        feed_dict = dict()
        if y_b is not None and not predict:
        if y_b is not None:
          for index, label in enumerate(self.labels_fd):
            if self.mode == "classification":
              feed_dict[label] = to_one_hot(y_b[:, index])
@@ -182,6 +182,17 @@ class WeaveTensorGraph(TensorGraph):
        feed_dict[self.atom_to_pair] = np.concatenate(atom_to_pair, axis=0)
        yield feed_dict

  def predict_on_generator(self, generator, transformers=None, outputs=None):
    """Run prediction on batches from `generator` and undo transforms.

    The parent implementation is invoked with no transformers so the raw
    model outputs are returned; multi-task outputs are stacked into one
    array before the transforms are undone here.

    Parameters
    ----------
    generator: generator
      Yields feed dicts for the model.
    transformers: list of Transformers, optional
      Transformers to undo on the predictions.
    outputs: list of Layers, optional
      Output layers to evaluate; defaults to self.outputs.
    """
    # Avoid the mutable-default-argument pitfall: use None as the sentinel.
    if transformers is None:
      transformers = []
    out = super(WeaveTensorGraph, self).predict_on_generator(
        generator, transformers=[], outputs=outputs)
    if outputs is None:
      outputs = self.outputs
    if len(outputs) > 1:
      # Multiple output layers: combine per-task predictions along axis 1.
      out = np.stack(out, axis=1)

    out = undo_transforms(out, transformers)
    return out


class DTNNTensorGraph(TensorGraph):

@@ -193,7 +204,7 @@ class DTNNTensorGraph(TensorGraph):
               distance_min=-1,
               distance_max=18,
               output_activation=True,
               mode="classification",
               mode="regression",
               **kwargs):
    """
            Parameters
@@ -294,7 +305,7 @@ class DTNNTensorGraph(TensorGraph):
          pad_batches=pad_batches):

        feed_dict = dict()
        if y_b is not None and not predict:
        if y_b is not None:
          for index, label in enumerate(self.labels_fd):
            feed_dict[label] = y_b[:, index:index + 1]
        if w_b is not None:
@@ -456,7 +467,7 @@ class DAGTensorGraph(TensorGraph):
          pad_batches=pad_batches):

        feed_dict = dict()
        if y_b is not None and not predict:
        if y_b is not None:
          for index, label in enumerate(self.labels_fd):
            if self.mode == "classification":
              feed_dict[label] = to_one_hot(y_b[:, index])
@@ -496,6 +507,17 @@ class DAGTensorGraph(TensorGraph):
        feed_dict[self.n_atoms] = n_atoms
        yield feed_dict

  def predict_on_generator(self, generator, transformers=None, outputs=None):
    """Run prediction on batches from `generator` and undo transforms.

    The parent implementation is invoked with no transformers so the raw
    model outputs are returned; multi-task outputs are stacked into one
    array before the transforms are undone here.

    Parameters
    ----------
    generator: generator
      Yields feed dicts for the model.
    transformers: list of Transformers, optional
      Transformers to undo on the predictions.
    outputs: list of Layers, optional
      Output layers to evaluate; defaults to self.outputs.
    """
    # Avoid the mutable-default-argument pitfall: use None as the sentinel.
    if transformers is None:
      transformers = []
    out = super(DAGTensorGraph, self).predict_on_generator(
        generator, transformers=[], outputs=outputs)
    if outputs is None:
      outputs = self.outputs
    if len(outputs) > 1:
      # Multiple output layers: combine per-task predictions along axis 1.
      out = np.stack(out, axis=1)

    out = undo_transforms(out, transformers)
    return out


class PetroskiSuchTensorGraph(TensorGraph):
  """
@@ -1034,7 +1056,7 @@ class MPNNTensorGraph(TensorGraph):
          pad_batches=pad_batches):

        feed_dict = dict()
        if y_b is not None and not predict:
        if y_b is not None:
          for index, label in enumerate(self.labels_fd):
            if self.mode == "classification":
              feed_dict[label] = to_one_hot(y_b[:, index])
@@ -1099,7 +1121,7 @@ class MPNNTensorGraph(TensorGraph):
      results = []
      for feed_dict in generator:
        # Extract number of unique samples in the batch from w_b
        n_valid_samples = len(np.nonzero(feed_dict[self.weights][:, 0])[0])
        n_valid_samples = len(np.nonzero(np.sum(feed_dict[self.weights], 1))[0])
        feed_dict = {
            self.layers[k.name].out_tensor: v
            for k, v in six.iteritems(feed_dict)
+18 −10
Original line number Diff line number Diff line
@@ -10,7 +10,7 @@ import copy
from deepchem.metrics import to_one_hot, from_one_hot
from deepchem.models.tensorgraph.layers import Dense, Concat, SoftMax, \
  SoftMaxCrossEntropy, BatchNorm, WeightedError, Dropout, BatchNormalization, \
  Conv1D, MaxPool1D, Squeeze, Stack, Highway
  Conv1D, ReduceMax, Squeeze, Stack, Highway
from deepchem.models.tensorgraph.graph_layers import DTNNEmbedding

from deepchem.models.tensorgraph.layers import L2Loss, Label, Weights, Feature
@@ -104,6 +104,8 @@ class TextCNNTensorGraph(TensorGraph):
      Properties of filters used in the conv net
    num_filters: list of int, optional
      Properties of filters used in the conv net
    dropout: float, optional
      Dropout rate
    mode: str
      Either "classification" or "regression" for type of model.
    """
@@ -174,15 +176,10 @@ class TextCNNTensorGraph(TensorGraph):
              in_layers=[self.Embedding]))
      # Max-over-time pooling
      self.pooled_outputs.append(
          MaxPool1D(
              window_shape=self.seq_length - filter_size + 1,
              strides=1,
              padding='VALID',
              in_layers=[self.conv_layers[-1]]))
          ReduceMax(axis=1, in_layers=[self.conv_layers[-1]]))
    # Concat features from all filters(one feature per filter)
    concat_outputs = Concat(axis=2, in_layers=self.pooled_outputs)
    outputs = Squeeze(squeeze_dims=1, in_layers=concat_outputs)
    dropout = Dropout(dropout_prob=self.dropout, in_layers=[outputs])
    concat_outputs = Concat(axis=1, in_layers=self.pooled_outputs)
    dropout = Dropout(dropout_prob=self.dropout, in_layers=[concat_outputs])
    dense = Dense(
        out_channels=200, activation_fn=tf.nn.relu, in_layers=[dropout])
    # Highway layer from https://arxiv.org/pdf/1505.00387.pdf
@@ -211,7 +208,7 @@ class TextCNNTensorGraph(TensorGraph):
        cost = L2Loss(in_layers=[label, regression])
        costs.append(cost)
    if self.mode == "classification":
      all_cost = Concat(in_layers=costs, axis=1)
      all_cost = Stack(in_layers=costs, axis=1)
    elif self.mode == "regression":
      all_cost = Stack(in_layers=costs, axis=1)
    self.weights = Weights(shape=(None, self.n_tasks))
@@ -272,3 +269,14 @@ class TextCNNTensorGraph(TensorGraph):
      # Padding with '_'
      seq.append(self.char_dict['_'])
    return np.array(seq)

  def predict_on_generator(self, generator, transformers=None, outputs=None):
    """Run prediction on batches from `generator` and undo transforms.

    The parent implementation is invoked with no transformers so the raw
    model outputs are returned; multi-task outputs are stacked into one
    array before the transforms are undone here.

    Parameters
    ----------
    generator: generator
      Yields feed dicts for the model.
    transformers: list of Transformers, optional
      Transformers to undo on the predictions.
    outputs: list of Layers, optional
      Output layers to evaluate; defaults to self.outputs.
    """
    # Avoid the mutable-default-argument pitfall: use None as the sentinel.
    if transformers is None:
      transformers = []
    out = super(TextCNNTensorGraph, self).predict_on_generator(
        generator, transformers=[], outputs=outputs)
    if outputs is None:
      outputs = self.outputs
    if len(outputs) > 1:
      # Multiple output layers: combine per-task predictions along axis 1.
      out = np.stack(out, axis=1)

    out = undo_transforms(out, transformers)
    return out
+28 −1
Original line number Diff line number Diff line
@@ -9,6 +9,8 @@ CheckFeaturizer = {
    ('bace_c', 'graphconv'): ['GraphConv', 75],
    ('bace_c', 'dag'): ['GraphConv', 75],
    ('bace_c', 'weave'): ['Weave', 75],
    ('bace_c', 'textcnn'): ['Raw', None],
    ('bace_c', 'mpnn'): ['Weave', [75, 14]],
    ('bbbp', 'logreg'): ['ECFP', 1024],
    ('bbbp', 'tf'): ['ECFP', 1024],
    ('bbbp', 'tf_robust'): ['ECFP', 1024],
@@ -19,6 +21,8 @@ CheckFeaturizer = {
    ('bbbp', 'graphconv'): ['GraphConv', 75],
    ('bbbp', 'dag'): ['GraphConv', 75],
    ('bbbp', 'weave'): ['Weave', 75],
    ('bbbp', 'textcnn'): ['Raw', None],
    ('bbbp', 'mpnn'): ['Weave', [75, 14]],
    ('clintox', 'logreg'): ['ECFP', 1024],
    ('clintox', 'tf'): ['ECFP', 1024],
    ('clintox', 'tf_robust'): ['ECFP', 1024],
@@ -29,6 +33,8 @@ CheckFeaturizer = {
    ('clintox', 'graphconv'): ['GraphConv', 75],
    ('clintox', 'dag'): ['GraphConv', 75],
    ('clintox', 'weave'): ['Weave', 75],
    ('clintox', 'textcnn'): ['Raw', None],
    ('clintox', 'mpnn'): ['Weave', [75, 14]],
    ('hiv', 'logreg'): ['ECFP', 1024],
    ('hiv', 'tf'): ['ECFP', 1024],
    ('hiv', 'tf_robust'): ['ECFP', 1024],
@@ -39,6 +45,8 @@ CheckFeaturizer = {
    ('hiv', 'graphconv'): ['GraphConv', 75],
    ('hiv', 'dag'): ['GraphConv', 75],
    ('hiv', 'weave'): ['Weave', 75],
    ('hiv', 'textcnn'): ['Raw', None],
    ('hiv', 'mpnn'): ['Weave', [75, 14]],
    ('muv', 'logreg'): ['ECFP', 1024],
    ('muv', 'tf'): ['ECFP', 1024],
    ('muv', 'tf_robust'): ['ECFP', 1024],
@@ -51,6 +59,8 @@ CheckFeaturizer = {
    ('muv', 'attn'): ['GraphConv', 75],
    ('muv', 'res'): ['GraphConv', 75],
    ('muv', 'weave'): ['Weave', 75],
    ('muv', 'textcnn'): ['Raw', None],
    ('muv', 'mpnn'): ['Weave', [75, 14]],
    ('pcba', 'logreg'): ['ECFP', 1024],
    ('pcba', 'tf'): ['ECFP', 1024],
    ('pcba', 'tf_robust'): ['ECFP', 1024],
@@ -58,6 +68,7 @@ CheckFeaturizer = {
    ('pcba', 'xgb'): ['ECFP', 1024],
    ('pcba', 'graphconv'): ['GraphConv', 75],
    ('pcba', 'weave'): ['Weave', 75],
    ('pcba', 'textcnn'): ['Raw', None],
    ('pcba_146', 'logreg'): ['ECFP', 1024],
    ('pcba_146', 'tf'): ['ECFP', 1024],
    ('pcba_146', 'tf_robust'): ['ECFP', 1024],
@@ -85,6 +96,8 @@ CheckFeaturizer = {
    ('sider', 'siamese'): ['GraphConv', 75],
    ('sider', 'attn'): ['GraphConv', 75],
    ('sider', 'res'): ['GraphConv', 75],
    ('sider', 'textcnn'): ['Raw', None],
    ('sider', 'mpnn'): ['Weave', [75, 14]],
    ('tox21', 'logreg'): ['ECFP', 1024],
    ('tox21', 'tf'): ['ECFP', 1024],
    ('tox21', 'tf_robust'): ['ECFP', 1024],
@@ -98,6 +111,8 @@ CheckFeaturizer = {
    ('tox21', 'siamese'): ['GraphConv', 75],
    ('tox21', 'attn'): ['GraphConv', 75],
    ('tox21', 'res'): ['GraphConv', 75],
    ('tox21', 'textcnn'): ['Raw', None],
    ('tox21', 'mpnn'): ['Weave', [75, 14]],
    ('toxcast', 'logreg'): ['ECFP', 1024],
    ('toxcast', 'tf'): ['ECFP', 1024],
    ('toxcast', 'tf_robust'): ['ECFP', 1024],
@@ -107,6 +122,8 @@ CheckFeaturizer = {
    ('toxcast', 'xgb'): ['ECFP', 1024],
    ('toxcast', 'graphconv'): ['GraphConv', 75],
    ('toxcast', 'weave'): ['Weave', 75],
    ('toxcast', 'textcnn'): ['Raw', None],
    ('toxcast', 'mpnn'): ['Weave', [75, 14]],
    ('bace_r', 'tf_regression'): ['ECFP', 1024],
    ('bace_r', 'rf_regression'): ['ECFP', 1024],
    ('bace_r', 'krr'): ['ECFP', 1024],
@@ -114,6 +131,7 @@ CheckFeaturizer = {
    ('bace_r', 'graphconvreg'): ['GraphConv', 75],
    ('bace_r', 'dag_regression'): ['GraphConv', 75],
    ('bace_r', 'weave_regression'): ['Weave', 75],
    ('bace_r', 'textcnn_regression'): ['Raw', None],
    ('chembl', 'tf_regression'): ['ECFP', 1024],
    ('chembl', 'rf_regression'): ['ECFP', 1024],
    ('chembl', 'krr'): ['ECFP', 1024],
@@ -135,6 +153,7 @@ CheckFeaturizer = {
    ('delaney', 'dag_regression'): ['GraphConv', 75],
    ('delaney', 'weave_regression'): ['Weave', 75],
    ('delaney', 'mpnn'): ['Weave', [75, 14]],
    ('delaney', 'textcnn_regression'): ['Raw', None],
    ('hopv', 'tf_regression'): ['ECFP', 1024],
    ('hopv', 'rf_regression'): ['ECFP', 1024],
    ('hopv', 'krr'): ['ECFP', 1024],
@@ -150,6 +169,7 @@ CheckFeaturizer = {
    ('lipo', 'dag_regression'): ['GraphConv', 75],
    ('lipo', 'weave_regression'): ['Weave', 75],
    ('lipo', 'mpnn'): ['Weave', [75, 14]],
    ('lipo', 'textcnn_regression'): ['Raw', None],
    ('nci', 'tf_regression'): ['ECFP', 1024],
    ('nci', 'rf_regression'): ['ECFP', 1024],
    ('nci', 'krr'): ['ECFP', 1024],
@@ -171,6 +191,7 @@ CheckFeaturizer = {
    ('sampl', 'dag_regression'): ['GraphConv', 75],
    ('sampl', 'weave_regression'): ['Weave', 75],
    ('sampl', 'mpnn'): ['Weave', [75, 14]],
    ('sampl', 'textcnn_regression'): ['Raw', None],
    ('kaggle', 'tf_regression'): [None, 14293],
    ('kaggle', 'rf_regression'): [None, 14293],
    ('kaggle', 'krr'): [None, 14293],
@@ -182,7 +203,9 @@ CheckFeaturizer = {
    ('qm7', 'rf_regression'): ['ECFP', 1024],
    ('qm7', 'krr'): ['ECFP', 1024],
    ('qm7', 'krr_ft'): ['CoulombMatrix', 1024],
    ('qm7', 'textcnn_regression'): ['Raw', None],
    ('qm7', 'graphconvreg'): ['GraphConv', 75],
    ('qm7', 'weave_regression'): ['Weave', 75],
    ('qm7', 'tf_regression_ft'): ['CoulombMatrix', [23, 23]],
    ('qm7', 'dtnn'): ['CoulombMatrix', [23, 23]],
    ('qm7', 'ani'): ['BPSymmetryFunction', [23, 4]],
@@ -198,6 +221,8 @@ CheckFeaturizer = {
    ('qm8', 'dtnn'): ['CoulombMatrix', [26, 26]],
    ('qm8', 'ani'): ['BPSymmetryFunction', [26, 4]],
    ('qm8', 'mpnn'): ['MP', [70, 8]],
    ('qm8', 'weave_regression'): ['Weave', 75],
    ('qm8', 'textcnn_regression'): ['Raw', None],
    ('qm9', 'tf_regression'): ['ECFP', 1024],
    ('qm9', 'rf_regression'): ['ECFP', 1024],
    ('qm9', 'krr'): ['ECFP', 1024],
@@ -206,7 +231,9 @@ CheckFeaturizer = {
    ('qm9', 'krr_ft'): ['CoulombMatrix', 1024],
    ('qm9', 'dtnn'): ['CoulombMatrix', [29, 29]],
    ('qm9', 'ani'): ['BPSymmetryFunction', [29, 4]],
    ('qm9', 'mpnn'): ['MP', [70, 8]]
    ('qm9', 'mpnn'): ['MP', [70, 8]],
    ('qm9', 'weave_regression'): ['Weave', 75],
    ('qm9', 'textcnn_regression'): ['Raw', None]
}

CheckSplit = {
Loading