Commit 8ae88ee5 authored by miaecle's avatar miaecle
Browse files

yapfed

parent 293073fb
Loading
Loading
Loading
Loading
+43 −19
Original line number Diff line number Diff line
@@ -405,9 +405,11 @@ Scaffold splitting
|bace_r          |Random forest       |Random      |0.958         |0.646         |
|                |NN regression       |Random      |0.898         |0.680         |
|                |Graphconv regression|Random      |0.760         |0.676         |
|                |Weave regression    |Random      |0.523         |0.577         |
|                |Random forest       |Scaffold    |0.956         |0.201         |
|                |NN regression       |Scaffold    |0.897         |0.208         |
|                |Graphconv regression|Scaffold    |0.783         |0.068         |
|                |Weave regression    |Scaffold    |0.602         |0.018         |
|chembl          |MT-NN regression    |Index       |0.828         |0.565         |
|                |Graphconv regression|Index       |0.192         |0.293         |
|                |MT-NN regression    |Random      |0.829         |0.562         |
@@ -417,49 +419,55 @@ Scaffold splitting
|clearance       |Random forest       |Index       |0.953         |0.244         |
|                |NN regression       |Index       |0.884         |0.211         |
|                |Graphconv regression|Index       |0.696         |0.230         |
|                |Weave regression    |Index       |0.261         |0.107         |
|                |Random forest       |Random      |0.952         |0.547         |
|                |NN regression       |Random      |0.880         |0.273         |
|                |Graphconv regression|Random      |0.685         |0.302         |
|                |Weave regression    |Random      |0.229         |0.129         |
|                |Random forest       |Scaffold    |0.952         |0.266         |
|                |NN regression       |Scaffold    |0.871         |0.154         |
|                |Graphconv regression|Scaffold    |0.628         |0.277         |
|                |Weave regression    |Scaffold    |0.228         |0.226         |
|delaney         |Random forest       |Index       |0.953         |0.626         |
|                |XGBoost             |Index       |0.898         |0.664         |
|                |NN regression       |Index       |0.868         |0.578         |
|                |Graphconv regression|Index       |0.967         |0.790         |
|                |Weave regression    |Index       |0.965         |0.888         |
|                |Random forest       |Random      |0.951         |0.684         |
|                |XGBoost             |Random      |0.927         |0.727         |
|                |NN regression       |Random      |0.865         |0.574         |
|                |Graphconv regression|Random      |0.964         |0.782         |
|                |Weave regression    |Random      |0.954         |0.917         |
|                |Random forest       |Scaffold    |0.953         |0.284         |
|                |XGBoost             |Scaffold    |0.890         |0.316         |
|                |NN regression       |Scaffold    |0.866         |0.342         |
|                |Graphconv regression|Scaffold    |0.967         |0.606         |
|                |Weave regression    |Scaffold    |0.976         |0.797         |
|hopv            |Random forest       |Index       |0.943         |0.338         |
|                |MT-NN regression    |Index       |0.725         |0.293         |
|                |Graphconv regression|Index       |0.307         |0.284         |
|                |Weave regression    |Index       |0.046         |0.026         |
|                |Random forest       |Random      |0.943         |0.513         |
|                |MT-NN regression    |Random      |0.716         |0.289         |
|                |Graphconv regression|Random      |0.329         |0.239         |
|                |Weave regression    |Random      |0.080         |0.084         |
|                |Random forest       |Scaffold    |0.946         |0.470         |
|                |MT-NN regression    |Scaffold    |0.719         |0.429         |
|                |Graphconv regression|Scaffold    |0.286         |0.155         |
|                |Weave regression    |Scaffold    |0.097         |0.082         |
|kaggle          |MT-NN regression    |User-defined|0.748         |0.452         |
|lipo            |Random forest       |Index       |0.960         |0.483         |
|                |NN regression       |Index       |0.825         |0.513         |
|                |Graphconv regression|Index       |0.865         |0.704         |
|                |DAG regression      |Index       |0.752         |0.507         |
|                |Weave regression    |Index       |0.507         |0.492         |
|                |Random forest       |Random      |0.958         |0.518         |
|                |NN regression       |Random      |0.818         |0.445         |
|                |Graphconv regression|Random      |0.867         |0.722         |
|                |DAG regression      |Random      |0.751         |0.446         |
|                |Weave regression    |Random      |0.551         |0.528         |
|                |Random forest       |Scaffold    |0.958         |0.329         |
|                |NN regression       |Scaffold    |0.831         |0.302         |
|                |Graphconv regression|Scaffold    |0.882         |0.593         |
|                |DAG regression      |Scaffold    |0.670         |0.378         |
|                |Weave regression    |Scaffold    |0.566         |0.448         |
|nci             |XGBoost             |Index       |0.441         |0.066         |
|                |MT-NN regression    |Index       |0.690         |0.062         |
|                |Graphconv regression|Index       |0.123         |0.053         |
@@ -478,15 +486,15 @@ Scaffold splitting
|ppb             |Random forest       |Index       |0.951         |0.235         |
|                |NN regression       |Index       |0.902         |0.333         |
|                |Graphconv regression|Index       |0.673         |0.442         |
|                |DAG regression      |Index       |0.516         |0.295         |
|                |Weave regression    |Index       |0.418         |0.301         |
|                |Random forest       |Random      |0.950         |0.220         |
|                |NN regression       |Random      |0.903         |0.244         |
|                |Graphconv regression|Random      |0.646         |0.429         |
|                |DAG regression      |Random      |0.571         |0.227         |
|                |Weave regression    |Random      |0.408         |0.284         |
|                |Random forest       |Scaffold    |0.943         |0.176         |
|                |NN regression       |Scaffold    |0.902         |0.144         |
|                |Graphconv regression|Scaffold    |0.695         |0.391         |
|                |DAG regression      |Scaffold    |0.632         |0.272         |
|                |Weave regression    |Scaffold    |0.401         |0.373         |
|qm7             |NN regression       |Index       |0.997         |0.992         |
|                |DTNN                |Index       |0.998         |0.996         |
|                |NN regression       |Random      |0.998         |0.997         |
@@ -512,17 +520,17 @@ Scaffold splitting
|                |XGBoost             |Index       |0.884         |0.784         |
|                |NN regression       |Index       |0.917         |0.764         |
|                |Graphconv regression|Index       |0.982         |0.903         |
|                |Weave regression    |Index       |0.993         |0.948         |
|                |Random forest       |Random      |0.967         |0.752         |
|                |XGBoost             |Random      |0.906         |0.745         |
|                |NN regression       |Random      |0.908         |0.711         |
|                |Graphconv regression|Random      |0.987         |0.868         |
|                |Weave regression    |Random      |0.992         |0.888         |
|                |Random forest       |Scaffold    |0.966         |0.477         |
|                |XGBoost             |Scaffold    |0.918         |0.439         |
|                |NN regression       |Scaffold    |0.891         |0.217         |
|                |Graphconv regression|Scaffold    |0.985         |0.666         |
|                |Weave regression    |Scaffold    |0.988         |0.876         |

|Dataset         |Model            |Splitting   |Train score/MAE(kcal/mol)|Valid score/MAE(kcal/mol)|
|----------------|-----------------|------------|-------------------------|-------------------------|
@@ -575,12 +583,14 @@ Time needed for benchmark test(~20h in total)
|                |Random forest       |10              |80             |
|                |IRV                 |10              |10             |
|                |Graph convolution   |15              |70             |
|                |Weave               |15              |120            |
|bbbp            |Logistic regression |20              |10             |
|                |NN classification   |20              |20             |
|                |Robust NN           |20              |20             |
|                |Random forest       |20              |120            |
|                |IRV                 |20              |10             |
|                |Graph convolution   |20              |150            |
|                |Weave               |20              |100            |
|clintox         |Logistic regression |15              |10             |
|                |XGBoost             |15              |33             |
|                |MT-NN classification|15              |20             |
@@ -588,6 +598,7 @@ Time needed for benchmark test(~20h in total)
|                |Random forest       |15              |200            |
|                |IRV                 |15              |10             |
|                |Graph convolution   |20              |130            |
|                |Weave               |20              |90             |
|hiv             |Logistic regression |180             |40             |
|                |XGBoost             |180             |1000           |
|                |NN classification   |180             |350            |
@@ -595,11 +606,13 @@ Time needed for benchmark test(~20h in total)
|                |Random forest       |180             |2800           |
|                |IRV                 |180             |200            |
|                |Graph convolution   |180             |1300           |
|                |Weave               |180             |2000           |
|muv             |Logistic regression |600             |450            |
|                |XGBoost             |600             |3500           |
|                |MT-NN classification|600             |400            |
|                |Robust MT-NN        |600             |550            |
|                |Graph convolution   |800             |1800           |
|                |Weave               |800             |4400           |
|pcba            |Logistic regression |1800            |10000          |
|                |XGBoost             |1800            |470000         |
|                |MT-NN classification|1800            |9000           |
@@ -612,6 +625,7 @@ Time needed for benchmark test(~20h in total)
|                |Random forest       |15              |2200           |
|                |IRV                 |15              |150            |
|                |Graph convolution   |20              |50             |
|                |Weave               |20              |200            |
|tox21           |Logistic regression |30              |60             |
|                |XGBoost             |30              |1500           |
|                |MT-NN classification|30              |60             |
@@ -619,30 +633,37 @@ Time needed for benchmark test(~20h in total)
|                |Random forest       |30              |6000           |
|                |IRV                 |30              |650            |
|                |Graph convolution   |30              |160            |
|                |Weave               |30              |300            |
|toxcast         |Logistic regression |80              |2600           |
|                |XGBoost             |80              |30000          |
|                |MT-NN classification|80              |2300           |
|                |Robust MT-NN        |80              |4000           |
|                |Graph convolution   |80              |900            |
|                |Weave               |80              |2000           |
|bace_r          |NN regression       |10              |30             |
|                |Random forest       |10              |50             |
|                |Graphconv regression|10              |110            |
|                |Weave regression    |10              |150            |
|chembl          |MT-NN regression    |200             |9000           |
|                |Graphconv regression|250             |1800           |
|clearance       |NN regression       |10              |20             |
|                |Random forest       |10              |10             |
|                |Graphconv regression|10              |60             |
|                |Weave regression    |10              |70             |
|delaney         |NN regression       |10              |40             |
|                |XGBoost             |10              |50             |
|                |Random forest       |10              |30             |
|                |graphconv regression|10              |40             |
|                |Weave regression    |10              |40             |
|hopv            |MT-NN regression    |10              |20             |
|                |Random forest       |10              |50             |
|                |Graphconv regression|10              |50             |
|                |Weave regression    |10              |60             |
|kaggle          |MT-NN regression    |2200            |3200           |
|lipo            |NN regression       |30              |60             |
|                |Random forest       |30              |60             |
|                |Graphconv regression|30              |240            |
|                |Weave regression    |30              |280            |
|nci             |MT-NN regression    |400             |1200           |
|                |XGBoost             |400             |28000          |
|                |graphconv regression|400             |2500           |
@@ -650,16 +671,19 @@ Time needed for benchmark test(~20h in total)
|pdbbind(refined)|NN regression       |0(featurized)   |40             |
|pdbbind(full)   |NN regression       |0(featurized)   |60             |
|ppb             |NN regression       |20              |30             |
|                |Random forest       |20              |30             |
|                |Graphconv regression|20              |100            |
|                |Weave regression    |20              |120            |
|qm7             |MT-NN regression    |10              |400            |
|qm7b            |MT-NN regression    |10              |600            |
|qm8             |MT-NN regression    |60              |1000           |
|qm9             |MT-NN regression    |220             |10000          |
|sampl           |NN regression       |10              |30             |
|                |XGBoost             |10              |20             |
|                |Random forest       |10              |20             |
|                |graphconv regression|10              |40             |
|                |Weave regression    |10              |20             |



### Gitter
+3 −2
Original line number Diff line number Diff line
@@ -195,6 +195,7 @@ class SequentialWeaveGraph(SequentialGraph):
        self.output = layer(self.output)
      self.layers.append(layer)


class SequentialWeaveGraph_v2(SequentialGraph):
  """SequentialGraph for Weave models
  """
@@ -206,8 +207,8 @@ class SequentialWeaveGraph_v2(SequentialGraph):
    self.n_atom_feat = n_atom_feat
    self.n_pair_feat = n_pair_feat
    with self.graph.as_default():
      self.graph_topology = WeaveGraphTopology_v2(
          self.batch_size, self.max_atoms, self.n_atom_feat, self.n_pair_feat)
      self.output = self.graph_topology.get_atom_features_placeholder()
      self.output_P = self.graph_topology.get_pair_features_placeholder()
    self.layers = []
+23 −16
Original line number Diff line number Diff line
@@ -493,10 +493,15 @@ class WeaveGraphTopology(GraphTopology):
    }
    return dict_DTNN


class WeaveGraphTopology_v2(GraphTopology):
  """Manages placeholders associated with batch of graphs and their topology"""

  def __init__(self,
               batch_size,
               max_atoms,
               n_atom_feat,
               n_pair_feat,
               name='Weave_topology'):
    """
    Parameters
@@ -525,17 +530,17 @@ class WeaveGraphTopology_v2(GraphTopology):
        shape=(None, self.n_pair_feat),
        name=self.name + '_pair_features')
    self.pair_split_placeholder = tf.placeholder(
        dtype='int32', shape=(None,), name=self.name + '_pair_split')
    self.atom_split_placeholder = tf.placeholder(
        dtype='int32', shape=(self.batch_size,), name=self.name + '_atom_split')
    self.atom_to_pair_placeholder = tf.placeholder(
        dtype='int32', shape=(None, 2), name=self.name + '_atom_to_pair')

    # Define the list of tensors to be used as topology
    self.topology = [
        self.pair_split_placeholder, self.atom_split_placeholder,
        self.atom_to_pair_placeholder
    ]
    self.inputs = [self.atom_features_placeholder]
    self.inputs += self.topology

@@ -572,7 +577,8 @@ class WeaveGraphTopology_v2(GraphTopology):
      atom_split.append(n_atoms)
      # index of pair features
      C0, C1 = np.meshgrid(np.arange(n_atoms), np.arange(n_atoms))
      atom_to_pair.append(
          np.transpose(np.array([C1.flatten() + start, C0.flatten() + start])))
      # number of pairs for each atom
      pair_split.extend(C1.flatten() + start)
      start = start + n_atoms
@@ -580,8 +586,9 @@ class WeaveGraphTopology_v2(GraphTopology):
      # atom features
      atom_feat.append(mol.get_atom_features())
      # pair features
      pair_feat.append(
          np.reshape(mol.get_pair_features(), (n_atoms * n_atoms,
                                               self.n_pair_feat)))

    atom_feat = np.concatenate(atom_feat, axis=0)
    pair_feat = np.concatenate(pair_feat, axis=0)
+17 −9
Original line number Diff line number Diff line
@@ -271,10 +271,14 @@ def benchmark_classification(train_dataset,
    max_atoms_test = max([mol.get_num_atoms() for mol in test_dataset.X])
    max_atoms = max([max_atoms_train, max_atoms_valid, max_atoms_test])

    graph_model = deepchem.nn.SequentialWeaveGraph_v2(
        batch_size,
        max_atoms=max_atoms,
        n_atom_feat=n_features,
        n_pair_feat=n_pair_feat)
    graph_model.add(deepchem.nn.WeaveLayer_v2(max_atoms, 75, 14))
    graph_model.add(
        deepchem.nn.WeaveLayer_v2(max_atoms, 50, 50, update_pair=False))
    graph_model.add(deepchem.nn.Dense(n_graph_feat, 50, activation='tanh'))
    graph_model.add(deepchem.nn.BatchNormalization(epsilon=1e-5, mode=1))
    graph_model.add(
@@ -595,10 +599,14 @@ def benchmark_regression(train_dataset,
    max_atoms_test = max([mol.get_num_atoms() for mol in test_dataset.X])
    max_atoms = max([max_atoms_train, max_atoms_valid, max_atoms_test])

    graph_model = deepchem.nn.SequentialWeaveGraph_v2(
        batch_size,
        max_atoms=max_atoms,
        n_atom_feat=n_features,
        n_pair_feat=n_pair_feat)
    graph_model.add(deepchem.nn.WeaveLayer_v2(max_atoms, 75, 14))
    graph_model.add(
        deepchem.nn.WeaveLayer_v2(max_atoms, 50, 50, update_pair=False))
    graph_model.add(deepchem.nn.Dense(n_graph_feat, 50, activation='tanh'))
    graph_model.add(deepchem.nn.BatchNormalization(epsilon=1e-5, mode=1))
    graph_model.add(
+34 −22
Original line number Diff line number Diff line
@@ -105,18 +105,23 @@ class WeaveLayer(Layer):
    ]
    if self.update_pair:
      self.W_AP = self.init([self.n_atom_input_feat * 2, self.n_hidden_AP])
      self.b_AP = model_ops.zeros(shape=[
          self.n_hidden_AP,
      ])

      self.W_PP = self.init([self.n_pair_input_feat, self.n_hidden_PP])
      self.b_PP = model_ops.zeros(shape=[
          self.n_hidden_PP,
      ])

      self.W_P = self.init([self.n_hidden_P, self.n_pair_output_feat])
      self.b_P = model_ops.zeros(shape=[
          self.n_pair_output_feat,
      ])

      self.trainable_weights.extend(
          [self.W_AP, self.b_AP, self.W_PP, self.b_PP, self.W_P, self.b_P])

  def call(self, x, mask=None):
    """Execute this layer on input tensors.

@@ -176,6 +181,7 @@ class WeaveLayer(Layer):


class WeaveLayer_v2(WeaveLayer):

  def call(self, x, mask=None):
    """Execute this layer on input tensors.

@@ -215,10 +221,14 @@ class WeaveLayer_v2(WeaveLayer):
    A = self.activation(A)

    if self.update_pair:
      AP_ij = tf.matmul(
          tf.reshape(
              tf.gather(atom_features, atom_to_pair),
              [-1, 2 * self.n_atom_input_feat]), self.W_AP) + self.b_AP
      AP_ij = self.activation(AP_ij)
      AP_ji = tf.matmul(
          tf.reshape(
              tf.gather(atom_features, tf.reverse(atom_to_pair, [1])),
              [-1, 2 * self.n_atom_input_feat]), self.W_AP) + self.b_AP
      AP_ji = self.activation(AP_ji)

@@ -400,7 +410,9 @@ class WeaveGather(Layer):
    outputs = tf.reshape(outputs, [-1, self.n_input * 11])
    return outputs


class WeaveGather_v2(WeaveGather):

  def call(self, x, mask=None):
    """Execute this layer on input tensors.