Commit 1a3c14b0 authored by miaecle's avatar miaecle
Browse files

fix bug and yapf

parent 1d002eef
Loading
Loading
Loading
Loading
+15 −8
Original line number Diff line number Diff line
@@ -496,26 +496,29 @@ Scaffold splitting
|                |Graphconv regression|Scaffold    |0.695         |0.391         |
|                |Weave regression    |Scaffold    |0.401         |0.373         |
|qm7             |NN regression       |Index       |0.997         |0.992         |
|                |DTNN                |Index       |0.998         |0.996         |
|                |DTNN                |Index       |0.997         |0.995         |
|                |NN regression       |Random      |0.998         |0.997         |
|                |DTNN                |Random      |0.998         |0.998         |
|                |DTNN                |Random      |0.999         |0.998         |
|                |NN regression       |Stratified  |0.998         |0.997         | 
|                |DTNN                |Stratified  |0.998         |0.998         | 
|qm7b            |MT-NN regression    |Index       |0.903         |0.789         |
|                |DTNN                |Index       |0.872         |0.821         |
|                |DTNN                |Index       |0.919         |0.863         |
|                |MT-NN regression    |Random      |0.893         |0.839         |
|                |DTNN                |Random      |0.865         |0.849         |
|                |DTNN                |Random      |0.924         |0.898         |
|                |MT-NN regression    |Stratified  |0.891         |0.859         | 
|                |DTNN                |Stratified  |0.853         |0.839         | 
|                |DTNN                |Stratified  |0.913         |0.894         | 
|qm8             |MT-NN regression    |Index       |0.783         |0.656         |
|                |DTNN                |Index       |0.737         |0.639         |
|                |DTNN                |Index       |0.857         |0.691         |
|                |MT-NN regression    |Random      |0.747         |0.660         |
|                |DTNN                |Random      |0.731         |0.711         |
|                |DTNN                |Random      |0.842         |0.756         |
|                |MT-NN regression    |Stratified  |0.756         |0.681         |
|                |DTNN                |Stratified  |0.714         |0.683         | 
|                |DTNN                |Stratified  |0.844         |0.758         | 
|qm9             |MT-NN regression    |Index       |0.733         |0.766         |
|                |DTNN                |Index       |0.918         |0.831         | 
|                |MT-NN regression    |Random      |0.852         |0.833         |
|                |DTNN                |Random      |0.942         |0.948         | 
|                |MT-NN regression    |Stratified  |0.764         |0.792         | 
|                |DTNN                |Stratified  |0.941         |0.867         | 
|sampl           |Random forest       |Index       |0.968         |0.736         |
|                |XGBoost             |Index       |0.884         |0.784         |
|                |NN regression       |Index       |0.917         |0.764         |
@@ -675,9 +678,13 @@ Time needed for benchmark test(~20h in total)
|                |Graphconv regression|20              |100            |
|                |Weave regression    |20              |120            |
|qm7             |MT-NN regression    |10              |400            |
|                |DTNN                |10              |600            |
|qm7b            |MT-NN regression    |10              |600            |
|                |DTNN                |10              |600            |
|qm8             |MT-NN regression    |60              |1000           |
|                |DTNN                |10              |2000           |
|qm9             |MT-NN regression    |220             |10000          |
|                |DTNN                |10              |14000          |
|sampl           |NN regression       |10              |30             |
|                |XGBoost             |10              |20             |
|                |Random forest       |10              |20             |
+27 −17
Original line number Diff line number Diff line
@@ -18,7 +18,9 @@ from deepchem.nn import model_ops

from deepchem.models.tensorgraph.layers import Layer


class Combine_AP(Layer):

  def __init__(self, **kwargs):
    super(Combine_AP, self).__init__(**kwargs)

@@ -27,13 +29,16 @@ class Combine_AP(Layer):
    P = self.in_layers[1].out_tensor
    self.out_tensor = [A, P]


class Separate_AP(Layer):
  """ TensorGraph style layer selecting the first element of its input

  The output tensor of the first input layer is indexed with `[0]` and the
  result becomes this layer's output. In this file the layer is fed the
  `[A, P]` list produced by a weave layer, so the selected element is `A`.
  """

  def __init__(self, **kwargs):
    super(Separate_AP, self).__init__(**kwargs)

  def _create_tensor(self):
    # Only the first input layer is consulted; any further inputs are ignored.
    combined = self.in_layers[0].out_tensor
    self.out_tensor = combined[0]


class WeaveLayer(Layer):
  """ TensorGraph style implementation
  The same as deepchem.nn.WeaveLayer
@@ -173,6 +178,7 @@ class WeaveLayer(Layer):
      P = pair_features
    self.out_tensor = [A, P]


class WeaveGather(Layer):
  """ TensorGraph style implementation
  The same as deepchem.nn.WeaveGather
@@ -233,7 +239,6 @@ class WeaveGather(Layer):
      output_molecules = self.activation(output_molecules)
    self.out_tensor = output_molecules


  def gaussian_histogram(self, x):
    gaussian_memberships = [(-1.645, 0.283), (-1.080, 0.170), (-0.739, 0.134),
                            (-0.468, 0.118), (-0.228, 0.114), (0., 0.114),
@@ -352,6 +357,7 @@ class DTNNGather(Layer):
  """ TensorGraph style implementation
  The same as deepchem.nn.DTNNGather
  """

  def __init__(self,
               n_embedding=30,
               n_outputs=100,
@@ -398,10 +404,12 @@ class DTNNGather(Layer):
    output = tf.segment_sum(output, atom_membership)
    self.out_tensor = output


class DAGLayer(Layer):
  """ TensorGraph style implementation
  The same as deepchem.nn.DAGLayer
  """

  def __init__(self,
               n_graph_feat=30,
               n_atom_feat=75,
@@ -481,27 +489,28 @@ class DAGLayer(Layer):

    n_atoms = self.in_layers[4].out_tensor
    # initialize graph features for each graph
    graph_features_initial = tf.zeros((self.max_atoms*self.batch_size, self.max_atoms+1, self.n_graph_feat))
    graph_features_initial = tf.zeros((self.max_atoms * self.batch_size,
                                       self.max_atoms + 1, self.n_graph_feat))
    # initialize graph features for each graph
    # another row of zeros is generated for padded dummy atoms
    graph_features = tf.Variable(
        graph_features_initial,
        trainable=False)
    graph_features = tf.Variable(graph_features_initial, trainable=False)

    for count in range(self.max_atoms):
      # `count`-th step
      # extracting atom features of target atoms: (batch_size*max_atoms) * n_atom_features
      mask = calculation_masks[:, count]
      current_round = tf.boolean_mask(calculation_orders[:, count], mask)
      batch_atom_features = tf.gather(atom_features,
                                      current_round)
      batch_atom_features = tf.gather(atom_features, current_round)

      # generating index for graph features used in the inputs
      index = tf.stack(
          [
              tf.reshape(
                  tf.stack([tf.boolean_mask(tf.range(n_atoms), mask)] * (self.max_atoms - 1), axis=1),
                  [-1]), tf.reshape(tf.boolean_mask(parents[:, count, 1:], mask), [-1])
                  tf.stack(
                      [tf.boolean_mask(tf.range(n_atoms), mask)] *
                      (self.max_atoms - 1),
                      axis=1), [-1]),
              tf.reshape(tf.boolean_mask(parents[:, count, 1:], mask), [-1])
          ],
          axis=1)
      # extracting graph features for parents of the target atoms, then flatten
@@ -539,6 +548,7 @@ class DAGGather(Layer):
  """ TensorGraph style implementation
  The same as deepchem.nn.DAGGather
  """

  def __init__(self,
               n_graph_feat=30,
               n_outputs=30,
+90 −65
Original line number Diff line number Diff line
@@ -11,6 +11,7 @@ from deepchem.models.tensorgraph.graph_layers import WeaveLayer, WeaveGather, \
from deepchem.metrics import to_one_hot, from_one_hot
from deepchem.trans import undo_transforms


class WeaveTensorGraph(TensorGraph):

  def __init__(self,
@@ -49,7 +50,8 @@ class WeaveTensorGraph(TensorGraph):
        update_pair=False,
        in_layers=[weave_layer1, self.pair_split, self.atom_to_pair])
    separated = Separate_AP(in_layers=[weave_layer2])
    dense1 = Dense(out_channels=self.n_graph_feat, 
    dense1 = Dense(
        out_channels=self.n_graph_feat,
        activation_fn=tf.nn.relu,
        in_layers=[separated])
    batch_norm1 = BatchNormLayer(in_layers=[dense1])
@@ -63,7 +65,8 @@ class WeaveTensorGraph(TensorGraph):
    self.labels_fd = []
    for task in range(self.n_tasks):
      if self.mode == "classification":
        classification = Dense(out_channels=2, activation_fn=None, in_layers=[weave_gather])
        classification = Dense(
            out_channels=2, activation_fn=None, in_layers=[weave_gather])
        softmax = SoftMax(in_layers=[classification])
        self.add_output(softmax)

@@ -72,7 +75,8 @@ class WeaveTensorGraph(TensorGraph):
        cost = SoftMaxCrossEntropy(in_layers=[label, classification])
        costs.append(cost)
      if self.mode == "regression":
        regression = Dense(out_channels=1, activation_fn=None, in_layers=[weave_gather])
        regression = Dense(
            out_channels=1, activation_fn=None, in_layers=[weave_gather])
        self.add_output(regression)

        label = Label(shape=(None, 1))
@@ -119,7 +123,8 @@ class WeaveTensorGraph(TensorGraph):
          # index of pair features
          C0, C1 = np.meshgrid(np.arange(n_atoms), np.arange(n_atoms))
          atom_to_pair.append(
              np.transpose(np.array([C1.flatten() + start, C0.flatten() + start])))
              np.transpose(
                  np.array([C1.flatten() + start, C0.flatten() + start])))
          # number of pairs for each atom
          pair_split.extend(C1.flatten() + start)
          start = start + n_atoms
@@ -138,7 +143,6 @@ class WeaveTensorGraph(TensorGraph):
        feed_dict[self.atom_to_pair] = np.concatenate(atom_to_pair, axis=0)
        yield feed_dict


  def predict(self, dataset, transformers=None, batch_size=None):
    """ Run prediction over `dataset`

    Parameters
    ----------
    dataset:
      Data handed to `self.default_generator` to build the prediction batches
      (called with `predict=True, pad_batches=False`).
    transformers: list, optional
      Forwarded unchanged to `self.predict_on_generator`. When omitted (or
      None) an empty list is used.
    batch_size: optional
      Not used by this method.
      NOTE(review): presumably kept for signature compatibility - confirm.

    Returns
    -------
    The value returned by `self.predict_on_generator`.
    """
    # Build a fresh list per call: a `[]` default in the signature would be a
    # single object shared by every call that omits `transformers`.
    if transformers is None:
      transformers = []
    generator = self.default_generator(dataset, predict=True, pad_batches=False)
    return self.predict_on_generator(generator, transformers)
@@ -175,6 +179,7 @@ class WeaveTensorGraph(TensorGraph):
          results.append(result)
        return np.concatenate(results, axis=0)


class DTNNTensorGraph(TensorGraph):

  def __init__(self,
@@ -206,15 +211,22 @@ class DTNNTensorGraph(TensorGraph):
    self.distance_membership_i = Feature(shape=(None,), dtype=tf.int32)
    self.distance_membership_j = Feature(shape=(None,), dtype=tf.int32)

    dtnn_embedding = DTNNEmbedding(n_embedding=self.n_embedding, in_layers=[self.atom_number])
    dtnn_embedding = DTNNEmbedding(
        n_embedding=self.n_embedding, in_layers=[self.atom_number])
    dtnn_layer1 = DTNNStep(
        n_embedding=self.n_embedding,
        n_distance=self.n_distance,
      in_layers=[dtnn_embedding, self.distance, self.distance_membership_i, self.distance_membership_j])
        in_layers=[
            dtnn_embedding, self.distance, self.distance_membership_i,
            self.distance_membership_j
        ])
    dtnn_layer2 = DTNNStep(
        n_embedding=self.n_embedding,
        n_distance=self.n_distance,
      in_layers=[dtnn_layer1, self.distance, self.distance_membership_i, self.distance_membership_j])
        in_layers=[
            dtnn_layer1, self.distance, self.distance_membership_i,
            self.distance_membership_j
        ])
    dtnn_gather = DTNNGather(
        n_embedding=self.n_embedding,
        n_outputs=self.n_hidden,
@@ -223,7 +235,8 @@ class DTNNTensorGraph(TensorGraph):
    costs = []
    self.labels_fd = []
    for task in range(self.n_tasks):
      regression = Dense(out_channels=1, activation_fn=None, in_layers=[dtnn_gather])
      regression = Dense(
          out_channels=1, activation_fn=None, in_layers=[dtnn_gather])
      self.add_output(regression)

      label = Label(shape=(None, 1))
@@ -281,13 +294,14 @@ class DTNNTensorGraph(TensorGraph):
        distance = np.concatenate(distance, 0)
        feed_dict[self.distance] = np.exp(-np.square(distance - self.steps) /
                                          (2 * self.step_size**2))
        feed_dict[self.distance_membership_i] = np.concatenate(distance_membership_i)
        feed_dict[self.distance_membership_j] = np.concatenate(distance_membership_j)
        feed_dict[self.distance_membership_i] = np.concatenate(
            distance_membership_i)
        feed_dict[self.distance_membership_j] = np.concatenate(
            distance_membership_j)
        feed_dict[self.atom_membership] = np.concatenate(atom_membership)

        yield feed_dict


  def predict(self, dataset, transformers=None, batch_size=None):
    """ Run prediction over `dataset`

    Parameters
    ----------
    dataset:
      Data handed to `self.default_generator` to build the prediction batches
      (called with `predict=True, pad_batches=False`).
    transformers: list, optional
      Forwarded unchanged to `self.predict_on_generator`. When omitted (or
      None) an empty list is used.
    batch_size: optional
      Not used by this method.
      NOTE(review): presumably kept for signature compatibility - confirm.

    Returns
    -------
    The value returned by `self.predict_on_generator`.
    """
    # Build a fresh list per call: a `[]` default in the signature would be a
    # single object shared by every call that omits `transformers`.
    if transformers is None:
      transformers = []
    generator = self.default_generator(dataset, predict=True, pad_batches=False)
    return self.predict_on_generator(generator, transformers)
@@ -324,6 +338,7 @@ class DTNNTensorGraph(TensorGraph):
          results.append(result)
        return np.concatenate(results, axis=0)


class DAGTensorGraph(TensorGraph):

  def __init__(self,
@@ -343,9 +358,12 @@ class DAGTensorGraph(TensorGraph):

  def build_graph(self):
    self.atom_features = Feature(shape=(None, self.n_atom_feat))
    self.parents = Feature(shape=(None, self.max_atoms, self.max_atoms), dtype=tf.int32)
    self.calculation_orders = Feature(shape=(None, self.max_atoms), dtype=tf.int32)
    self.calculation_masks = Feature(shape=(None, self.max_atoms), dtype=tf.bool)
    self.parents = Feature(
        shape=(None, self.max_atoms, self.max_atoms), dtype=tf.int32)
    self.calculation_orders = Feature(
        shape=(None, self.max_atoms), dtype=tf.int32)
    self.calculation_masks = Feature(
        shape=(None, self.max_atoms), dtype=tf.bool)
    self.membership = Feature(shape=(None,), dtype=tf.int32)
    self.n_atoms = Feature(shape=(), dtype=tf.int32)
    dag_layer1 = DAGLayer(
@@ -353,7 +371,10 @@ class DAGTensorGraph(TensorGraph):
        n_atom_feat=self.n_atom_feat,
        max_atoms=self.max_atoms,
        batch_size=self.batch_size,
        in_layers=[self.atom_features, self.parents, self.calculation_orders, self.calculation_masks, self.n_atoms])
        in_layers=[
            self.atom_features, self.parents, self.calculation_orders,
            self.calculation_masks, self.n_atoms
        ])
    dag_gather = DAGGather(
        n_graph_feat=self.n_graph_feat,
        n_outputs=self.n_outputs,
@@ -364,7 +385,8 @@ class DAGTensorGraph(TensorGraph):
    self.labels_fd = []
    for task in range(self.n_tasks):
      if self.mode == "classification":
        classification = Dense(out_channels=2, activation_fn=None, in_layers=[dag_gather])
        classification = Dense(
            out_channels=2, activation_fn=None, in_layers=[dag_gather])
        softmax = SoftMax(in_layers=[classification])
        self.add_output(softmax)

@@ -373,7 +395,8 @@ class DAGTensorGraph(TensorGraph):
        cost = SoftMaxCrossEntropy(in_layers=[label, classification])
        costs.append(cost)
      if self.mode == "regression":
        regression = Dense(out_channels=1, activation_fn=None, in_layers=[dag_gather])
        regression = Dense(
            out_channels=1, activation_fn=None, in_layers=[dag_gather])
        self.add_output(regression)

        label = Label(shape=(None, 1))
@@ -430,8 +453,10 @@ class DAGTensorGraph(TensorGraph):

        feed_dict[self.atom_features] = np.concatenate(atoms_all, axis=0)
        feed_dict[self.parents] = np.stack(parents_all, axis=0)
        feed_dict[self.calculation_orders] = np.concatenate(calculation_orders, axis=0)
        feed_dict[self.calculation_masks] = np.concatenate(calculation_masks, axis=0)
        feed_dict[self.calculation_orders] = np.concatenate(
            calculation_orders, axis=0)
        feed_dict[self.calculation_masks] = np.concatenate(
            calculation_masks, axis=0)
        feed_dict[self.membership] = np.array(membership)
        feed_dict[self.n_atoms] = n_atoms
        yield feed_dict
+3 −5
Original line number Diff line number Diff line
@@ -677,10 +677,9 @@ class TestOverfit(test_util.TensorFlowTestCase):
    regression_metric = dc.metrics.Metric(
        dc.metrics.pearson_r2_score, task_averager=np.mean)
    n_tasks = y.shape[1]
    max_n_atoms = list(dataset.get_data_shape())[0]
    batch_size = 10

    graph_model = dc.nn.SequentialDTNNGraph(max_n_atoms=max_n_atoms)
    graph_model = dc.nn.SequentialDTNNGraph()
    graph_model.add(dc.nn.DTNNEmbedding(n_embedding=20))
    graph_model.add(dc.nn.DTNNStep(n_embedding=20))
    graph_model.add(dc.nn.DTNNStep(n_embedding=20))
@@ -728,9 +727,8 @@ class TestOverfit(test_util.TensorFlowTestCase):
    transformer = dc.trans.DAGTransformer(max_atoms=50)
    dataset = transformer.transform(dataset)

    graph = dc.nn.SequentialDAGGraph(
        n_feat, batch_size=batch_size, max_atoms=50)
    graph.add(dc.nn.DAGLayer(30, n_feat, max_atoms=50))
    graph = dc.nn.SequentialDAGGraph(n_atom_feat=n_feat, max_atoms=50)
    graph.add(dc.nn.DAGLayer(30, n_feat, max_atoms=50, batch_size=batch_size))
    graph.add(dc.nn.DAGGather(max_atoms=50))

    model = dc.models.MultitaskGraphRegressor(
+2 −1
Original line number Diff line number Diff line
@@ -149,7 +149,8 @@ class SequentialDAGGraph(SequentialGraph):
        self.output = layer([self.output] +
                            self.graph_topology.get_topology_placeholders())
      elif type(layer).__name__ in ['DAGGather']:
        self.output = layer([self.output, self.graph_topology.membership_placeholder])
        self.output = layer(
            [self.output, self.graph_topology.membership_placeholder])
      else:
        self.output = layer(self.output)
      self.layers.append(layer)
Loading