Commit c6f381a5 authored by miaecle's avatar miaecle
Browse files

style change

parent e971ccb2
Loading
Loading
Loading
Loading
+4 −4
Original line number Diff line number Diff line
@@ -677,7 +677,8 @@ class TestOverfit(test_util.TensorFlowTestCase):
    w = np.ones_like(y)
    dataset = dc.data.DiskDataset.from_numpy(X, y, w, ids=None)
    regression_metric = dc.metrics.Metric(
        dc.metrics.mean_absolute_error, mode="regression", task_averager=np.mean)
        dc.metrics.r2_score,
        task_averager=np.mean)
    n_tasks = y.shape[1]
    n_feat = list(dataset.get_data_shape())
    batch_size = 10
@@ -703,10 +704,9 @@ class TestOverfit(test_util.TensorFlowTestCase):
    model.save()

    # Eval model on train
    scores = model.evaluate(dataset, [classification_metric])

    assert scores[classification_metric.name] < .2
    scores = model.evaluate(dataset, [regression_metric])

    assert scores[regression_metric.name] < .2

  def test_siamese_singletask_classification_overfit(self):
    """Test siamese singletask model overfits tiny data."""
+12 −3
Original line number Diff line number Diff line
@@ -79,13 +79,18 @@ class SequentialGraph(object):
  def get_layer(self, layer_id):
    """Return the entry of `self.layers` at index `layer_id`."""
    return self.layers[layer_id]


class SequentialDTNNGraph(SequentialGraph):
  """An analog of Keras Sequential class for Coulomb Matrix data.

  Automatically generates and passes topology placeholders to each layer.
  """

  def __init__(self, max_n_atoms, n_distance=100, distance_min=-1., distance_max=18.):
  def __init__(self,
               max_n_atoms,
               n_distance=100,
               distance_min=-1.,
               distance_max=18.):
    """
    Parameters
    ----------
@@ -101,7 +106,11 @@ class SequentialDTNNGraph(SequentialGraph):
    """
    self.graph = tf.Graph()
    with self.graph.as_default():
      self.graph_topology = DTNNGraphTopology(max_n_atoms, n_distance, distance_min=distance_min, distance_max=distance_max)
      self.graph_topology = DTNNGraphTopology(
          max_n_atoms,
          n_distance,
          distance_min=distance_min,
          distance_max=distance_max)
      self.output = self.graph_topology.get_atom_number_placeholder()
    # Keep track of the layers
    self.layers = []
@@ -116,13 +125,13 @@ class SequentialDTNNGraph(SequentialGraph):
        self.output = layer(self.output)
      self.layers.append(layer)


  def return_inputs(self):
    """Return whatever the graph topology's `get_atom_number_placeholders()` yields."""
    # NOTE(review): __init__ calls `get_atom_number_placeholder()` (singular) on
    # the same topology object; confirm that this plural accessor also exists.
    return self.graph_topology.get_atom_number_placeholders()

  def get_layer(self, layer_id):
    """Return the entry of `self.layers` at index `layer_id`."""
    return self.layers[layer_id]


class SequentialSupportGraph(object):
  """An analog of Keras Sequential model for test/support models."""

+25 −16
Original line number Diff line number Diff line
@@ -141,10 +141,16 @@ class GraphTopology(object):
    }
    return merge_dicts([atoms_dict, deg_adj_dict])


class DTNNGraphTopology(GraphTopology):
  """Manages placeholders associated with batch of graphs and their topology"""

  def __init__(self, max_n_atoms, n_distance=100, distance_min=-1., distance_max=18., name='DTNN_topology'):
  def __init__(self,
               max_n_atoms,
               n_distance=100,
               distance_min=-1.,
               distance_max=18.,
               name='DTNN_topology'):
    """
    Parameters
    ----------
@@ -180,8 +186,9 @@ class DTNNGraphTopology(GraphTopology):
        name=self.name + '_distance_matrix_mask')

    # Define the list of tensors to be used as topology
    self.topology = [self.distance_matrix_placeholder, 
                     self.distance_matrix_mask_placeholder]
    self.topology = [
        self.distance_matrix_placeholder, self.distance_matrix_mask_placeholder
    ]
    self.inputs = [self.atom_number_placeholder]
    self.inputs += self.topology

@@ -209,21 +216,24 @@ class DTNNGraphTopology(GraphTopology):
    """
    # Extract atom numbers
    atom_number = np.asarray(map(np.diag, batch))
    atom_number = np.asarray(np.round(np.power(2*atom_number, 1/2.4)), dtype=int)
    atom_number = np.asarray(
        np.round(np.power(2 * atom_number, 1 / 2.4)), dtype=int)
    ZiZj = []
    for molecule in atom_number:
      ZiZj.append(np.outer(molecule, molecule))
    ZiZj = np.asarray(ZiZj)
    distance_matrix = np.expand_dims(batch[:], axis=3)
    distance_matrix = np.concatenate([distance_matrix]*self.n_distance, axis=3)
    distance_matrix = np.concatenate(
        [distance_matrix] * self.n_distance, axis=3)
    distance_matrix_mask = batch[:]
    for im, molecule in enumerate(batch):
      for ir, row in enumerate(molecule):
        for ie, element in enumerate(row):
          if element > 0 and ir != ie:
            # expand a float value distance to a distance vector
            distance_matrix[im, ir, ie, :] = self.gauss_expand(ZiZj[im, ir, ie]/element, 
                self.n_distance, self.distance_min, self.distance_max)
            distance_matrix[im, ir, ie, :] = self.gauss_expand(
                ZiZj[im, ir, ie] / element, self.n_distance, self.distance_min,
                self.distance_max)
            distance_matrix_mask[im, ir, ie] = 1
          else:
            distance_matrix[im, ir, ie, :] = 0
@@ -242,4 +252,3 @@ class DTNNGraphTopology(GraphTopology):
    steps = np.array([distance_min + i * step_size for i in range(n_distance)])
    distance_vector = np.exp(-np.square(distance - steps) / (2 * step_size**2))
    return distance_vector
      
+11 −6
Original line number Diff line number Diff line
@@ -303,7 +303,8 @@ def benchmark_regression(
  test_scores = {}

  assert model in [
      'tf_regression', 'tf_regression_ft', 'rf_regression', 'graphconvreg', 'DTNN'
      'tf_regression', 'tf_regression_ft', 'rf_regression', 'graphconvreg',
      'DTNN'
  ]
  if hyper_parameters is None:
    hyper_parameters = hps[model]
@@ -412,12 +413,16 @@ def benchmark_regression(
    n_hidden = hyper_parameters['n_hidden']

    tf.set_random_seed(seed)
    graph_model = deepchem.nn.SequentialDTNNGraph(max_n_atoms=n_features[0], 
                                                  n_distance=n_distance)
    graph_model = deepchem.nn.SequentialDTNNGraph(
        max_n_atoms=n_features[0], n_distance=n_distance)
    graph_model.add(deepchem.nn.DTNNEmbedding(n_embedding=n_embedding))
    graph_model.add(deepchem.nn.DTNNStep(n_embedding=n_embedding, n_distance=n_distance))
    graph_model.add(deepchem.nn.DTNNStep(n_embedding=n_embedding, n_distance=n_distance))
    graph_model.add(deepchem.nn.DTNNGather(n_tasks=len(tasks), n_embedding=n_embedding, n_hidden=n_hidden))
    graph_model.add(
        deepchem.nn.DTNNStep(n_embedding=n_embedding, n_distance=n_distance))
    graph_model.add(
        deepchem.nn.DTNNStep(n_embedding=n_embedding, n_distance=n_distance))
    graph_model.add(
        deepchem.nn.DTNNGather(
            n_tasks=len(tasks), n_embedding=n_embedding, n_hidden=n_hidden))

    model = deepchem.models.DTNNRegressor(
        graph_model,
+45 −24
Original line number Diff line number Diff line
@@ -820,9 +820,11 @@ class LSTMStep(Layer):
    return h, [h, c]
    ####################################################### DEBUG


class DTNNEmbedding(Layer):
  """Generate embeddings for all atoms in the batch
  """

  def __init__(self,
               n_embedding=20,
               periodic_table_length=83,
@@ -836,7 +838,8 @@ class DTNNEmbedding(Layer):

  def build(self):

    self.embedding_list = self.init([self.periodic_table_length, self.n_embedding])
    self.embedding_list = self.init(
        [self.periodic_table_length, self.n_embedding])
    self.trainable_weights = [self.embedding_list]

  def call(self, x):
@@ -856,10 +859,12 @@ class DTNNEmbedding(Layer):
    atom_features = tf.nn.embedding_lookup(self.embedding_list, x)
    return atom_features


class DTNNStep(Layer):
  """A convolution step that merge in distance and atom info of 
     all other atoms into current atom.
  """

  def __init__(self,
               n_embedding=20,
               n_distance=100,
@@ -879,11 +884,16 @@ class DTNNStep(Layer):
    self.W_cf = self.init([self.n_embedding, self.n_hidden])
    self.W_df = self.init([self.n_distance, self.n_hidden])
    self.W_fc = self.init([self.n_hidden, self.n_embedding])
    self.b_cf = model_ops.zeros(shape=[self.n_hidden,])
    self.b_df = model_ops.zeros(shape=[self.n_hidden,])
    
    self.trainable_weights = [self.W_cf, self.W_df, self.W_fc, 
                              self.b_cf, self.b_df]
    self.b_cf = model_ops.zeros(shape=[
        self.n_hidden,
    ])
    self.b_df = model_ops.zeros(shape=[
        self.n_hidden,
    ])

    self.trainable_weights = [
        self.W_cf, self.W_df, self.W_fc, self.b_cf, self.b_df
    ]

  def call(self, x):
    """Execute this layer on input tensors.
@@ -904,8 +914,11 @@ class DTNNStep(Layer):
    atom_features = x[0]
    distance_matrix = x[1]
    distance_matrix_mask = x[2]
    outputs = tf.multiply((tf.tensordot(distance_matrix, self.W_df, [[3], [0]]) + self.b_df),
        tf.expand_dims(tf.tensordot(atom_features, self.W_cf, [[2], [0]]) + self.b_cf, axis=1))
    outputs = tf.multiply(
        (tf.tensordot(distance_matrix, self.W_df, [[3], [0]]) + self.b_df),
        tf.expand_dims(
            tf.tensordot(atom_features, self.W_cf, [[2], [0]]) + self.b_cf,
            axis=1))
    # For atom i in molecule m, this step multiplies the distance info of atom pair (i, j)
    # by the embedding of atom j (both after passing through a hidden layer).
    outputs = tf.tensordot(outputs, self.W_fc, [[3], [0]])
@@ -917,9 +930,11 @@ class DTNNStep(Layer):

    return outputs


class DTNNGather(Layer):
  """Map the atomic features into molecular properties and sum
  """

  def __init__(self,
               n_tasks=1,
               n_embedding=20,
@@ -943,8 +958,12 @@ class DTNNGather(Layer):
    for i in range(self.n_tasks):
      self.W_out1_list.append(self.init([self.n_embedding, self.n_hidden]))
      self.W_out2_list.append(self.init([self.n_hidden, 1]))
      self.b_out1_list.append(model_ops.zeros(shape=[self.n_hidden,]))
      self.b_out2_list.append(model_ops.zeros(shape=[1,]))
      self.b_out1_list.append(model_ops.zeros(shape=[
          self.n_hidden,
      ]))
      self.b_out2_list.append(model_ops.zeros(shape=[
          1,
      ]))

    self.trainable_weights = self.W_out1_list + self.W_out2_list + self.b_out1_list + self.b_out2_list

@@ -964,9 +983,11 @@ class DTNNGather(Layer):
    self.build()
    outputs = []
    for i in range(self.n_tasks):
      output = tf.tensordot(x, self.W_out1_list[i], [[2], [0]]) + self.b_out1_list[i]
      output = tf.tensordot(x, self.W_out1_list[i],
                            [[2], [0]]) + self.b_out1_list[i]
      output = self.activation(output)
      output = tf.tensordot(output, self.W_out2_list[i], [[2], [0]]) + self.b_out2_list[i]
      output = tf.tensordot(output, self.W_out2_list[i],
                            [[2], [0]]) + self.b_out2_list[i]
      # each task has one independent hidden layer
      output = tf.reduce_sum(tf.squeeze(output, axis=2), axis=1)
      outputs.append(output)
Loading