Commit 6808c495 authored by miaecle's avatar miaecle
Browse files

refining model

parent 6be5b3a5
Loading
Loading
Loading
Loading
+13 −21
Original line number Diff line number Diff line
@@ -80,23 +80,28 @@ class SequentialGraph(object):
    return self.layers[layer_id]

class SequentialDTNNGraph(SequentialGraph):
  """An analog of Keras Sequential class for Graph data.
  """An analog of Keras Sequential class for Coulomb Matrix data.

  Like the Sequential class from Keras, but automatically passes topology
  placeholders from GraphTopology to each graph layer (from layers) added
  to the network. Non graph layers don't get the extra placeholders. 
  automatically generates and passes topology placeholders to each layer. 
  """

  def __init__(self, max_n_atoms=30, n_distance=100):
  def __init__(self, max_n_atoms, n_distance=100, distance_min=-1., distance_max=18.):
    """
    Parameters
    ----------
    n_feat: int
      Number of features per atom.
    max_n_atoms: int
      maximum number of atoms in a molecule
    n_distance: int, optional
      granularity of distance matrix
      step size will be (distance_max-distance_min)/n_distance
    distance_min: float, optional
      minimum distance of atom pairs, default = -1 Angstrom
    distance_max: float, optional
      maximum distance of atom pairs, default = 18 Angstrom
    """
    self.graph = tf.Graph()
    with self.graph.as_default():
      self.graph_topology = DTNNGraphTopology(max_n_atoms, n_distance)
      self.graph_topology = DTNNGraphTopology(max_n_atoms, n_distance, distance_min=distance_min, distance_max=distance_max)
      self.output = self.graph_topology.get_atom_number_placeholder()
    # Keep track of the layers
    self.layers = []
@@ -104,24 +109,11 @@ class SequentialDTNNGraph(SequentialGraph):
  def add(self, layer):
    """Adds a new layer to model."""
    with self.graph.as_default():
      ############################################# DEBUG
      #print("start - add()")
      #print("self.output")
      #print(self.output)
      ############################################# DEBUG
      # For graphical layers, add connectivity placeholders 
      if type(layer).__name__ in ['DTNNStep']:
        self.output = layer([self.output] +
                            self.graph_topology.get_topology_placeholders())
      else:
        self.output = layer(self.output)
      ############################################# DEBUG
      #print("end- add()")
      #print("self.output")
      #print(self.output)
      ############################################# DEBUG

      # Add layer to the layer list
      self.layers.append(layer)


+21 −21
Original line number Diff line number Diff line
@@ -144,28 +144,27 @@ class GraphTopology(object):
class DTNNGraphTopology(GraphTopology):
  """Manages placeholders associated with batch of graphs and their topology"""

  def __init__(self, max_n_atoms=30, n_distance=100, name='DTNN_topology'):
  def __init__(self, max_n_atoms, n_distance=100, distance_min=-1., distance_max=18., name='DTNN_topology'):
    """
    Note that batch size is not specified in a GraphTopology object. A batch
    of molecules must be combined into a disconnected graph and fed to topology
    directly to handle batches.

    Parameters
    ----------
    n_feat: int
      Number of features per atom.
    name: str, optional
      Name of this manager.
    max_deg: int, optional
      Maximum #bonds for atoms in molecules.
    min_deg: int, optional
      Minimum #bonds for atoms in molecules.
    max_n_atoms: int
      maximum number of atoms in a molecule
    n_distance: int, optional
      granularity of distance matrix
      step size will be (distance_max-distance_min)/n_distance
    distance_min: float, optional
      minimum distance of atom pairs, default = -1 Angstrom
    distance_max: float, optional
      maximum distance of atom pairs, default = 18 Angstrom
    """

    #self.n_atoms = n_atoms
    self.name = name
    self.max_n_atoms = max_n_atoms
    self.n_distance = n_distance
    self.distance_min = distance_min
    self.distance_max = distance_max

    self.atom_number_placeholder = tf.placeholder(
        dtype='int32', 
@@ -193,22 +192,22 @@ class DTNNGraphTopology(GraphTopology):
    return self.distance_matrix_placeholder

  def batch_to_feed_dict(self, batch):
    """Converts the current batch of mol_graphs into tensorflow feed_dict.
    """Converts the current batch of Coulomb Matrix into tensorflow feed_dict.

    Assigns the graph information in array of ConvMol objects to the
    Assigns the atom number and distance info to the
    placeholders tensors

    params
    ------
    batch : np.ndarray
      Array of ConvMol objects
      Array of Coulomb Matrix

    returns
    -------
    feed_dict : dict
      Can be merged with other feed_dicts for input into tensorflow
    """
    # Merge mol conv objects
    # Extract atom numbers
    atom_number = np.asarray(map(np.diag, batch))
    atom_number = np.asarray(np.round(np.power(2*atom_number, 1/2.4)), dtype=int)
    ZiZj = []
@@ -222,8 +221,9 @@ class DTNNGraphTopology(GraphTopology):
      for ir, row in enumerate(molecule):
        for ie, element in enumerate(row):
          if element>0 and ir != ie:
            # expand a float value distance to a distance vector
            distance_matrix[im, ir, ie, :] = self.gauss_expand(ZiZj[im, ir, ie]/element, 
                                                               self.n_distance)
                self.n_distance, self.distance_min, self.distance_max)
            distance_matrix_mask[im, ir, ie] = 1
          else:
            distance_matrix[im, ir, ie, :] = 0
@@ -237,7 +237,7 @@ class DTNNGraphTopology(GraphTopology):
    return dict_DTNN
    
  @staticmethod
  def gauss_expand(distance, n_distance, distance_min=-1., distance_max=18.):
  def gauss_expand(distance, n_distance, distance_min, distance_max):
    step_size = (distance_max - distance_min)/n_distance
    steps = np.array([distance_min+i*step_size for i in range(n_distance)])
    distance_vector = np.exp(-np.square(distance - steps)/(2*step_size**2))
+2 −1
Original line number Diff line number Diff line
@@ -95,7 +95,8 @@ hps['DTNN'] = {
    'batch_size': 128,
    'nb_epoch': 20,
    'learning_rate': 0.0005,
    'n_hidden': 20,
    'n_embedding': 20,
    'n_hidden': 50,
    'n_distance': 100,
    'seed': 123
}
+2 −2
Original line number Diff line number Diff line
@@ -82,7 +82,7 @@ def run_benchmark(datasets,
        n_features = 75
      elif model in [
          'tf', 'tf_robust', 'logreg', 'rf', 'irv', 'tf_regression',
          'rf_regression'
          'rf_regression', 'DTNN'
      ]:
        featurizer = 'ECFP'
        n_features = 1024
@@ -99,7 +99,7 @@ def run_benchmark(datasets,
          return
      elif dataset in ['qm7', 'qm7b', 'qm9']:
        featurizer = None  # qm* datasets are already featurized
        if isinstance(model, str) and not model in ['tf_regression']:
        if isinstance(model, str) and not model in ['tf_regression', 'DTNN']:
          return
        elif model in ['tf_regression']:
          model = 'tf_regression_ft'
+10 −9
Original line number Diff line number Diff line
@@ -303,7 +303,7 @@ def benchmark_regression(
  test_scores = {}

  assert model in [
      'tf_regression', 'tf_regression_ft', 'rf_regression', 'graphconvreg'
      'tf_regression', 'tf_regression_ft', 'rf_regression', 'graphconvreg', 'DTNN'
  ]
  if hyper_parameters is None:
    hyper_parameters = hps[model]
@@ -408,17 +408,18 @@ def benchmark_regression(
    nb_epoch = hyper_parameters['nb_epoch']
    learning_rate = hyper_parameters['learning_rate']
    n_distance = hyper_parameters['n_distance']
    n_embedding = hyper_parameters['n_embedding']
    n_hidden = hyper_parameters['n_hidden']

    tf.set_random_seed(seed)
    graph_model = dc.nn.SequentialDTNNGraph(max_n_atoms=n_features[0], 
    graph_model = deepchem.nn.SequentialDTNNGraph(max_n_atoms=n_features[0], 
                                                  n_distance=n_distance)
    graph_model.add(dc.nn.DTNNEmbedding(n_features=n_hidden))
    graph_model.add(dc.nn.DTNNStep(n_features=n_hidden, n_distance=n_distance))
    graph_model.add(dc.nn.DTNNStep(n_features=n_hidden, n_distance=n_distance))
    graph_model.add(dc.nn.DTNNGather(n_tasks=len(tasks)))
    graph_model.add(deepchem.nn.DTNNEmbedding(n_embedding=n_embedding))
    graph_model.add(deepchem.nn.DTNNStep(n_embedding=n_embedding, n_distance=n_distance))
    graph_model.add(deepchem.nn.DTNNStep(n_embedding=n_embedding, n_distance=n_distance))
    graph_model.add(deepchem.nn.DTNNGather(n_tasks=len(tasks), n_embedding=n_embedding, n_hidden=n_hidden))

    model = dc.models.DTNNRegressor(
    model = deepchem.models.DTNNRegressor(
        graph_model,
        n_tasks=len(tasks),
        batch_size=batch_size,
Loading