Commit adc38975 authored by miaecle's avatar miaecle
Browse files

temp save

parent de190ef8
Loading
Loading
Loading
Loading
+8 −5
Original line number Diff line number Diff line
@@ -166,15 +166,14 @@ class SequentialWeaveGraph(SequentialGraph):
  """SequentialGraph for Weave models
  """

  def __init__(self, batch_size, n_atom_feat=75, n_pair_feat=14, max_atoms=100):
  def __init__(self, max_atoms=50, n_atom_feat=75, n_pair_feat=14):
    self.graph = tf.Graph()
    self.batch_size = batch_size
    self.max_atoms = max_atoms
    self.n_atom_feat = n_atom_feat
    self.n_pair_feat = n_pair_feat
    with self.graph.as_default():
      self.graph_topology = WeaveGraphTopology(self.batch_size, self.n_atom_feat,
                                               self.n_pair_feat, self.max_atoms)
      self.graph_topology = WeaveGraphTopology(self.max_atoms, self.n_atom_feat,
                                               self.n_pair_feat)
      self.output = self.graph_topology.get_atom_features_placeholder()
      self.output_P = self.graph_topology.get_pair_features_placeholder()
    self.layers = []
@@ -186,14 +185,18 @@ class SequentialWeaveGraph(SequentialGraph):
        self.output, self.output_P = layer([
            self.output, self.output_P
        ] + self.graph_topology.get_topology_placeholders())
      elif type(layer).__name__ in ['WeaveConcat']:
        self.output = layer(
            [self.output, self.graph_topology.atom_mask_placeholder])
      elif type(layer).__name__ in ['WeaveGather']:
        self.output = layer(
            [self.output, self.graph_topology.atom_split_placeholder])
            [self.output, self.graph_topology.membership_placeholder])
      else:
        self.output = layer(self.output)
      self.layers.append(layer)
     


class SequentialSupportGraph(object):
  """An analog of Keras Sequential model for test/support models."""

+39 −53
Original line number Diff line number Diff line
@@ -397,8 +397,8 @@ class DAGGraphTopology(GraphTopology):
class WeaveGraphTopology(GraphTopology):
  """Manages placeholders associated with batch of graphs and their topology"""

  def __init__(self, batch_size, n_atom_feat, n_pair_feat, 
               max_atoms=100, name='Weave_topology'):
  def __init__(self, max_atoms, n_atom_feat, n_pair_feat,
               name='Weave_topology'):
    """
    Parameters
    ----------
@@ -412,36 +412,30 @@ class WeaveGraphTopology(GraphTopology):

    #self.n_atoms = n_atoms
    self.name = name
    self.batch_size = batch_size
    self.max_atoms = max_atoms
    self.n_atom_feat = n_atom_feat
    self.n_pair_feat = n_pair_feat
    self.max_atoms = max_atoms * batch_size

    self.atom_features_placeholder = tf.placeholder(
        dtype='float32',
        shape=(None, self.n_atom_feat),
        shape=(None, self.max_atoms, self.n_atom_feat),
        name=self.name + '_atom_features')
    self.atom_mask_placeholder = tf.placeholder(
        dtype='float32',
        shape=(None, self.max_atoms),
        name=self.name + '_atom_mask')
    self.pair_features_placeholder = tf.placeholder(
        dtype='float32',
        shape=(None, self.n_pair_feat),
        shape=(None, self.max_atoms, self.max_atoms, self.n_pair_feat),
        name=self.name + '_pair_features')
    self.pair_split_placeholder = tf.placeholder(
        dtype='int32', shape=(self.max_atoms,), 
        name=self.name + '_pair_split')
    self.pair_membership_placeholder = tf.placeholder(
        dtype='bool', shape=(self.max_atoms,), 
        name=self.name + '_pair_membership')
    self.atom_split_placeholder = tf.placeholder(
        dtype='int32', shape=(self.batch_size,), 
        name=self.name + '_atom_split')
    self.atom_to_pair_placeholder = tf.placeholder(
        dtype='int32', shape=(None,2), 
        name=self.name + '_atom_to_pair')
    
    
    self.pair_mask_placeholder = tf.placeholder(
        dtype='float32',
        shape=(None, self.max_atoms, self.max_atoms),
        name=self.name + '_pair_mask')
    self.membership_placeholder = tf.placeholder(
        dtype='int32', shape=(None,), name=self.name + '_membership')
    # Define the list of tensors to be used as topology
    self.topology = [self.pair_split_placeholder, self.pair_membership_placeholder,
                     self.atom_split_placeholder, self.atom_to_pair_placeholder]
    self.topology = [self.atom_mask_placeholder, self.pair_mask_placeholder]
    self.inputs = [self.atom_features_placeholder]
    self.inputs += self.topology

@@ -467,42 +461,34 @@ class WeaveGraphTopology(GraphTopology):
    # Extract atom numbers
    atom_feat = []
    pair_feat = []
    atom_split = []
    atom_to_pair = []
    pair_split = []
    atom_mask = []
    pair_mask = []
    membership = []
    max_atoms = self.max_atoms
    start = 0
    for im, mol in enumerate(batch):
      n_atoms = mol.get_num_atoms()
      # number of atoms in each molecule
      atom_split.append(n_atoms)
      # index of pair features
      C0, C1 = np.meshgrid(np.arange(n_atoms), np.arange(n_atoms))
      atom_to_pair.append(np.transpose(np.array([C1.flatten()+start, C0.flatten()+start])))
      start = start + n_atoms
      # number of pairs for each atom
      pair_split.extend([n_atoms]*n_atoms)
      # atom features
      atom_feat.append(mol.get_atom_features())
      # pair features
      pair_feat.append(np.reshape(mol.get_pair_features(), 
                                  (n_atoms*n_atoms, self.n_pair_feat)))
      
    atom_feat = np.concatenate(atom_feat, axis=0)
    pair_feat = np.concatenate(pair_feat, axis=0)
    atom_to_pair = np.concatenate(atom_to_pair, axis=0)
    atom_split = np.array(atom_split)
    n_pair = len(pair_split)
    pair_split = np.pad(pair_split, ((0, max_atoms-n_pair)), 'constant')
    pair_membership = np.array([True]*n_pair + [False]*(max_atoms-n_pair))
    
      atom_feat.append(
          np.pad(mol.get_atom_features(), ((0, max_atoms - n_atoms), (0, 0)),
                 'constant'))
      atom_mask.append(
          np.array([1] * n_atoms + [0] * (max_atoms - n_atoms), dtype=float))
      pair_feat.append(
          np.pad(mol.get_pair_features(), ((0, max_atoms - n_atoms), (
              0, max_atoms - n_atoms), (0, 0)), 'constant'))
      pair_mask.append(np.array([[1]*n_atoms + [0]*(max_atoms-n_atoms)]*n_atoms + \
                       [[0]*max_atoms]*(max_atoms-n_atoms), dtype=float))
      membership.extend([im] * n_atoms)
    atom_feat = np.stack(atom_feat)
    pair_feat = np.stack(pair_feat)
    atom_mask = np.stack(atom_mask)
    pair_mask = np.stack(pair_mask)
    membership = np.array(membership)
    # Generate dicts
    dict_DTNN = {
        self.atom_features_placeholder: atom_feat,
        self.pair_features_placeholder: pair_feat,
        self.pair_split_placeholder: pair_split,
        self.pair_membership_placeholder: pair_membership,
        self.atom_split_placeholder: atom_split,
        self.atom_to_pair_placeholder: atom_to_pair
        self.atom_mask_placeholder: atom_mask,
        self.pair_mask_placeholder: pair_mask,
        self.membership_placeholder: membership
    }
    return dict_DTNN
+20 −10
Original line number Diff line number Diff line
@@ -266,11 +266,16 @@ def benchmark_classification(train_dataset,
    n_graph_feat = hyper_parameters['n_graph_feat']
    n_pair_feat = hyper_parameters['n_pair_feat']

    graph_model = deepchem.nn.SequentialWeaveGraph(
        batch_size, n_atom_feat=n_features, n_pair_feat=n_pair_feat, max_atoms=120)
    graph_model.add(deepchem.nn.WeaveLayer(75, 14))
    graph_model.add(deepchem.nn.WeaveLayer(50, 50))
    graph_model.add(deepchem.nn.Dense(n_graph_feat, 50, activation='tanh'))
    max_atoms_train = max([mol.get_num_atoms() for mol in train_dataset.X])
    max_atoms_valid = max([mol.get_num_atoms() for mol in valid_dataset.X])
    max_atoms_test = max([mol.get_num_atoms() for mol in test_dataset.X])
    max_atoms = max([max_atoms_train, max_atoms_valid, max_atoms_test])
    
    graph_model = deepchem.nn.SequentialWeaveGraph(max_atoms=max_atoms,
        n_atom_feat=n_features, n_pair_feat=n_pair_feat)
    graph_model.add(deepchem.nn.WeaveLayer(max_atoms, 75, 14))
    graph_model.add(deepchem.nn.WeaveLayer(max_atoms, 50, 50, update_pair=False))
    graph_model.add(deepchem.nn.WeaveConcat(batch_size, n_output=n_graph_feat))
    graph_model.add(deepchem.nn.BatchNormalization(epsilon=1e-5, mode=1))
    graph_model.add(
        deepchem.nn.WeaveGather(
@@ -585,11 +590,16 @@ def benchmark_regression(train_dataset,
    n_graph_feat = hyper_parameters['n_graph_feat']
    n_pair_feat = hyper_parameters['n_pair_feat']

    graph_model = deepchem.nn.SequentialWeaveGraph(
        batch_size, n_atom_feat=n_features, n_pair_feat=n_pair_feat, max_atoms=80)
    graph_model.add(deepchem.nn.WeaveLayer(75, 14))
    graph_model.add(deepchem.nn.WeaveLayer(50, 50))
    graph_model.add(deepchem.nn.Dense(n_graph_feat, 50, activation='tanh'))
    max_atoms_train = max([mol.get_num_atoms() for mol in train_dataset.X])
    max_atoms_valid = max([mol.get_num_atoms() for mol in valid_dataset.X])
    max_atoms_test = max([mol.get_num_atoms() for mol in test_dataset.X])
    max_atoms = max([max_atoms_train, max_atoms_valid, max_atoms_test])
    
    graph_model = deepchem.nn.SequentialWeaveGraph(max_atoms=max_atoms,
        n_atom_feat=n_features, n_pair_feat=n_pair_feat)
    graph_model.add(deepchem.nn.WeaveLayer(max_atoms, 75, 14))
    graph_model.add(deepchem.nn.WeaveLayer(max_atoms, 50, 50, update_pair=False))
    graph_model.add(deepchem.nn.WeaveConcat(batch_size, n_output=n_graph_feat))
    graph_model.add(deepchem.nn.BatchNormalization(epsilon=1e-5, mode=1))
    graph_model.add(
        deepchem.nn.WeaveGather(
+1 −0
Original line number Diff line number Diff line
@@ -21,6 +21,7 @@ from deepchem.nn.layers import DAGLayer
from deepchem.nn.layers import DAGGather

from deepchem.nn.weave_layers import WeaveLayer
from deepchem.nn.weave_layers import WeaveConcat
from deepchem.nn.weave_layers import WeaveGather

from deepchem.nn.model_ops import weight_decay
+122 −47
Original line number Diff line number Diff line
@@ -29,6 +29,7 @@ class WeaveLayer(Layer):
  """

  def __init__(self,
               max_atoms,
               n_atom_input_feat=75,
               n_pair_input_feat=14,
               n_atom_output_feat=50,
@@ -37,6 +38,7 @@ class WeaveLayer(Layer):
               n_hidden_PA=50,
               n_hidden_AP=50,
               n_hidden_PP=50,
               update_pair=True,
               init='glorot_uniform',
               activation='relu',
               dropout=None,
@@ -63,8 +65,10 @@ class WeaveLayer(Layer):

    """
    super(WeaveLayer, self).__init__(**kwargs)
    self.max_atoms = max_atoms
    self.init = initializations.get(init)  # Set weight initialization
    self.activation = activations.get(activation)  # Get activations
    self.update_pair = update_pair # last weave layer does not need to update
    self.n_hidden_AA = n_hidden_AA
    self.n_hidden_PA = n_hidden_PA
    self.n_hidden_AP = n_hidden_AP
@@ -96,30 +100,27 @@ class WeaveLayer(Layer):
        self.n_atom_output_feat,
    ])
    
    self.trainable_weights = [
        self.W_AA, self.b_AA, self.W_PA, self.b_PA, self.W_A, self.b_A
    ]
    if self.update_pair:
      self.W_AP = self.init([self.n_atom_input_feat * 2, self.n_hidden_AP])
    self.b_AP = model_ops.zeros(shape=[
        self.n_hidden_AP,
    ])
      self.b_AP = model_ops.zeros(shape=[self.n_hidden_AP,])

      self.W_PP = self.init([self.n_pair_input_feat, self.n_hidden_PP])
    self.b_PP = model_ops.zeros(shape=[
        self.n_hidden_PP,
    ])
      self.b_PP = model_ops.zeros(shape=[self.n_hidden_PP,])

      self.W_P = self.init([self.n_hidden_P, self.n_pair_output_feat])
    self.b_P = model_ops.zeros(shape=[
        self.n_pair_output_feat,
    ])
      self.b_P = model_ops.zeros(shape=[self.n_pair_output_feat,])
      
    self.trainable_weights = [
        self.W_AA, self.b_AA, self.W_PA, self.b_PA, self.W_A, self.b_A,
      self.trainable_weights.extend([
          self.W_AP, self.b_AP, self.W_PP, self.b_PP, self.W_P, self.b_P
    ]
      ])

  def call(self, x, mask=None):
    """Execute this layer on input tensors.

    x = [atom_features, pair_features, pair_split, pair_membership, atom_split]
    x = [atom_features, pair_features, atom_mask, pair_mask]
    
    Parameters
    ----------
@@ -141,39 +142,113 @@ class WeaveLayer(Layer):
    atom_features = x[0]
    pair_features = x[1]

    pair_split = x[2]
    pair_membership = x[3]
    atom_split = x[4]
    atom_to_pair = x[5]
    atom_mask = x[2]
    pair_mask = x[3]
    max_atoms = self.max_atoms

    AA = tf.matmul(atom_features, self.W_AA) + self.b_AA
    AA = tf.tensordot(atom_features, self.W_AA, [[2], [0]]) + self.b_AA
    AA = self.activation(AA)

    PA = tf.matmul(pair_features, self.W_PA) + self.b_PA
    PA = tf.reduce_sum(
        tf.tensordot(pair_features, self.W_PA, [[3], [0]]) + self.b_PA, axis=2)
    PA = self.activation(PA)
    PAs = tf.split(PA, pair_split, axis=0)
    PA = [tf.reduce_sum(molecule, 0) for molecule in PAs]
    PA = tf.boolean_mask(PA, pair_membership)
    
    A = tf.matmul(tf.concat([AA, PA], 1), self.W_A) + self.b_A
    A = tf.tensordot(tf.concat([AA, PA], 2), self.W_A, [[2], [0]]) + self.b_A
    A = self.activation(A)

    AP_ij = tf.matmul(tf.reshape(tf.gather(atom_features, atom_to_pair), 
                                 [-1, 2*self.n_atom_input_feat]), self.W_AP) + self.b_AP
    AP_ij = self.activation(AP_ij)
    AP_ji = tf.matmul(tf.reshape(tf.gather(atom_features, tf.reverse(atom_to_pair, [1])), 
                                 [-1, 2*self.n_atom_input_feat]), self.W_AP) + self.b_AP
    AP_ji = self.activation(AP_ji)
    
    PP = tf.matmul(pair_features, self.W_PP) + self.b_PP
    A = tf.multiply(A, tf.expand_dims(atom_mask, axis=2))
      
    if self.update_pair:
      AP_combine = tf.concat([
          tf.stack([atom_features] * max_atoms, axis=2),
          tf.stack([atom_features] * max_atoms, axis=1)
      ], 3)
      AP_combine_t = tf.transpose(AP_combine, perm=[0, 2, 1, 3])
      AP = tf.tensordot(AP_combine + AP_combine_t, self.W_AP,
                        [[3], [0]]) + self.b_AP
      AP = self.activation(AP)
      PP = tf.tensordot(pair_features, self.W_PP, [[3], [0]]) + self.b_PP
      PP = self.activation(PP)
    
    P = tf.matmul(tf.concat([AP_ij + AP_ji, PP], 1), self.W_P) + self.b_P
      P = tf.tensordot(tf.concat([AP, PP], 3), self.W_P, [[3], [0]]) + self.b_P
      P = self.activation(P)
      P = tf.multiply(P, tf.expand_dims(pair_mask, axis=3))
    else:
      P = pair_features
      
    return A, P


class WeaveConcat(Layer):
  """Concatenate a batch of molecules into a batch of atoms.

  Atom slots whose mask entry is zero (padding) are dropped, the remaining
  atoms of all molecules are stacked along the first axis, and a dense
  layer followed by the activation is applied to every atom.
  """

  def __init__(self,
               batch_size,
               n_atom_input_feat=50,
               n_output=128,
               init='glorot_uniform',
               activation='tanh',
               **kwargs):
    """
    Parameters
    ----------
    batch_size: int
      number of molecules in a batch. Kept as an attribute for interface
      compatibility; call() does not need it to flatten the batch.
    n_atom_input_feat: int, optional
      Number of features for each atom in input.
    n_output: int, optional
      Number of output features for each atom(concatenated)
    init: str, optional
      Weight initialization for filters.
    activation: str, optional
      Activation function applied

    """
    self.batch_size = batch_size
    self.n_atom_input_feat = n_atom_input_feat
    self.n_output = n_output
    self.init = initializations.get(init)  # Set weight initialization
    self.activation = activations.get(activation)  # Get activations
    super(WeaveConcat, self).__init__(**kwargs)

  def build(self):
    """Construct internal trainable weights."""

    self.W = self.init([self.n_atom_input_feat, self.n_output])
    self.b = model_ops.zeros(shape=[
        self.n_output,
    ])

    # This must be a list of the weight tensors. `self.W + self.b` would be
    # a broadcast tensor sum, not a collection of trainable weights.
    self.trainable_weights = [self.W, self.b]

  def call(self, x, mask=None):
    """Execute this layer on input tensors.

    x = [atom_features, atom_mask]

    Parameters
    ----------
    x: list
      Tensors as listed above. atom_features is presumably of shape
      (n_molecules, max_atoms, n_atom_input_feat) and atom_mask of shape
      (n_molecules, max_atoms) with nonzero entries marking real atoms --
      TODO confirm against the topology placeholders.
    mask: bool, optional
      Ignored. Present only to shadow superclass call() method.

    Returns
    -------
    outputs: Tensor
      Tensor of concatenated atom features
    """
    # NOTE: build() runs on every call, so each call creates fresh weights.
    self.build()
    atom_features = x[0]
    atom_masks = x[1]
    # A rank-2 boolean mask selects entries along the first two axes in
    # row-major order, i.e. molecule by molecule and atom by atom. This is
    # the same result as splitting the batch per molecule, masking each
    # piece and concatenating, without requiring the batch to be divisible
    # by batch_size.
    outputs = tf.boolean_mask(atom_features,
                              tf.cast(atom_masks, dtype=tf.bool))
    outputs = tf.matmul(outputs, self.W) + self.b
    outputs = self.activation(outputs)
    return outputs


class WeaveGather(Layer):
  """" Gather layer of Weave model
  a batch of normalized atom features go through a hidden layer, 
@@ -220,7 +295,7 @@ class WeaveGather(Layer):
  def call(self, x, mask=None):
    """Execute this layer on input tensors.

    x = [atom_features, atom_split]
    x = [atom_features, membership]
    
    Parameters
    ----------
@@ -237,12 +312,12 @@ class WeaveGather(Layer):
    # Add trainable weights
    self.build()
    outputs = x[0]
    atom_split = x[1]
    membership = x[1]

    if self.gaussian_expand:
      outputs = self.gaussian_histogram(outputs)

    outputs = tf.split(outputs, atom_split, axis=0)
    outputs = tf.dynamic_partition(outputs, membership, self.batch_size)

    output_molecules = [tf.reduce_sum(molecule, 0) for molecule in outputs]