Commit de190ef8 authored by miaecle's avatar miaecle
Browse files

temp save

parent 9aaff43b
Loading
Loading
Loading
Loading
+12 −10
Original line number Diff line number Diff line
@@ -220,7 +220,7 @@ Index splitting
|           |MT-NN classification|0.934              |0.830              |
|           |Robust MT-NN        |0.949              |0.827              |
|           |Graph convolution   |0.946              |0.860              |
|           |DAG                 |0.953              |0.775              |
|           |Weave               |0.907              |0.879              |
|hiv        |Logistic regression |0.864              |0.739              |
|           |Random forest       |0.999              |0.720              |
|           |XGBoost             |0.917              |0.745              |
@@ -252,7 +252,7 @@ Index splitting
|           |MT-NN classification|0.856              |0.763              |
|           |Robust MT-NN        |0.857              |0.767              |
|           |Graph convolution   |0.872              |0.798              |
|           |DAG                 |0.831              |0.750              |
|           |Weave               |0.810              |0.778              |
|toxcast    |Logistic regression |0.721              |0.575              |
|           |XGBoost             |0.738              |0.621              |
|           |MT-NN classification|0.830              |0.678              |
@@ -282,7 +282,7 @@ Random splitting
|           |MT-NN classification|0.951              |0.834              |
|           |Robust MT-NN        |0.959              |0.830              |
|           |Graph convolution   |0.975              |0.876              |
|           |DAG                 |0.917              |0.744              |
|           |Weave               |0.890              |0.738              |
|hiv        |Logistic regression |0.860              |0.806              |
|           |Random forest       |0.999              |0.850              |
|           |XGBoost             |0.933              |0.841              |
@@ -313,7 +313,7 @@ Random splitting
|           |MT-NN classification|0.844              |0.795              |
|           |Robust MT-NN        |0.855              |0.773              |
|           |Graph convolution   |0.865              |0.827              |
|           |DAG                 |0.872              |0.758              |
|           |Weave               |0.796              |0.781              |
|toxcast    |Logistic regression |0.725              |0.586              |
|           |XGBoost             |0.738              |0.633              |
|           |MT-NN classification|0.836              |0.684              |
@@ -343,7 +343,7 @@ Scaffold splitting
|           |MT-NN classification|0.937              |0.828              |
|           |Robust MT-NN        |0.956              |0.821              |
|           |Graph convolution   |0.965              |0.900              |
|           |DAG                 |0.925              |0.703              |
|           |Weave               |0.888              |0.873              |
|hiv        |Logistic regression |0.858              |0.798              |
|           |Random forest       |0.946              |0.562              |
|           |XGBoost             |0.927              |0.830              |
@@ -374,7 +374,7 @@ Scaffold splitting
|           |MT-NN classification|0.863              |0.703              |
|           |Robust MT-NN        |0.861              |0.710              |
|           |Graph convolution   |0.885              |0.732              |
|           |DAG                 |0.861              |0.670              |
|           |Weave               |0.812              |0.727              |
|toxcast    |Logistic regression |0.716              |0.492              |
|           |XGBoost             |0.741              |0.587              |
|           |MT-NN classification|0.828              |0.617              |
@@ -411,17 +411,17 @@ Scaffold splitting
|                |XGBoost             |Index       |0.898         |0.664         |
|                |NN regression       |Index       |0.868         |0.578         |
|                |Graphconv regression|Index       |0.967         |0.790         |
|                |DAG regression      |Index       |0.921         |0.827         |
|                |Weave regression    |Index       |0.967         |0.860         |
|                |Random forest       |Random      |0.951         |0.684         |
|                |XGBoost             |Random      |0.927         |0.727         |
|                |NN regression       |Random      |0.865         |0.574         |
|                |Graphconv regression|Random      |0.964         |0.782         |
|                |DAG regression      |Random      |0.898         |0.857         |
|                |Weave regression    |Random      |0.965         |0.925         |
|                |Random forest       |Scaffold    |0.953         |0.284         |
|                |XGBoost             |Scaffold    |0.890         |0.316         |
|                |NN regression       |Scaffold    |0.866         |0.342         |
|                |Graphconv regression|Scaffold    |0.967         |0.606         |
|                |DAG regression      |Scaffold    |0.931         |0.647         |
|                |Weave regression    |Scaffold    |0.968         |0.752         |
|hopv            |Random forest       |Index       |0.943         |0.338         |
|                |MT-NN regression    |Index       |0.725         |0.293         |
|                |Graphconv regression|Index       |0.307         |0.284         |
@@ -496,15 +496,17 @@ Scaffold splitting
|                |XGBoost             |Index       |0.884         |0.784         |
|                |NN regression       |Index       |0.917         |0.764         |
|                |Graphconv regression|Index       |0.982         |0.903         |
|                |DAG regression      |Index       |0.891         |0.777         |
|                |Weave regression    |Index       |0.986         |0.924         |
|                |Random forest       |Random      |0.967         |0.752         |
|                |XGBoost             |Random      |0.906         |0.745         |
|                |NN regression       |Random      |0.908         |0.711         |
|                |Graphconv regression|Random      |0.987         |0.868         |
|                |Weave regression    |Random      |0.997         |0.888         |
|                |Random forest       |Scaffold    |0.966         |0.477         |
|                |XGBoost             |Scaffold    |0.918         |0.439         |
|                |NN regression       |Scaffold    |0.891         |0.217         |
|                |Graphconv regression|Scaffold    |0.985         |0.666         |
|                |Weave regression    |Scaffold    |0.991         |0.833         |

|Dataset         |Model            |Splitting   |Train score/MAE(kcal/mol)|Valid score/MAE(kcal/mol)|
|----------------|-----------------|------------|-------------------------|-------------------------|
+5 −7
Original line number Diff line number Diff line
@@ -166,14 +166,15 @@ class SequentialWeaveGraph(SequentialGraph):
  """SequentialGraph for Weave models
  """

  def __init__(self, max_atoms=50, n_atom_feat=75, n_pair_feat=14):
  def __init__(self, batch_size, n_atom_feat=75, n_pair_feat=14, max_atoms=100):
    self.graph = tf.Graph()
    self.batch_size = batch_size
    self.max_atoms = max_atoms
    self.n_atom_feat = n_atom_feat
    self.n_pair_feat = n_pair_feat
    with self.graph.as_default():
      self.graph_topology = WeaveGraphTopology(self.max_atoms, self.n_atom_feat,
                                               self.n_pair_feat)
      self.graph_topology = WeaveGraphTopology(self.batch_size, self.n_atom_feat,
                                               self.n_pair_feat, self.max_atoms)
      self.output = self.graph_topology.get_atom_features_placeholder()
      self.output_P = self.graph_topology.get_pair_features_placeholder()
    self.layers = []
@@ -185,12 +186,9 @@ class SequentialWeaveGraph(SequentialGraph):
        self.output, self.output_P = layer([
            self.output, self.output_P
        ] + self.graph_topology.get_topology_placeholders())
      elif type(layer).__name__ in ['WeaveConcat']:
        self.output = layer(
            [self.output, self.graph_topology.atom_mask_placeholder])
      elif type(layer).__name__ in ['WeaveGather']:
        self.output = layer(
            [self.output, self.graph_topology.membership_placeholder])
            [self.output, self.graph_topology.atom_split_placeholder])
      else:
        self.output = layer(self.output)
      self.layers.append(layer)
+53 −39
Original line number Diff line number Diff line
@@ -397,8 +397,8 @@ class DAGGraphTopology(GraphTopology):
class WeaveGraphTopology(GraphTopology):
  """Manages placeholders associated with batch of graphs and their topology"""

  def __init__(self, max_atoms, n_atom_feat, n_pair_feat,
               name='Weave_topology'):
  def __init__(self, batch_size, n_atom_feat, n_pair_feat, 
               max_atoms=100, name='Weave_topology'):
    """
    Parameters
    ----------
@@ -412,30 +412,36 @@ class WeaveGraphTopology(GraphTopology):

    #self.n_atoms = n_atoms
    self.name = name
    self.max_atoms = max_atoms
    self.batch_size = batch_size
    self.n_atom_feat = n_atom_feat
    self.n_pair_feat = n_pair_feat
    self.max_atoms = max_atoms * batch_size

    self.atom_features_placeholder = tf.placeholder(
        dtype='float32',
        shape=(None, self.max_atoms, self.n_atom_feat),
        shape=(None, self.n_atom_feat),
        name=self.name + '_atom_features')
    self.atom_mask_placeholder = tf.placeholder(
        dtype='float32',
        shape=(None, self.max_atoms),
        name=self.name + '_atom_mask')
    self.pair_features_placeholder = tf.placeholder(
        dtype='float32',
        shape=(None, self.max_atoms, self.max_atoms, self.n_pair_feat),
        shape=(None, self.n_pair_feat),
        name=self.name + '_pair_features')
    self.pair_mask_placeholder = tf.placeholder(
        dtype='float32',
        shape=(None, self.max_atoms, self.max_atoms),
        name=self.name + '_pair_mask')
    self.membership_placeholder = tf.placeholder(
        dtype='int32', shape=(None,), name=self.name + '_membership')
    self.pair_split_placeholder = tf.placeholder(
        dtype='int32', shape=(self.max_atoms,), 
        name=self.name + '_pair_split')
    self.pair_membership_placeholder = tf.placeholder(
        dtype='bool', shape=(self.max_atoms,), 
        name=self.name + '_pair_membership')
    self.atom_split_placeholder = tf.placeholder(
        dtype='int32', shape=(self.batch_size,), 
        name=self.name + '_atom_split')
    self.atom_to_pair_placeholder = tf.placeholder(
        dtype='int32', shape=(None,2), 
        name=self.name + '_atom_to_pair')
    
    
    # Define the list of tensors to be used as topology
    self.topology = [self.atom_mask_placeholder, self.pair_mask_placeholder]
    self.topology = [self.pair_split_placeholder, self.pair_membership_placeholder,
                     self.atom_split_placeholder, self.atom_to_pair_placeholder]
    self.inputs = [self.atom_features_placeholder]
    self.inputs += self.topology

@@ -461,34 +467,42 @@ class WeaveGraphTopology(GraphTopology):
    # Extract atom numbers
    atom_feat = []
    pair_feat = []
    atom_mask = []
    pair_mask = []
    membership = []
    atom_split = []
    atom_to_pair = []
    pair_split = []
    max_atoms = self.max_atoms
    start = 0
    for im, mol in enumerate(batch):
      n_atoms = mol.get_num_atoms()
      atom_feat.append(
          np.pad(mol.get_atom_features(), ((0, max_atoms - n_atoms), (0, 0)),
                 'constant'))
      atom_mask.append(
          np.array([1] * n_atoms + [0] * (max_atoms - n_atoms), dtype=float))
      pair_feat.append(
          np.pad(mol.get_pair_features(), ((0, max_atoms - n_atoms), (
              0, max_atoms - n_atoms), (0, 0)), 'constant'))
      pair_mask.append(np.array([[1]*n_atoms + [0]*(max_atoms-n_atoms)]*n_atoms + \
                       [[0]*max_atoms]*(max_atoms-n_atoms), dtype=float))
      membership.extend([im] * n_atoms)
    atom_feat = np.stack(atom_feat)
    pair_feat = np.stack(pair_feat)
    atom_mask = np.stack(atom_mask)
    pair_mask = np.stack(pair_mask)
    membership = np.array(membership)
      # number of atoms in each molecule
      atom_split.append(n_atoms)
      # index of pair features
      C0, C1 = np.meshgrid(np.arange(n_atoms), np.arange(n_atoms))
      atom_to_pair.append(np.transpose(np.array([C1.flatten()+start, C0.flatten()+start])))
      start = start + n_atoms
      # number of pairs for each atom
      pair_split.extend([n_atoms]*n_atoms)
      # atom features
      atom_feat.append(mol.get_atom_features())
      # pair features
      pair_feat.append(np.reshape(mol.get_pair_features(), 
                                  (n_atoms*n_atoms, self.n_pair_feat)))
      
    atom_feat = np.concatenate(atom_feat, axis=0)
    pair_feat = np.concatenate(pair_feat, axis=0)
    atom_to_pair = np.concatenate(atom_to_pair, axis=0)
    atom_split = np.array(atom_split)
    n_pair = len(pair_split)
    pair_split = np.pad(pair_split, ((0, max_atoms-n_pair)), 'constant')
    pair_membership = np.array([True]*n_pair + [False]*(max_atoms-n_pair))
    
    # Generate dicts
    dict_DTNN = {
        self.atom_features_placeholder: atom_feat,
        self.pair_features_placeholder: pair_feat,
        self.atom_mask_placeholder: atom_mask,
        self.pair_mask_placeholder: pair_mask,
        self.membership_placeholder: membership
        self.pair_split_placeholder: pair_split,
        self.pair_membership_placeholder: pair_membership,
        self.atom_split_placeholder: atom_split,
        self.atom_to_pair_placeholder: atom_to_pair
    }
    return dict_DTNN
+1 −1
Original line number Diff line number Diff line
@@ -67,7 +67,7 @@ hps['dag'] = {
}
hps['weave'] = {
    'batch_size': 64,
    'nb_epoch': 50,
    'nb_epoch': 40,
    'learning_rate': 0.001,
    'n_graph_feat': 128,
    'n_pair_feat': 14,
+10 −16
Original line number Diff line number Diff line
@@ -266,15 +266,11 @@ def benchmark_classification(train_dataset,
    n_graph_feat = hyper_parameters['n_graph_feat']
    n_pair_feat = hyper_parameters['n_pair_feat']

    max_atoms_train = max([mol.get_num_atoms() for mol in train_dataset.X])
    max_atoms_valid = max([mol.get_num_atoms() for mol in valid_dataset.X])
    max_atoms_test = max([mol.get_num_atoms() for mol in test_dataset.X])
    max_atoms = max([max_atoms_train, max_atoms_valid, max_atoms_test])

    graph_model = deepchem.nn.SequentialWeaveGraph(
        max_atoms=max_atoms, n_atom_feat=n_features, n_pair_feat=n_pair_feat)
    graph_model.add(deepchem.nn.WeaveLayer(max_atoms, 75, 14))
    graph_model.add(deepchem.nn.WeaveConcat(batch_size, n_output=n_graph_feat))
        batch_size, n_atom_feat=n_features, n_pair_feat=n_pair_feat, max_atoms=120)
    graph_model.add(deepchem.nn.WeaveLayer(75, 14))
    graph_model.add(deepchem.nn.WeaveLayer(50, 50))
    graph_model.add(deepchem.nn.Dense(n_graph_feat, 50, activation='tanh'))
    graph_model.add(deepchem.nn.BatchNormalization(epsilon=1e-5, mode=1))
    graph_model.add(
        deepchem.nn.WeaveGather(
@@ -286,6 +282,7 @@ def benchmark_classification(train_dataset,
        n_features,
        batch_size=batch_size,
        learning_rate=learning_rate,
        learning_rate_decay_time=1000,
        optimizer_type="adam",
        beta1=.9,
        beta2=.999)
@@ -588,15 +585,11 @@ def benchmark_regression(train_dataset,
    n_graph_feat = hyper_parameters['n_graph_feat']
    n_pair_feat = hyper_parameters['n_pair_feat']

    max_atoms_train = max([mol.get_num_atoms() for mol in train_dataset.X])
    max_atoms_valid = max([mol.get_num_atoms() for mol in valid_dataset.X])
    max_atoms_test = max([mol.get_num_atoms() for mol in test_dataset.X])
    max_atoms = max([max_atoms_train, max_atoms_valid, max_atoms_test])

    graph_model = deepchem.nn.SequentialWeaveGraph(
        max_atoms=max_atoms, n_atom_feat=n_features, n_pair_feat=n_pair_feat)
    graph_model.add(deepchem.nn.WeaveLayer(max_atoms, 75, 14))
    graph_model.add(deepchem.nn.WeaveConcat(batch_size, n_output=n_graph_feat))
        batch_size, n_atom_feat=n_features, n_pair_feat=n_pair_feat, max_atoms=80)
    graph_model.add(deepchem.nn.WeaveLayer(75, 14))
    graph_model.add(deepchem.nn.WeaveLayer(50, 50))
    graph_model.add(deepchem.nn.Dense(n_graph_feat, 50, activation='tanh'))
    graph_model.add(deepchem.nn.BatchNormalization(epsilon=1e-5, mode=1))
    graph_model.add(
        deepchem.nn.WeaveGather(
@@ -608,6 +601,7 @@ def benchmark_regression(train_dataset,
        n_features,
        batch_size=batch_size,
        learning_rate=learning_rate,
        learning_rate_decay_time=1000,
        optimizer_type="adam",
        beta1=.9,
        beta2=.999)
Loading