Commit 005942c3 authored by miaecle

add masking

parent 00942093
+0 −82
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 13 14:42:40 2017

@author: zqwu
"""
import os
import numpy as np
import tensorflow as tf
import sklearn.metrics
import tempfile
from deepchem.models.tf_new_models.multitask_regressor import MultitaskGraphRegressor


class DTNNRegressor(MultitaskGraphRegressor):

  def __init__(self,
               model,
               n_tasks=1,
               logdir=None,
               batch_size=50,
               final_loss='weighted_L2',
               learning_rate=.001,
               optimizer_type="adam",
               learning_rate_decay_time=1000,
               beta1=.9,
               beta2=.999,
               pad_batches=True,
               verbose=True):
    self.n_tasks = n_tasks
    self.verbose = verbose
    self.final_loss = final_loss
    self.model = model
    self.sess = tf.Session(graph=self.model.graph)
    if logdir is not None:
      if not os.path.exists(logdir):
        os.makedirs(logdir)
    else:
      logdir = tempfile.mkdtemp()
    self.logdir = logdir

    with self.model.graph.as_default():
      # Extract model info 
      self.batch_size = batch_size
      self.pad_batches = pad_batches
      # Get graph topology for x
      self.graph_topology = self.model.get_graph_topology()

      # Building outputs
      self.outputs = self.build()
      self.loss_op = self.add_training_loss(self.final_loss, self.outputs)

      self.learning_rate = learning_rate
      self.T = learning_rate_decay_time
      self.optimizer_type = optimizer_type

      self.optimizer_beta1 = beta1
      self.optimizer_beta2 = beta2

      # Set epsilon
      self.epsilon = 1e-7
      self.add_optimizer()

      # Initialize
      self.init_fn = tf.global_variables_initializer()
      self.sess.run(self.init_fn)

      # Path to save checkpoint files, which matches the
      # replicated supervisor's default path.
      self._save_path = os.path.join(logdir, 'model.ckpt')

  def build(self):
    # Create target inputs
    self.label_placeholder = tf.placeholder(
        dtype='float32', shape=(None, self.n_tasks), name="label_placeholder")
    self.weight_placeholder = tf.placeholder(
        dtype='float32', shape=(None, self.n_tasks), name="weight_placeholder")

    outputs = self.model.return_outputs()
    return outputs
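
The DTNNRegressor above wires per-task weights into its 'weighted_L2' loss through the label and weight placeholders it builds. The loss itself is not shown in this diff; below is a minimal NumPy sketch of the usual convention, assuming (as is standard in DeepChem) that a weight of 0 marks a missing label so it contributes nothing to the loss:

import numpy as np

def weighted_l2_loss(labels, predictions, weights):
  # Hypothetical sketch: per-task weights act as a mask, so entries
  # with weight 0 (missing labels) drop out of the squared error.
  return np.sum(weights * np.square(labels - predictions))

# Example: the second task of the first sample is unlabeled (weight 0).
labels = np.array([[1.0, 0.0], [2.0, 3.0]])
preds = np.array([[0.5, 9.9], [2.0, 2.0]])
weights = np.array([[1.0, 0.0], [1.0, 1.0]])
print(weighted_l2_loss(labels, preds, weights))  # 0.25 + 0.0 + 0.0 + 1.0 = 1.25
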
+3 −3
@@ -121,6 +121,9 @@ class SequentialDTNNGraph(SequentialGraph):
       if type(layer).__name__ in ['DTNNStep']:
         self.output = layer([self.output] +
                             self.graph_topology.get_topology_placeholders())
+      elif type(layer).__name__ in ['DTNNGather']:
+        self.output = layer(
+            [self.output, self.graph_topology.atom_mask_placeholder])
       else:
         self.output = layer(self.output)
       self.layers.append(layer)
@@ -128,9 +131,6 @@ class SequentialDTNNGraph(SequentialGraph):
   def return_inputs(self):
     return self.graph_topology.get_atom_number_placeholders()

-  def get_layer(self, layer_id):
-    return self.layers[layer_id]
-

 class SequentialSupportGraph(object):
   """An analog of Keras Sequential model for test/support models."""
+7 −1
@@ -176,12 +176,16 @@ class DTNNGraphTopology(GraphTopology):
         dtype='int32',
         shape=(None, self.max_n_atoms),
         name=self.name + '_atom_number')
+    self.atom_mask_placeholder = tf.placeholder(
+        dtype='float32',
+        shape=(None, self.max_n_atoms),
+        name=self.name + '_atom_mask')
     self.distance_matrix_placeholder = tf.placeholder(
         dtype='float32',
         shape=(None, self.max_n_atoms, self.max_n_atoms, self.n_distance),
         name=self.name + '_distance_matrix')
     self.distance_matrix_mask_placeholder = tf.placeholder(
-        dtype=tf.float32,
+        dtype='float32',
         shape=(None, self.max_n_atoms, self.max_n_atoms),
         name=self.name + '_distance_matrix_mask')

@@ -216,6 +220,7 @@ class DTNNGraphTopology(GraphTopology):
     """
     # Extract atom numbers
     atom_number = np.asarray(map(np.diag, batch))
+    atom_mask = np.sign(atom_number)
     atom_number = np.asarray(
         np.round(np.power(2 * atom_number, 1 / 2.4)), dtype=int)
     ZiZj = []
@@ -241,6 +246,7 @@ class DTNNGraphTopology(GraphTopology):
     # Generate dicts
     dict_DTNN = {
         self.atom_number_placeholder: atom_number,
+        self.atom_mask_placeholder: atom_mask,
         self.distance_matrix_placeholder: distance_matrix,
         self.distance_matrix_mask_placeholder: distance_matrix_mask
     }
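
The featurization change can be checked by hand: the Coulomb matrix diagonal stores 0.5 * Z**2.4 under the standard convention, so np.sign of the diagonal is exactly 1.0 at real atoms and 0.0 at zero-padded slots, while the rounded 2.4th root recovers the atomic numbers. A small worked example:

import numpy as np

diag = np.array([36.858, 0.5, 0.5, 0.0, 0.0])  # e.g. CH2 padded to 5 atoms
atom_mask = np.sign(diag)                       # [1. 1. 1. 0. 0.]
atom_number = np.asarray(
    np.round(np.power(2 * diag, 1 / 2.4)), dtype=int)
print(atom_mask)    # padded slots contribute nothing downstream
print(atom_number)  # [6 1 1 0 0] -> carbon, two hydrogens, padding
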
+0 −9
@@ -91,12 +91,3 @@ hps['graphconvreg'] = {
     'n_fully_connected_nodes': 256,
     'seed': 123
 }
-hps['DTNN'] = {
-    'batch_size': 128,
-    'nb_epoch': 20,
-    'learning_rate': 0.0005,
-    'n_embedding': 20,
-    'n_hidden': 50,
-    'n_distance': 100,
-    'seed': 123
-}
+3 −36
@@ -303,8 +303,7 @@ def benchmark_regression(
   test_scores = {}

   assert model in [
-      'tf_regression', 'tf_regression_ft', 'rf_regression', 'graphconvreg',
-      'DTNN'
+      'tf_regression', 'tf_regression_ft', 'rf_regression', 'graphconvreg'
   ]
   if hyper_parameters is None:
     hyper_parameters = hps[model]
@@ -364,7 +363,7 @@ def benchmark_regression(
         n_eval=10,
         seed=seed)

-  elif model_name == 'graphconvreg':
+  if model_name == 'graphconvreg':
     # Initialize model folder

     # Loading hyper parameters
@@ -401,39 +400,7 @@ def benchmark_regression(
         beta1=.9,
         beta2=.999)

-  elif model_name == 'DTNN':
-    # Initialize model folder
-
-    # Loading hyper parameters
-    batch_size = hyper_parameters['batch_size']
-    nb_epoch = hyper_parameters['nb_epoch']
-    learning_rate = hyper_parameters['learning_rate']
-    n_distance = hyper_parameters['n_distance']
-    n_embedding = hyper_parameters['n_embedding']
-    n_hidden = hyper_parameters['n_hidden']
-
-    tf.set_random_seed(seed)
-    graph_model = deepchem.nn.SequentialDTNNGraph(
-        max_n_atoms=n_features[0], n_distance=n_distance)
-    graph_model.add(deepchem.nn.DTNNEmbedding(n_embedding=n_embedding))
-    graph_model.add(
-        deepchem.nn.DTNNStep(n_embedding=n_embedding, n_distance=n_distance))
-    graph_model.add(
-        deepchem.nn.DTNNStep(n_embedding=n_embedding, n_distance=n_distance))
-    graph_model.add(
-        deepchem.nn.DTNNGather(
-            n_tasks=len(tasks), n_embedding=n_embedding, n_hidden=n_hidden))
-
-    model = deepchem.models.DTNNRegressor(
-        graph_model,
-        n_tasks=len(tasks),
-        batch_size=batch_size,
-        learning_rate=learning_rate,
-        optimizer_type="adam",
-        beta1=.9,
-        beta2=.999)
-
-  elif model_name == 'rf_regression':
+  if model_name == 'rf_regression':
     # Loading hyper parameters
     n_estimators = hyper_parameters['n_estimators']
     nb_epoch = None