Commit b4ad5857 authored by Bharath Ramsundar

Removing model_params

parent 3ef691a7
+25 −25
@@ -27,6 +27,15 @@ class TensorflowGraph(object):
    penalty
    nb_epoch
    pad_batches
+    penalty_type
+    optimizer
+    learning_rate
+    momentum
+    data_shape
+    layer_sizes
+    weight_init_stddevs
+    bias_init_consts
+    dropouts

  Classifier:
    num_classes
@@ -52,7 +61,6 @@ class TensorflowGraph(object):
      normalization. Should be set to tf.no_op() if no updates are required.

  This base class provides the following attributes:
-    model_params: dictionary containing model configuration parameters.
    graph: TensorFlow graph object.
    logdir: Path to the file output directory to store checkpoints etc.
    master: TensorFlow session master specification string.
@@ -62,25 +70,24 @@ class TensorflowGraph(object):
      mask when calculating gradient costs.

  Args:
-    model_params: dictionary.
    train: If True, model is in training mode.
    logdir: Directory for output files.
  """

-  def __init__(self, model_params, logdir, tasks, task_types, train=True,
-               verbosity=None):
+  def __init__(self, n_tasks, n_inputs, logdir, layer_sizes=[1000],
+               weight_init_stddevs=[.02], bias_init_consts=[1], penalty=0.0,
+               learning_rate=.001, momentum=.9, optimizer="adam",
+               batch_size=50, num_classes=2, train=True, verbosity=None):
    """Constructs the computational graph.

    Args:
      train: whether model is in train mode
-      model_params: dictionary of model parameters
      logdir: Location to save data

    This function constructs the computational graph for the model. It relies
    on subclassed methods (build/cost) to construct specific graphs.
    """
    self.graph = tf.Graph()
-    self.model_params = model_params
    self.logdir = logdir
    self.tasks = tasks
    self.task_types = task_types
@@ -143,7 +150,6 @@ class TensorflowGraph(object):
    with self.graph.as_default():
      self.require_attributes(['output', 'labels', 'weights'])
      epsilon = 1e-3  # small float to avoid dividing by zero
-      model_params = self.model_params
      weighted_costs = []  # weighted costs for each example
      gradient_costs = []  # costs used for gradient calculation

@@ -162,7 +168,7 @@ class TensorflowGraph(object):
              # tf.reduce_mean (which can put ops on the CPU) we explicitly
              # calculate with div/sum so it stays on the GPU.
              gradient_cost = tf.div(tf.reduce_sum(weighted_cost),
-                                     model_params["batch_size"])
+                                     self.batch_size)
              gradient_costs.append(gradient_cost)

        # aggregated costs
@@ -171,8 +177,8 @@ class TensorflowGraph(object):
            loss = tf.add_n(gradient_costs)

          # weight decay
-          if model_params["penalty"] != 0.0:
-            penalty = model_ops.WeightDecay(model_params)
+          if self.penalty != 0.0:
+            penalty = model_ops.WeightDecay(self.penalty_type, self.penalty)
            loss += penalty

        # loss used for gradient calculation
@@ -189,8 +195,8 @@ class TensorflowGraph(object):
      else:
        self.updates = tf.no_op(name='updates')

-  def fit(self, dataset, shuffle=False, max_checkpoints_to_keep=5,
-          log_every_N_batches=50):
+  def fit(self, dataset, nb_epoch=10, pad_batches=False, shuffle=False,
+          max_checkpoints_to_keep=5, log_every_N_batches=50):
    """Fit the model.

    Args:
@@ -205,14 +211,9 @@ class TensorflowGraph(object):
    time1 = time.time()
    ############################################################## TIMING
    num_datapoints = len(dataset)
-    batch_size = self.model_params["batch_size"]
+    batch_size = self.batch_size
    step_per_epoch = np.ceil(float(num_datapoints)/batch_size)
-    nb_epoch = self.model_params["nb_epoch"]
    log("Training for %d epochs" % nb_epoch, self.verbosity)
-    if "pad_batches" in self.model_params:
-      pad_batches = self.model_params["pad_batches"]
-    else:
-      pad_batches = False
    with self.graph.as_default():
      self.require_attributes(['loss', 'updates'])
      train_op = self.get_training_op()
@@ -390,7 +391,7 @@ class TensorflowGraph(object):
    Returns:
    A training op.
    """
-    opt = model_ops.Optimizer(self.model_params)
+    opt = model_ops.Optimizer(self.optimizer, self.learning_rate, self.momentum)
    return opt.minimize(self.loss, name='train')

  def _get_shared_session(self):
@@ -499,9 +500,8 @@ class TensorflowClassifier(TensorflowGraph):
    feeding and fetching the same tensor.
    """
    with self.graph.as_default():
-      model_params = self.model_params
-      batch_size = model_params["batch_size"]
-      num_classes = model_params["num_classes"]
+      batch_size = self.batch_size
+      num_classes = self.num_classes
      labels = []
      for task in xrange(self.num_tasks):
        with tf.name_scope(self.placeholder_scope):
@@ -555,7 +555,7 @@ class TensorflowRegressor(TensorflowGraph):
    feeding and fetching the same tensor.
    """
    with self.graph.as_default():
-      batch_size = self.model_params["batch_size"]
+      batch_size = self.batch_size
      labels = []
      for task in xrange(self.num_tasks):
        with tf.name_scope(self.placeholder_scope):
@@ -578,9 +578,9 @@ class TensorflowModel(Model):
    self.verbosity = verbosity
    if tf_class is None:
      tf_class = TensorflowGraph
-    self.train_model = tf_class(model_params, logdir, tasks, task_types,
+    self.train_model = tf_class(logdir, tasks, task_types,
                                train=True, verbosity=verbosity)
-    self.eval_model = tf_class(model_params, logdir, tasks, task_types,
+    self.eval_model = tf_class(logdir, tasks, task_types,
                                train=False, verbosity=verbosity)
    self.fit_transformers = None

+0 −68
try:
    from neuralfingerprint.mol_graph import graph_from_smiles_tuple, degrees
    from neuralfingerprint import mol_graph
    from data_parser import parse_graph
except ImportError:
    pass

import numpy as np

class Molecule(object):
    def __init__(self, atoms, adj_mat, type_adj, deg_list, bonds):
        self.atoms = atoms
        self.adj_mat = adj_mat
        self.type_adj = type_adj
        self.deg_list = deg_list
        self.bonds = bonds    
        
    def get_atoms(self):
        return self.atoms
    
    def get_adj_mat(self):
        return self.adj_mat

    def get_type_adj(self):
        return self.type_adj
    
    def get_deg_list(self):
        return self.deg_list
    
    def get_bonds(self):
        return self.bonds

class SmilesDataManager():
    def __init__(self, raw_smiles, targets, bond_decimals):
        self.raw_smiles = raw_smiles
        self.targets = targets

        # Run parser
        self.run(bond_decimals)

    def run(self, bond_decimals):
        self.parse_data(bond_decimals)

    def get_N_molecules(self):
        return len(self.raw_smiles)
        
    def parse_data(self, bond_decimals):
        self.molecules = []

        k = 0
        while k < self.get_N_molecules():
            smile = self.raw_smiles[k]
            target = self.targets[k]

            # Convert smile to graph
            #print(target, smile)
            graph = mol_graph.graph_from_smiles(smile)

            # Get features
            try:
                atoms, adj_mat, type_adj, deg_list, bonds = parse_graph(graph, bond_decimals)
                self.molecules.append(Molecule(atoms, adj_mat, type_adj, deg_list, bonds))
            except Exception:
                # Remove the bad example; don't advance k here, since
                # np.delete shifts the next element into position k.
                self.raw_smiles = np.delete(self.raw_smiles, k)
                self.targets = np.delete(self.targets, k)
                print("molecule with smile " + smile + " failed to compile")
                continue
            k += 1
+7 −8
@@ -98,8 +98,7 @@ class TensorflowMultiTaskClassifier(TensorflowClassifier):
      mol_features: Molecule descriptor (e.g. fingerprint) tensor with shape
        batch_size x num_features.
    """
-    assert len(self.model_params["data_shape"]) == 1
-    num_features = self.model_params["data_shape"][0]
+    num_features = self.n_inputs
    with self.graph.as_default():
      with tf.name_scope(self.placeholder_scope):
        self.mol_features = tf.placeholder(
@@ -107,10 +106,10 @@ class TensorflowMultiTaskClassifier(TensorflowClassifier):
            shape=[None, num_features],
            name='mol_features')

-      layer_sizes = self.model_params["layer_sizes"]
-      weight_init_stddevs = self.model_params["weight_init_stddevs"]
-      bias_init_consts = self.model_params["bias_init_consts"]
-      dropouts = self.model_params["dropouts"]
+      layer_sizes = self.layer_sizes
+      weight_init_stddevs = self.weight_init_stddevs
+      bias_init_consts = self.bias_init_consts
+      dropouts = self.dropouts
      lengths_set = {
          len(layer_sizes),
          len(weight_init_stddevs),
@@ -158,13 +157,13 @@ class TensorflowMultiTaskClassifier(TensorflowClassifier):
      else:
        # Dummy placeholders
        orig_dict["labels_%d" % task] = np.squeeze(to_one_hot(
-            np.zeros((self.model_params["batch_size"],))))
+            np.zeros((self.batch_size,))))
      if w_b is not None:
        orig_dict["weights_%d" % task] = w_b[:, task]
      else:
        # Dummy placeholders
        orig_dict["weights_%d" % task] = np.ones(
-            (self.model_params["batch_size"],))
+            (self.batch_size,))
    return self._get_feed_dict(orig_dict)

  def predict_proba_on_batch(self, X):
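The dummy placeholders above must match the shapes the graph expects for a full batch. A self-contained numpy sketch of those shapes (to_one_hot here is a minimal stand-in for deepchem's helper, not its actual implementation):

import numpy as np

def to_one_hot(y, num_classes=2):
    # Map a length-N vector of integer class ids to an N x num_classes
    # one-hot matrix.
    y = y.astype(int)
    out = np.zeros((len(y), num_classes))
    out[np.arange(len(y)), y] = 1
    return out

batch_size = 50
# Dummy labels for a task with no data in this batch: class id 0 for every
# example, one-hot encoded to shape (batch_size, num_classes).
dummy_labels = np.squeeze(to_one_hot(np.zeros((batch_size,))))
assert dummy_labels.shape == (batch_size, 2)
# Dummy weights: weight 1 for every example, shape (batch_size,).
dummy_weights = np.ones((batch_size,))
assert dummy_weights.shape == (batch_size,)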
+20 −22
@@ -211,11 +211,11 @@ def is_training():
    raise ValueError('Training mode has more than one setting.')
  return train[0]

-def WeightDecay(model_params):
+def WeightDecay(penalty_type, penalty):
  """Add weight decay.

  Args:
-    model_params: dictionary.
+    penalty_type: 'l1' or 'l2' weight decay.
+    penalty: scalar magnitude of the weight decay.

  Returns:
    A scalar tensor containing the weight decay cost.
@@ -230,14 +230,13 @@ def WeightDecay(model_params):
      variables.append(v)

  with tf.name_scope('weight_decay'):
-    if model_params["penalty_type"] == 'l1':
+    if penalty_type == 'l1':
      cost = tf.add_n([tf.reduce_sum(tf.abs(v)) for v in variables])
-    elif model_params["penalty_type"] == 'l2':
+    elif penalty_type == 'l2':
      cost = tf.add_n([tf.nn.l2_loss(v) for v in variables])
    else:
-      raise NotImplementedError('Unsupported penalty_type %s' %
-                                model_params["penalty_type"])
-    cost *= model_params["penalty"]
+      raise NotImplementedError('Unsupported penalty_type %s' % penalty_type)
+    cost *= penalty
    tf.scalar_summary('Weight Decay Cost', cost)
  return cost

@@ -377,11 +376,10 @@ def Transform(tensor, transform, convolution=True, mask=None):
        tensor = model_utils.Mask(tensor, mask)
  return tensor

-def Optimizer(model_params):
+def Optimizer(optimizer="adam", learning_rate=.001, momentum=.9):
  """Create model optimizer.

  Args:
-    model_params: dictionary.
+    optimizer: one of 'adagrad', 'adam', 'momentum', 'rmsprop', 'sgd'.
+    learning_rate: float learning rate.
+    momentum: float momentum, used by the 'momentum' and 'rmsprop' optimizers.

  Returns:
    A training Optimizer.
@@ -390,18 +388,18 @@ def Optimizer(model_params):
    NotImplementedError: If an unsupported optimizer is requested.
  """
  # TODO(user): gradient clipping (see Minimize)
-  if model_params["optimizer"] == 'adagrad':
-    train_op = tf.train.AdagradOptimizer(model_params["learning_rate"])
-  elif model_params["optimizer"] == 'adam':
-    train_op = tf.train.AdamOptimizer(model_params["learning_rate"])
-  elif model_params["optimizer"] == 'momentum':
-    train_op = tf.train.MomentumOptimizer(model_params["learning_rate"],
-                                          model_params["momentum"])
-  elif model_params["optimizer"] == 'rmsprop':
-    train_op = tf.train.RMSPropOptimizer(model_params["learning_rate"],
-                                         model_params["momentum"])
-  elif model_params["optimizer"] == 'sgd':
-    train_op = tf.train.GradientDescentOptimizer(model_params["learning_rate"])
+  if optimizer == 'adagrad':
+    train_op = tf.train.AdagradOptimizer(learning_rate)
+  elif optimizer == 'adam':
+    train_op = tf.train.AdamOptimizer(learning_rate)
+  elif optimizer == 'momentum':
+    train_op = tf.train.MomentumOptimizer(learning_rate,
+                                          momentum)
+  elif optimizer == 'rmsprop':
+    train_op = tf.train.RMSPropOptimizer(learning_rate,
+                                         momentum)
+  elif optimizer == 'sgd':
+    train_op = tf.train.GradientDescentOptimizer(learning_rate)
  else:
-    raise NotImplementedError('Unsupported optimizer %s' % model_params["optimizer"])
+    raise NotImplementedError('Unsupported optimizer %s' % optimizer)
  return train_op
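For the caller's side of these two signature changes, a minimal sketch (assuming model_ops is imported and loss is an existing scalar tensor; neither appears in this diff):

# Optimizer now takes explicit arguments; momentum keeps its default and is
# simply ignored by optimizers such as 'adam' that do not use it.
opt = model_ops.Optimizer(optimizer="adam", learning_rate=.001)
train_op = opt.minimize(loss, name='train')

# WeightDecay likewise: penalty_type picks l1/l2, penalty scales the cost.
loss += model_ops.WeightDecay(penalty_type="l2", penalty=1e-4)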