Commit 2f703bf4 authored by Bharath Ramsundar's avatar Bharath Ramsundar
Browse files

Added Vanilla DNN Model impl. Still debugging eval.

parent 415033d0
Loading
Loading
Loading
Loading
+19 −0
Original line number Diff line number Diff line
@@ -11,6 +11,8 @@ class Model(object):
  """
  Abstract base class for different ML models.
  """
  # List of registered models
  registered_model_types = {}
  def __init__(self, task_types, model_params, initialize_raw_model=True):
    self.task_types = task_types
    self.model_params = model_params
@@ -41,6 +43,23 @@ class Model(object):
    """
    return(self.raw_model)

  @staticmethod
  def model_builder(model_type, task_types, model_params,
                    initialize_raw_model=True):
    """Factory: construct a model instance by its registered type string.

    Parameters
    ----------
    model_type: str
      Key previously passed to Model.register_model_type.
    task_types: dict
      Maps task names to "classification"/"regression" (forwarded to the
      model constructor).
    model_params: dict
      Hyperparameters forwarded to the model constructor.
    initialize_raw_model: bool
      Forwarded to the model constructor.

    Raises
    ------
    ValueError
      If model_type was never registered.
    """
    # Guard clause: fail fast on unknown model types.
    if model_type not in Model.registered_model_types:
      raise ValueError("model_type %s is not supported" % model_type)
    return Model.registered_model_types[model_type](
        task_types, model_params, initialize_raw_model)

  @staticmethod
  def register_model_type(model_type, model_class):
    """Register model_class under the string key model_type.

    Registered classes become constructible via Model.model_builder.
    Registration is global (class-level dict shared by all subclasses).
    """
    Model.registered_model_types[model_type] = model_class


'''
def model_predictions(X, model, n_targets, task_types, modeltype="sklearn"):
+77 −73
Original line number Diff line number Diff line
@@ -7,53 +7,103 @@ from keras.layers.core import Dense, Dropout
from keras.optimizers import SGD
from deep_chem.models import Model

#TODO(rbharath/enf): Make this real. It's a dummy now.
class SingleTaskDNN(Model):
class MultiTaskDNN(Model):
  """
  Abstract base class for different ML models.
  """
  def __init__(self, task_types, model_params, initialize_raw_model=True):
    """Build a multitask Keras Graph network: one shared hidden layer with
    dropout, plus one output head per task (softmax pair for classification,
    single linear unit for regression).

    Parameters
    ----------
    task_types: dict
      Maps task name -> "classification" or "regression". Output heads are
      created in sorted(task name) order and named "task0", "task1", ...
    model_params: dict
      Must contain "data_shape" (1-tuple of input dimension), "nb_hidden",
      "activation", "dropout", "learning_rate", "decay", "momentum",
      "nesterov".
    initialize_raw_model: bool
      If False, skip Graph construction (self.raw_model stays None).
    """
    # NOTE(review): the next three assignments look redundant — the super()
    # call below is expected to set the same attributes; likely leftover
    # lines from the refactor. Confirm against Model.__init__.
    self.task_types = task_types
    self.model_params = model_params
    self.raw_model = None
    super(MultiTaskDNN, self).__init__(task_types, model_params,
                                       initialize_raw_model)
    if initialize_raw_model:
      sorted_tasks = sorted(task_types.keys())
      # data_shape is expected to be a 1-tuple; this unpack raises if not.
      (n_inputs,) = model_params["data_shape"]
      model = Graph()
      model.add_input(name="input", input_shape=(n_inputs,))
      # Single shared hidden layer feeding every task head.
      model.add_node(
          Dense(model_params["nb_hidden"], init='uniform',
                activation=model_params["activation"]),
          name="dense", input="input")
      model.add_node(Dropout(model_params["dropout"]), name="dropout",
                             input="dense")
      top_layer = "dropout"
      for ind, task in enumerate(sorted_tasks):
        task_type = task_types[task]
        if task_type == "classification":
          # Two-way softmax head (one-hot labels; see get_data_dict).
          model.add_node(
              Dense(2, init='uniform', activation="softmax"),
              name="dense_head%d" % ind, input=top_layer)
        elif task_type == "regression":
          # Single linear output unit.
          model.add_node(
              Dense(1, init='uniform'),
              name="dense_head%d" % ind, input=top_layer)
        model.add_output(name="task%d" % ind, input="dense_head%d" % ind)

      # Per-output loss, matched to the head type created above.
      loss_dict = {}
      for ind, task in enumerate(sorted_tasks):
        task_type, taskname = task_types[task], "task%d" % ind
        if task_type == "classification":
          loss_dict[taskname] = "binary_crossentropy"
        elif task_type == "regression":
          loss_dict[taskname] = "mean_squared_error"
      sgd = SGD(lr=model_params["learning_rate"],
                decay=model_params["decay"],
                momentum=model_params["momentum"],
                nesterov=model_params["nesterov"])
      model.compile(optimizer=sgd, loss=loss_dict)
      self.raw_model = model

  def get_data_dict(self, X, y=None):
    """Map features (and optionally labels) onto the Graph model's named
    endpoints.

    Parameters
    ----------
    X: array-like
      Feature matrix, bound to the "input" endpoint.
    y: array-like or None
      Label matrix with one column per task (sorted task order). When given,
      classification columns are one-hot encoded and regression columns are
      passed through, keyed "task0", "task1", ...

    Returns
    -------
    dict mapping endpoint name -> data array.
    """
    data = {}
    data["input"] = X
    for ind, task in enumerate(sorted(self.task_types.keys())):
      # Bug fix: was `task_types[task]` (undefined name in this scope);
      # must read the instance attribute.
      task_type, taskname = self.task_types[task], "task%d" % ind
      if y is not None:
        if task_type == "classification":
          data[taskname] = to_one_hot(y[:, ind])
        elif task_type == "regression":
          data[taskname] = y[:, ind]
    return data

  def get_sample_weight(self, w):
    """Build the per-output sample-weight dict keyed by endpoint name.

    Column ind of w weighs the task at position ind in sorted task order,
    matching the "task%d" endpoint naming used elsewhere in this class.
    """
    task_names = sorted(self.task_types.keys())
    return {"task%d" % ind: w[:, ind]
            for ind in range(len(task_names))}

  def fit_on_batch(self, X, y, w):
    """Update the model with one minibatch of data.

    Parameters
    ----------
    X: array-like
      Feature minibatch.
    y: array-like
      Label minibatch, one column per task.
    w: array-like
      Per-sample, per-task weights (same shape as y).

    Returns
    -------
    The loss reported by Keras train_on_batch.
    """
    # Bug fixes: removed the leftover unconditional NotImplementedError
    # (which made everything below unreachable) and corrected the
    # `W` -> `w` name mismatch (the parameter is lowercase).
    eps = .001
    # Add eps weight to avoid minibatches with zero weight
    # (zero-weight batches cause theano to crash).
    w = w + eps * np.ones(np.shape(w))
    data = self.get_data_dict(X, y)
    sample_weight = self.get_sample_weight(w)
    loss = self.raw_model.train_on_batch(data, sample_weight=sample_weight)
    return loss

  def predict_on_batch(self, X):
    """Make predictions on one minibatch of new data.

    Parameters
    ----------
    X: array-like
      Feature minibatch.

    Returns
    -------
    Squeezed numpy array of raw-model predictions.
    """
    # Bug fix: removed the leftover unconditional NotImplementedError that
    # made the real implementation below unreachable.
    data = self.get_data_dict(X)
    y_pred = self.raw_model.predict_on_batch(data)
    # Drop singleton dimensions so single-task output is a flat vector.
    y_pred = np.squeeze(y_pred)
    return y_pred

#TODO(rbharath/enf): Make this real. It's a dummy now.
class MultiTaskDNN(Model):
# Same class serves both problem kinds: the task_types dict passed at
# construction selects the head/loss per task.
Model.register_model_type("multitask_deep_regressor", MultiTaskDNN)
Model.register_model_type("multitask_deep_classifier", MultiTaskDNN)

class SingleTaskDNN(MultiTaskDNN):
  """
  Single-task vanilla deep network.

  Thin wrapper over MultiTaskDNN: a single-task network is just the
  multitask architecture with exactly one output head, so fit_on_batch
  and predict_on_batch are inherited unchanged.
  """
  def __init__(self, task_types, model_params, initialize_raw_model=True):
    # Bug fix: dropped the dummy fit_on_batch/predict_on_batch overrides
    # that raised NotImplementedError and shadowed the working inherited
    # implementations; also dropped redundant attribute assignments that
    # the superclass constructor performs.
    super(SingleTaskDNN, self).__init__(task_types, model_params,
                                        initialize_raw_model)
# Same class serves both problem kinds: the task_types dict passed at
# construction selects the head/loss per task.
Model.register_model_type("singletask_deep_regressor", SingleTaskDNN)
Model.register_model_type("singletask_deep_classifier", SingleTaskDNN)

def to_one_hot(y):
  """Transforms label vector into one-hot encoding.
@@ -154,52 +204,6 @@ def train_multitask_model(X, y, W, task_types, learning_rate=0.01,
  nb_epoch: int
    maximal number of epochs to run the optimizer
  """
  eps = .001
  sorted_tasks = sorted(task_types.keys())
  local_task_types = task_types.copy()
  endpoints = sorted_tasks
  print "train_multitask_model()"
  print "np.shape(X)"
  print np.shape(X)
  n_inputs = len(X[0].flatten())
  # Add eps weight to avoid minibatches with zero weight (causes theano to crash).
  W = W + eps * np.ones(np.shape(W))
  print "np.shape(W)"
  print np.shape(W)
  model = Graph()
  #model.add_input(name="input", ndim=n_inputs)
  model.add_input(name="input", input_shape=(n_inputs,))
  model.add_node(
      Dense(nb_hidden, init='uniform', activation=activation),
      name="dense", input="input")
  model.add_node(Dropout(dropout), name="dropout", input="dense")
  top_layer = "dropout"
  for ind, task in enumerate(endpoints):
    task_type = local_task_types[task]
    if task_type == "classification":
      model.add_node(
          Dense(2, init='uniform', activation="softmax"),
          name="dense_head%d" % ind, input=top_layer)
    elif task_type == "regression":
      model.add_node(
          Dense(1, init='uniform'),
          name="dense_head%d" % ind, input=top_layer)
    model.add_output(name="task%d" % ind, input="dense_head%d" % ind)
  data_dict, loss_dict, sample_weights = {}, {}, {}
  data_dict["input"] = X
  for ind, task in enumerate(endpoints):
    task_type = local_task_types[task]
    taskname = "task%d" % ind
    sample_weights[taskname] = W[:, ind]
    if task_type == "classification":
      loss_dict[taskname] = "binary_crossentropy"
      data_dict[taskname] = to_one_hot(y[:, ind])
    elif task_type == "regression":
      loss_dict[taskname] = "mean_squared_error"
      data_dict[taskname] = y[:, ind]
  sgd = SGD(lr=learning_rate, decay=decay, momentum=momentum, nesterov=nesterov)
  print "About to compile model!"
  model.compile(optimizer=sgd, loss=loss_dict)
  print "Done compiling. About to fit model!"
  print "validation_split: " + str(validation_split)
  model.fit(data_dict, nb_epoch=nb_epoch, batch_size=batch_size,
+2 −1
Original line number Diff line number Diff line
@@ -76,7 +76,6 @@ class DockingDNN(Model):
      self.raw_model = model

  def fit_on_batch(self, X, y, w):
    """Update the docking model on one minibatch.

    NOTE(review): the sample-weight argument w is currently ignored —
    confirm whether weighting is needed here as in MultiTaskDNN.
    shuffle_data is presumably a module-level helper; verify its contract.
    """
    # TODO(rbharath): Modify the featurization so that it matches desired shaped.
    X = shuffle_data(X)
    loss = self.raw_model.train_on_batch(X, y)
    print("Loss: %f" % loss)
@@ -89,3 +88,5 @@ class DockingDNN(Model):
    y_pred = self.raw_model.predict_on_batch(X)
    y_pred = np.squeeze(y_pred)
    return y_pred

# Make DockingDNN constructible via Model.model_builder.
Model.register_model_type("convolutional_3D_regressor", DockingDNN)
+20 −20
Original line number Diff line number Diff line
@@ -7,24 +7,24 @@ from __future__ import division
from __future__ import unicode_literals

from deep_chem.models.deep import SingleTaskDNN
from deep_chem.models.deep import MultiTaskDNN
from deep_chem.models.deep3d import DockingDNN
from deep_chem.models.standard import SklearnModel
#from deep_chem.models.deep import MultiTaskDNN
#from deep_chem.models.deep3d import DockingDNN
#from deep_chem.models.standard import SklearnModel

def model_builder(model_type, task_types, model_params,
                  initialize_raw_model=True):
  """
  Factory function to construct model.

  Known deep-network types dispatch to their class; any other model_type
  falls back to a scikit-learn model (which takes no
  initialize_raw_model argument).
  """
  deep_models = {
      "singletask_deep_network": SingleTaskDNN,
      "multitask_deep_network": MultiTaskDNN,
      "convolutional_3D_regressor": DockingDNN,
  }
  model_class = deep_models.get(model_type)
  if model_class is not None:
    return model_class(task_types, model_params, initialize_raw_model)
  return SklearnModel(task_types, model_params)
#def model_builder(model_type, task_types, model_params,
#                  initialize_raw_model=True):
#  """
#  Factory function to construct model.
#  """
#  if model_type == "singletask_deep_network":
#    model = SingleTaskDNN(task_types, model_params,
#                          initialize_raw_model)
#  elif model_type == "multitask_deep_network":
#    model = MultiTaskDNN(task_types, model_params,
#                         initialize_raw_model)
#  elif model_type == "convolutional_3D_regressor":
#    model = DockingDNN(task_types, model_params,
#                       initialize_raw_model)
#  else:
#    model = SklearnModel(task_types, model_params)
#  return model
+18 −1
Original line number Diff line number Diff line
@@ -137,6 +137,15 @@ def add_model_group(fit_cmd):
  group.add_argument(
      "--decay", type=float, default=1e-4,
      help="Learning rate decay for NN models.")
  group.add_argument(
      "--activation", type=str, default="relu",
      help="NN activation function.")
  group.add_argument(
      "--momentum", type=float, default=.9,
      help="Momentum for stochastic gradient descent.")
  group.add_argument(
      "--nesterov", action="store_true",
      help="If set, use Nesterov acceleration.")

def add_fit_command(subparsers):
  """Adds arguments for fit subcommand."""
@@ -172,6 +181,13 @@ def add_eval_command(subparsers):
      help="Computed statistics on evaluated set.")
  eval_cmd.set_defaults(func=eval_trained_model_wrapper)

def add_predict_command(subparsers):
  """Adds arguments for predict subcommand."""
  predict_cmd = subparsers.add_parser(
    "predict",
    help="Make predictions of model on new data.")
  # TODO: this subcommand is unfinished — add an argument group for predict
  # options (the original call was cut off: predict_cmd.add_argument_group).

# TODO(rbharath): There are a lot of duplicate commands introduced here. Is
# there a nice way to factor them?
def add_model_command(subparsers):
@@ -225,7 +241,8 @@ def extract_model_params(args):
  Given input arguments, return a dict specifiying model parameters.
  """
  params = ["nb_hidden", "learning_rate", "dropout",
            "nb_epoch", "decay", "batch_size", "loss_function"]
            "nb_epoch", "decay", "batch_size", "loss_function",
            "activation", "momentum", "nesterov"]

  model_params = {param : getattr(args, param) for param in params}
  return(model_params)
Loading