Commit b3b685ab authored by Bharath Ramsundar's avatar Bharath Ramsundar
Browse files

Able to train and evaluate 3D CNNs.

parent df94fa0e
Loading
Loading
Loading
Loading
+23 −6
Original line number Diff line number Diff line
@@ -11,6 +11,8 @@ from deep_chem.utils.preprocess import train_test_random_split
from deep_chem.utils.load import load_and_transform_dataset
from deep_chem.utils.preprocess import tensor_dataset_to_numpy
from deep_chem.datasets.shapes_3d import load_data
from deep_chem.utils.evaluate import eval_model
from deep_chem.utils.evaluate import compute_r2_scores

# TODO(rbharath): Factor this out into a separate function in utils. Duplicates
# code in deep.py
@@ -30,21 +32,27 @@ def process_3D_convolutions(paths, task_transforms, seed=None, splittype="random
    train, test = train_test_scaffold_split(dataset)
  X_train, y_train, W_train = tensor_dataset_to_numpy(train)
  X_test, y_test, W_test = tensor_dataset_to_numpy(test)
  return (X_train, y_train, W_train), (X_test, y_test, W_test)
  return (X_train, y_train, W_train, train), (X_test, y_test, W_test, test)

def fit_3D_convolution(paths, task_types, task_transforms, axis_length=32, **training_params):
  """
  Perform stochastic gradient descent for a 3D CNN.
  """
  # TODO(rbharath): task_types is not yet used below.
  (X_train, y_train, W_train), (X_test, y_test, W_test) = process_3D_convolutions(
  (X_train, y_train, W_train, train), (X_test, y_test, W_test, test) = process_3D_convolutions(
    paths, task_transforms)
  nb_classes = 2
  print "np.shape(y_train)"
  print np.shape(y_train)
  print "np.shape(X_train): " + str(np.shape(X_train))
  print "np.shape(y_train): " + str(np.shape(y_train))
  train_3D_convolution(X_train, y_train, axis_length, **training_params)
  model = train_3D_convolution(X_train, y_train, axis_length, **training_params)
  results = eval_model(test, model, task_types,
      modeltype="keras", mode="tensor")
  local_task_types = task_types.copy()
  r2s = compute_r2_scores(results, local_task_types)
  if r2s:
    print "Mean R^2: %f" % np.mean(np.array(r2s.values()))

def train_3D_convolution(X, y, axis_length=32, batch_size=50, nb_epoch=1):
  """
@@ -57,6 +65,13 @@ def train_3D_convolution(X, y, axis_length=32, batch_size=50, nb_epoch=1):
  """
  # NOTE(review): this span is a rendered commit diff, not clean source.  The
  # "@@ ..." hunk headers mark elided lines (e.g. the nb_filters / nb_pool
  # definitions used below are not visible here), and some statements appear
  # twice because both the pre- and post-commit versions were captured --
  # confirm against the repository before relying on this text.
  print "train_3D_convolution"
  print "axis_length: " + str(axis_length)
  print "np.shape(X): " + str(np.shape(X))
  print "Shuffling X dimensions"
  # TODO(rbharath): Modify the featurization so that it matches desired shape.
  # X arrives as (n_samples, x, y, z, n_channels); this unpacking assumes the
  # three spatial axes are equal (a cube of side axis_length) -- TODO confirm.
  (n_samples, axis_length, _, _, n_channels) = np.shape(X)
  # TODO(rbharath): Modify the featurization so that it matches desired shape.
  # Relabel the buffer to the (samples, axis, channels, axis, axis) layout the
  # Convolution3D layer expects.  NOTE(review): np.reshape only relabels the
  # flat buffer, it does not move the channel axis; presumably np.transpose
  # was intended -- verify.
  X = np.reshape(X, (n_samples, axis_length, n_channels, axis_length, axis_length))
  print "np.shape(X): " + str(np.shape(X))
  # Number of classes for classification
  nb_classes = 2

@@ -71,10 +86,10 @@ def train_3D_convolution(X, y, axis_length=32, batch_size=50, nb_epoch=1):
  nb_conv = [7, 5, 3]

  model = Sequential()
  # TODO(rbharath): Avoid hard coding the number of stacks here
  # TODO(rbharath): Avoid hard coding the number of stacks here
  model.add(Convolution3D(nb_filter=nb_filters[0], stack_size=3,
     nb_row=nb_conv[0], nb_col=nb_conv[0], nb_depth=nb_conv[0],
     border_mode='valid', input_shape=(32, 32, 32, 3)))
     border_mode='valid'))
  # NOTE(review): the two closing lines above are the pre-/post-commit
  # versions of the same call (the commit drops the explicit input_shape).
  model.add(Activation('relu'))
  model.add(MaxPooling3D(poolsize=(nb_pool[0], nb_pool[0], nb_pool[0])))
  model.add(Convolution3D(nb_filter=nb_filters[1], stack_size=nb_filters[0],
@@ -88,13 +103,15 @@ def train_3D_convolution(X, y, axis_length=32, batch_size=50, nb_epoch=1):
  model.add(Activation('relu'))
  model.add(MaxPooling3D(poolsize=(nb_pool[2], nb_pool[2], nb_pool[2])))
  model.add(Flatten())
  model.add(Dense(320, 32/2, init='normal'))
  model.add(Dense(32, 32/2, init='normal'))
  # NOTE(review): the two Dense lines above are the pre-/post-commit versions
  # of one layer (input dim 320 -> 32); only one belongs in the real source.
  model.add(Activation('relu'))
  model.add(Dropout(0.5))
  # TODO(rbharath): Generalize this to support classification as well as regression.
  model.add(Dense(32/2, 1, init='normal'))

  # Despite the variable name, the optimizer constructed here is RMSprop.
  sgd = RMSprop(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
  print "About to compile model"
  model.compile(loss='mean_squared_error', optimizer=sgd)
  print "About to fit data to model."
  model.fit(X, y, batch_size=batch_size, nb_epoch=nb_epoch)
  return model
+13 −6
Original line number Diff line number Diff line
@@ -8,6 +8,7 @@ __license__ = "LGPL"
import numpy as np
import warnings
from deep_chem.utils.preprocess import dataset_to_numpy
from deep_chem.utils.preprocess import tensor_dataset_to_numpy
from deep_chem.utils.preprocess import labels_to_weights
from sklearn.metrics import mean_squared_error
from sklearn.metrics import roc_auc_score
@@ -16,7 +17,7 @@ from rdkit import Chem
from rdkit.Chem.Descriptors import ExactMolWt

def model_predictions(test_set, model, n_targets, task_types,
    modeltype="sklearn"):
    modeltype="sklearn", mode="regular"):
  """Obtains predictions of provided model on test_set.

  Returns a list of per-task predictions.
@@ -39,7 +40,15 @@ def model_predictions(test_set, model, n_targets, task_types,
    Either sklearn, keras, or keras_multitask
  """
  # Extract features for test set and make preds
  if mode == "regular":
    X, _, _ = dataset_to_numpy(test_set)
  elif mode == "tensor":
    X, _, _ = tensor_dataset_to_numpy(test_set)
    (n_samples, axis_length, _, _, n_channels) = np.shape(X)
    # TODO(rbharath): Modify the featurization so that it matches desired shape.
    X = np.reshape(X, (n_samples, axis_length, n_channels, axis_length, axis_length))
  else:
    raise ValueError("Improper mode: " + str(mode))
  if modeltype == "keras_multitask":
    predictions = model.predict({"input": X})
    ypreds = []
@@ -121,9 +130,7 @@ def size_eval_model(test_set, model, task_types, modeltype="sklearn"):
  print "RMS: " + str(target_rms)

  

  
def eval_model(test_set, model, task_types, modeltype="sklearn"):
def eval_model(test_set, model, task_types, modeltype="sklearn", mode="regular"):
  """Evaluates the provided model on the test-set.

  Returns a dict which maps target-names to pairs of np.ndarrays (ytrue,
@@ -147,7 +154,7 @@ def eval_model(test_set, model, task_types, modeltype="sklearn"):
  local_task_types = task_types.copy()
  endpoints = sorted_targets
  ypreds = model_predictions(test_set, model, len(sorted_targets),
      local_task_types, modeltype=modeltype)
      local_task_types, modeltype=modeltype, mode=mode)
  results = {}
  for target in endpoints:
    results[target] = ([], [])  # (ytrue, yscore)