Commit b3b685ab authored by Bharath Ramsundar's avatar Bharath Ramsundar
Browse files

Able to train and evaluate 3D CNNs.

parent df94fa0e
Loading
Loading
Loading
Loading
+23 −6
Original line number Diff line number Diff line
@@ -11,6 +11,8 @@ from deep_chem.utils.preprocess import train_test_random_split
from deep_chem.utils.load import load_and_transform_dataset
from deep_chem.utils.preprocess import tensor_dataset_to_numpy
from deep_chem.datasets.shapes_3d import load_data
from deep_chem.utils.evaluate import eval_model
from deep_chem.utils.evaluate import compute_r2_scores

# TODO(rbharath): Factor this out into a separate function in utils. Duplicates
# code in deep.py
@@ -30,21 +32,27 @@ def process_3D_convolutions(paths, task_transforms, seed=None, splittype="random
    train, test = train_test_scaffold_split(dataset)
  X_train, y_train, W_train = tensor_dataset_to_numpy(train)
  X_test, y_test, W_test = tensor_dataset_to_numpy(test)
  return (X_train, y_train, W_train), (X_test, y_test, W_test)
  return (X_train, y_train, W_train, train), (X_test, y_test, W_test, test)

def fit_3D_convolution(paths, task_types, task_transforms, axis_length=32, **training_params):
  """
  Perform stochastic gradient descent for a 3D CNN.
  """
  # TODO(rbharath): task_types is not yet used below.
  (X_train, y_train, W_train), (X_test, y_test, W_test) = process_3D_convolutions(
  (X_train, y_train, W_train, train), (X_test, y_test, W_test, test) = process_3D_convolutions(
    paths, task_transforms)
  nb_classes = 2
  print "np.shape(y_train)"
  print np.shape(y_train)
  print "np.shape(X_train): " + str(np.shape(X_train))
  print "np.shape(y_train): " + str(np.shape(y_train))
  train_3D_convolution(X_train, y_train, axis_length, **training_params)
  model = train_3D_convolution(X_train, y_train, axis_length, **training_params)
  results = eval_model(test, model, task_types,
      modeltype="keras", mode="tensor")
  local_task_types = task_types.copy()
  r2s = compute_r2_scores(results, local_task_types)
  if r2s:
    print "Mean R^2: %f" % np.mean(np.array(r2s.values()))

def train_3D_convolution(X, y, axis_length=32, batch_size=50, nb_epoch=1):
  """
@@ -57,6 +65,13 @@ def train_3D_convolution(X, y, axis_length=32, batch_size=50, nb_epoch=1):
  """
  # NOTE(review): this span is a rendered commit diff, not clean source.  The
  # "@@ ..." hunk headers mark elided lines (e.g. the nb_filters / nb_pool
  # definitions used below are not visible here), and some statements appear
  # twice because both the pre- and post-commit versions were captured --
  # confirm against the repository before relying on this text.
  print "train_3D_convolution"
  print "axis_length: " + str(axis_length)
  print "np.shape(X): " + str(np.shape(X))
  print "Shuffling X dimensions"
  # TODO(rbharath): Modify the featurization so that it matches desired shape.
  # X arrives as (n_samples, x, y, z, n_channels); this unpacking assumes the
  # three spatial axes are equal (a cube of side axis_length) -- TODO confirm.
  (n_samples, axis_length, _, _, n_channels) = np.shape(X)
  # TODO(rbharath): Modify the featurization so that it matches desired shape.
  # Relabel the buffer to the (samples, axis, channels, axis, axis) layout the
  # Convolution3D layer expects.  NOTE(review): np.reshape only relabels the
  # flat buffer, it does not move the channel axis; presumably np.transpose
  # was intended -- verify.
  X = np.reshape(X, (n_samples, axis_length, n_channels, axis_length, axis_length))
  print "np.shape(X): " + str(np.shape(X))
  # Number of classes for classification
  nb_classes = 2

@@ -71,10 +86,10 @@ def train_3D_convolution(X, y, axis_length=32, batch_size=50, nb_epoch=1):
  nb_conv = [7, 5, 3]

  model = Sequential()
  # TODO(rbharath): Avoid hard coding the number of stacks here
  # TODO(rbharath): Avoid hard coding the number of stacks here
  model.add(Convolution3D(nb_filter=nb_filters[0], stack_size=3,
     nb_row=nb_conv[0], nb_col=nb_conv[0], nb_depth=nb_conv[0],
     border_mode='valid', input_shape=(32, 32, 32, 3)))
     border_mode='valid'))
  # NOTE(review): the two closing lines above are the pre-/post-commit
  # versions of the same call (the commit drops the explicit input_shape).
  model.add(Activation('relu'))
  model.add(MaxPooling3D(poolsize=(nb_pool[0], nb_pool[0], nb_pool[0])))
  model.add(Convolution3D(nb_filter=nb_filters[1], stack_size=nb_filters[0],
@@ -88,13 +103,15 @@ def train_3D_convolution(X, y, axis_length=32, batch_size=50, nb_epoch=1):
  model.add(Activation('relu'))
  model.add(MaxPooling3D(poolsize=(nb_pool[2], nb_pool[2], nb_pool[2])))
  model.add(Flatten())
  model.add(Dense(320, 32/2, init='normal'))
  model.add(Dense(32, 32/2, init='normal'))
  # NOTE(review): the two Dense lines above are the pre-/post-commit versions
  # of one layer (input dim 320 -> 32); only one belongs in the real source.
  model.add(Activation('relu'))
  model.add(Dropout(0.5))
  # TODO(rbharath): Generalize this to support classification as well as regression.
  model.add(Dense(32/2, 1, init='normal'))

  # Despite the variable name, the optimizer constructed here is RMSprop.
  sgd = RMSprop(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
  print "About to compile model"
  model.compile(loss='mean_squared_error', optimizer=sgd)
  print "About to fit data to model."
  model.fit(X, y, batch_size=batch_size, nb_epoch=nb_epoch)
  return model
+13 −6
Original line number Diff line number Diff line
@@ -8,6 +8,7 @@ __license__ = "LGPL"
import numpy as np
import warnings
from deep_chem.utils.preprocess import dataset_to_numpy
from deep_chem.utils.preprocess import tensor_dataset_to_numpy
from deep_chem.utils.preprocess import labels_to_weights
from sklearn.metrics import mean_squared_error
from sklearn.metrics import roc_auc_score
@@ -16,7 +17,7 @@ from rdkit import Chem
from rdkit.Chem.Descriptors import ExactMolWt

def model_predictions(test_set, model, n_targets, task_types,
    modeltype="sklearn"):
    modeltype="sklearn", mode="regular"):
  """Obtains predictions of provided model on test_set.

  Returns a list of per-task predictions.
@@ -39,7 +40,15 @@ def model_predictions(test_set, model, n_targets, task_types,
    Either sklearn, keras, or keras_multitask
  """
  # Extract features for test set and make preds
  if mode == "regular":
    X, _, _ = dataset_to_numpy(test_set)
  elif mode == "tensor":
    X, _, _ = tensor_dataset_to_numpy(test_set)
    (n_samples, axis_length, _, _, n_channels) = np.shape(X)
    # TODO(rbharath): Modify the featurization so that it matches desired shape.
    X = np.reshape(X, (n_samples, axis_length, n_channels, axis_length, axis_length))
  else:
    raise ValueError("Improper mode: " + str(mode))
  if modeltype == "keras_multitask":
    predictions = model.predict({"input": X})
    ypreds = []
@@ -121,9 +130,7 @@ def size_eval_model(test_set, model, task_types, modeltype="sklearn"):
  print "RMS: " + str(target_rms)

  

  
def eval_model(test_set, model, task_types, modeltype="sklearn"):
def eval_model(test_set, model, task_types, modeltype="sklearn", mode="regular"):
  """Evaluates the provided model on the test-set.

  Returns a dict which maps target-names to pairs of np.ndarrays (ytrue,
@@ -147,7 +154,7 @@ def eval_model(test_set, model, task_types, modeltype="sklearn"):
  local_task_types = task_types.copy()
  endpoints = sorted_targets
  ypreds = model_predictions(test_set, model, len(sorted_targets),
      local_task_types, modeltype=modeltype)
      local_task_types, modeltype=modeltype, mode=mode)
  results = {}
  for target in endpoints:
    results[target] = ([], [])  # (ytrue, yscore)