Commit 2f703bf4 authored by Bharath Ramsundar's avatar Bharath Ramsundar
Browse files

Added Vanilla DNN Model impl. Still debugging eval.

parent 415033d0
Loading
Loading
Loading
Loading
+19 −0
Original line number Diff line number Diff line
@@ -11,6 +11,8 @@ class Model(object):
  """
  Abstract base class for different ML models.
  """
  # List of registered models
  registered_model_types = {}
  def __init__(self, task_types, model_params, initialize_raw_model=True):
    self.task_types = task_types
    self.model_params = model_params
@@ -41,6 +43,23 @@ class Model(object):
    """
    return(self.raw_model)

  @staticmethod
  def model_builder(model_type, task_types, model_params,
                    initialize_raw_model=True):
    """Factory: construct a model instance by its registered type string.

    Parameters
    ----------
    model_type: str
      Key previously passed to Model.register_model_type.
    task_types: dict
      Maps task names to "classification"/"regression" (forwarded to the
      model constructor).
    model_params: dict
      Hyperparameters forwarded to the model constructor.
    initialize_raw_model: bool
      Forwarded to the model constructor.

    Raises
    ------
    ValueError
      If model_type was never registered.
    """
    # Guard clause: fail fast on unknown model types.
    if model_type not in Model.registered_model_types:
      raise ValueError("model_type %s is not supported" % model_type)
    return Model.registered_model_types[model_type](
        task_types, model_params, initialize_raw_model)

  @staticmethod
  def register_model_type(model_type, model_class):
    """Register model_class under the string key model_type.

    Registered classes become constructible via Model.model_builder.
    Registration is global (class-level dict shared by all subclasses).
    """
    Model.registered_model_types[model_type] = model_class


'''
def model_predictions(X, model, n_targets, task_types, modeltype="sklearn"):
+77 −73
Original line number Diff line number Diff line
@@ -7,53 +7,103 @@ from keras.layers.core import Dense, Dropout
from keras.optimizers import SGD
from deep_chem.models import Model

#TODO(rbharath/enf): Make this real. It's a dummy now.
class SingleTaskDNN(Model):
class MultiTaskDNN(Model):
  """
  Abstract base class for different ML models.
  """
  def __init__(self, task_types, model_params, initialize_raw_model=True):
    """Build a multitask Keras Graph network: one shared hidden layer with
    dropout, plus one output head per task (softmax pair for classification,
    single linear unit for regression).

    Parameters
    ----------
    task_types: dict
      Maps task name -> "classification" or "regression". Output heads are
      created in sorted(task name) order and named "task0", "task1", ...
    model_params: dict
      Must contain "data_shape" (1-tuple of input dimension), "nb_hidden",
      "activation", "dropout", "learning_rate", "decay", "momentum",
      "nesterov".
    initialize_raw_model: bool
      If False, skip Graph construction (self.raw_model stays None).
    """
    # NOTE(review): the next three assignments look redundant — the super()
    # call below is expected to set the same attributes; likely leftover
    # lines from the refactor. Confirm against Model.__init__.
    self.task_types = task_types
    self.model_params = model_params
    self.raw_model = None
    super(MultiTaskDNN, self).__init__(task_types, model_params,
                                       initialize_raw_model)
    if initialize_raw_model:
      sorted_tasks = sorted(task_types.keys())
      # data_shape is expected to be a 1-tuple; this unpack raises if not.
      (n_inputs,) = model_params["data_shape"]
      model = Graph()
      model.add_input(name="input", input_shape=(n_inputs,))
      # Single shared hidden layer feeding every task head.
      model.add_node(
          Dense(model_params["nb_hidden"], init='uniform',
                activation=model_params["activation"]),
          name="dense", input="input")
      model.add_node(Dropout(model_params["dropout"]), name="dropout",
                             input="dense")
      top_layer = "dropout"
      for ind, task in enumerate(sorted_tasks):
        task_type = task_types[task]
        if task_type == "classification":
          # Two-way softmax head (one-hot labels; see get_data_dict).
          model.add_node(
              Dense(2, init='uniform', activation="softmax"),
              name="dense_head%d" % ind, input=top_layer)
        elif task_type == "regression":
          # Single linear output unit.
          model.add_node(
              Dense(1, init='uniform'),
              name="dense_head%d" % ind, input=top_layer)
        model.add_output(name="task%d" % ind, input="dense_head%d" % ind)

      # Per-output loss, matched to the head type created above.
      loss_dict = {}
      for ind, task in enumerate(sorted_tasks):
        task_type, taskname = task_types[task], "task%d" % ind
        if task_type == "classification":
          loss_dict[taskname] = "binary_crossentropy"
        elif task_type == "regression":
          loss_dict[taskname] = "mean_squared_error"
      sgd = SGD(lr=model_params["learning_rate"],
                decay=model_params["decay"],
                momentum=model_params["momentum"],
                nesterov=model_params["nesterov"])
      model.compile(optimizer=sgd, loss=loss_dict)
      self.raw_model = model

  def get_data_dict(self, X, y=None):
    """Map features (and optionally labels) onto the Graph model's named
    endpoints.

    Parameters
    ----------
    X: array-like
      Feature matrix, bound to the "input" endpoint.
    y: array-like or None
      Label matrix with one column per task (sorted task order). When given,
      classification columns are one-hot encoded and regression columns are
      passed through, keyed "task0", "task1", ...

    Returns
    -------
    dict mapping endpoint name -> data array.
    """
    data = {}
    data["input"] = X
    for ind, task in enumerate(sorted(self.task_types.keys())):
      # Bug fix: was `task_types[task]` (undefined name in this scope);
      # must read the instance attribute.
      task_type, taskname = self.task_types[task], "task%d" % ind
      if y is not None:
        if task_type == "classification":
          data[taskname] = to_one_hot(y[:, ind])
        elif task_type == "regression":
          data[taskname] = y[:, ind]
    return data

  def get_sample_weight(self, w):
    """Build the per-output sample-weight dict keyed by endpoint name.

    Column ind of w weighs the task at position ind in sorted task order,
    matching the "task%d" endpoint naming used elsewhere in this class.
    """
    task_names = sorted(self.task_types.keys())
    return {"task%d" % ind: w[:, ind]
            for ind in range(len(task_names))}

  def fit_on_batch(self, X, y, w):
    """Update the model with one minibatch of data.

    Parameters
    ----------
    X: array-like
      Feature minibatch.
    y: array-like
      Label minibatch, one column per task.
    w: array-like
      Per-sample, per-task weights (same shape as y).

    Returns
    -------
    The loss reported by Keras train_on_batch.
    """
    # Bug fixes: removed the leftover unconditional NotImplementedError
    # (which made everything below unreachable) and corrected the
    # `W` -> `w` name mismatch (the parameter is lowercase).
    eps = .001
    # Add eps weight to avoid minibatches with zero weight
    # (zero-weight batches cause theano to crash).
    w = w + eps * np.ones(np.shape(w))
    data = self.get_data_dict(X, y)
    sample_weight = self.get_sample_weight(w)
    loss = self.raw_model.train_on_batch(data, sample_weight=sample_weight)
    return loss

  def predict_on_batch(self, X):
    """Make predictions on one minibatch of new data.

    Parameters
    ----------
    X: array-like
      Feature minibatch.

    Returns
    -------
    Squeezed numpy array of raw-model predictions.
    """
    # Bug fix: removed the leftover unconditional NotImplementedError that
    # made the real implementation below unreachable.
    data = self.get_data_dict(X)
    y_pred = self.raw_model.predict_on_batch(data)
    # Drop singleton dimensions so single-task output is a flat vector.
    y_pred = np.squeeze(y_pred)
    return y_pred

#TODO(rbharath/enf): Make this real. It's a dummy now.
class MultiTaskDNN(Model):
# Same class serves both problem kinds: the task_types dict passed at
# construction selects the head/loss per task.
Model.register_model_type("multitask_deep_regressor", MultiTaskDNN)
Model.register_model_type("multitask_deep_classifier", MultiTaskDNN)

class SingleTaskDNN(MultiTaskDNN):
  """
  Single-task vanilla deep network.

  Thin wrapper over MultiTaskDNN: a single-task network is just the
  multitask architecture with exactly one output head, so fit_on_batch
  and predict_on_batch are inherited unchanged.
  """
  def __init__(self, task_types, model_params, initialize_raw_model=True):
    # Bug fix: dropped the dummy fit_on_batch/predict_on_batch overrides
    # that raised NotImplementedError and shadowed the working inherited
    # implementations; also dropped redundant attribute assignments that
    # the superclass constructor performs.
    super(SingleTaskDNN, self).__init__(task_types, model_params,
                                        initialize_raw_model)
# Same class serves both problem kinds: the task_types dict passed at
# construction selects the head/loss per task.
Model.register_model_type("singletask_deep_regressor", SingleTaskDNN)
Model.register_model_type("singletask_deep_classifier", SingleTaskDNN)

def to_one_hot(y):
  """Transforms label vector into one-hot encoding.
@@ -154,52 +204,6 @@ def train_multitask_model(X, y, W, task_types, learning_rate=0.01,
  nb_epoch: int
    maximal number of epochs to run the optimizer
  """
  eps = .001
  sorted_tasks = sorted(task_types.keys())
  local_task_types = task_types.copy()
  endpoints = sorted_tasks
  print "train_multitask_model()"
  print "np.shape(X)"
  print np.shape(X)
  n_inputs = len(X[0].flatten())
  # Add eps weight to avoid minibatches with zero weight (causes theano to crash).
  W = W + eps * np.ones(np.shape(W))
  print "np.shape(W)"
  print np.shape(W)
  model = Graph()
  #model.add_input(name="input", ndim=n_inputs)
  model.add_input(name="input", input_shape=(n_inputs,))
  model.add_node(
      Dense(nb_hidden, init='uniform', activation=activation),
      name="dense", input="input")
  model.add_node(Dropout(dropout), name="dropout", input="dense")
  top_layer = "dropout"
  for ind, task in enumerate(endpoints):
    task_type = local_task_types[task]
    if task_type == "classification":
      model.add_node(
          Dense(2, init='uniform', activation="softmax"),
          name="dense_head%d" % ind, input=top_layer)
    elif task_type == "regression":
      model.add_node(
          Dense(1, init='uniform'),
          name="dense_head%d" % ind, input=top_layer)
    model.add_output(name="task%d" % ind, input="dense_head%d" % ind)
  data_dict, loss_dict, sample_weights = {}, {}, {}
  data_dict["input"] = X
  for ind, task in enumerate(endpoints):
    task_type = local_task_types[task]
    taskname = "task%d" % ind
    sample_weights[taskname] = W[:, ind]
    if task_type == "classification":
      loss_dict[taskname] = "binary_crossentropy"
      data_dict[taskname] = to_one_hot(y[:, ind])
    elif task_type == "regression":
      loss_dict[taskname] = "mean_squared_error"
      data_dict[taskname] = y[:, ind]
  sgd = SGD(lr=learning_rate, decay=decay, momentum=momentum, nesterov=nesterov)
  print "About to compile model!"
  model.compile(optimizer=sgd, loss=loss_dict)
  print "Done compiling. About to fit model!"
  print "validation_split: " + str(validation_split)
  model.fit(data_dict, nb_epoch=nb_epoch, batch_size=batch_size,
+2 −1
Original line number Diff line number Diff line
@@ -76,7 +76,6 @@ class DockingDNN(Model):
      self.raw_model = model

  def fit_on_batch(self, X, y, w):
    """Update the docking model on one minibatch.

    NOTE(review): the sample-weight argument w is currently ignored —
    confirm whether weighting is needed here as in MultiTaskDNN.
    shuffle_data is presumably a module-level helper; verify its contract.
    """
    # TODO(rbharath): Modify the featurization so that it matches desired shaped.
    X = shuffle_data(X)
    loss = self.raw_model.train_on_batch(X, y)
    print("Loss: %f" % loss)
@@ -89,3 +88,5 @@ class DockingDNN(Model):
    y_pred = self.raw_model.predict_on_batch(X)
    y_pred = np.squeeze(y_pred)
    return y_pred

# Make DockingDNN constructible via Model.model_builder.
Model.register_model_type("convolutional_3D_regressor", DockingDNN)
+20 −20
Original line number Diff line number Diff line
@@ -7,24 +7,24 @@ from __future__ import division
from __future__ import unicode_literals

from deep_chem.models.deep import SingleTaskDNN
from deep_chem.models.deep import MultiTaskDNN
from deep_chem.models.deep3d import DockingDNN
from deep_chem.models.standard import SklearnModel
#from deep_chem.models.deep import MultiTaskDNN
#from deep_chem.models.deep3d import DockingDNN
#from deep_chem.models.standard import SklearnModel

def model_builder(model_type, task_types, model_params,
                  initialize_raw_model=True):
  """
  Factory function to construct model.

  Known deep-network types dispatch to their class; any other model_type
  falls back to a scikit-learn model (which takes no
  initialize_raw_model argument).
  """
  deep_models = {
      "singletask_deep_network": SingleTaskDNN,
      "multitask_deep_network": MultiTaskDNN,
      "convolutional_3D_regressor": DockingDNN,
  }
  model_class = deep_models.get(model_type)
  if model_class is not None:
    return model_class(task_types, model_params, initialize_raw_model)
  return SklearnModel(task_types, model_params)
#def model_builder(model_type, task_types, model_params,
#                  initialize_raw_model=True):
#  """
#  Factory function to construct model.
#  """
#  if model_type == "singletask_deep_network":
#    model = SingleTaskDNN(task_types, model_params,
#                          initialize_raw_model)
#  elif model_type == "multitask_deep_network":
#    model = MultiTaskDNN(task_types, model_params,
#                         initialize_raw_model)
#  elif model_type == "convolutional_3D_regressor":
#    model = DockingDNN(task_types, model_params,
#                       initialize_raw_model)
#  else:
#    model = SklearnModel(task_types, model_params)
#  return model
+18 −1
Original line number Diff line number Diff line
@@ -137,6 +137,15 @@ def add_model_group(fit_cmd):
  group.add_argument(
      "--decay", type=float, default=1e-4,
      help="Learning rate decay for NN models.")
  group.add_argument(
      "--activation", type=str, default="relu",
      help="NN activation function.")
  group.add_argument(
      "--momentum", type=float, default=.9,
      help="Momentum for stochastic gradient descent.")
  group.add_argument(
      "--nesterov", action="store_true",
      help="If set, use Nesterov acceleration.")

def add_fit_command(subparsers):
  """Adds arguments for fit subcommand."""
@@ -172,6 +181,13 @@ def add_eval_command(subparsers):
      help="Computed statistics on evaluated set.")
  eval_cmd.set_defaults(func=eval_trained_model_wrapper)

def add_predict_command(subparsers):
  """Adds arguments for predict subcommand."""
  predict_cmd = subparsers.add_parser(
    "predict",
    help="Make predictions of model on new data.")
  # TODO: this subcommand is unfinished — add an argument group for predict
  # options (the original call was cut off: predict_cmd.add_argument_group).

# TODO(rbharath): There are a lot of duplicate commands introduced here. Is
# there a nice way to factor them?
def add_model_command(subparsers):
@@ -225,7 +241,8 @@ def extract_model_params(args):
  Given input arguments, return a dict specifiying model parameters.
  """
  params = ["nb_hidden", "learning_rate", "dropout",
            "nb_epoch", "decay", "batch_size", "loss_function"]
            "nb_epoch", "decay", "batch_size", "loss_function",
            "activation", "momentum", "nesterov"]

  model_params = {param : getattr(args, param) for param in params}
  return(model_params)
Loading