Merge pull request #1994 from deepchem/logging (8e069fe2) · Commits · 钟慕尧 / deepchem

deepchem/models/models.py

+7 −14

Original line number	Diff line number	Diff line
		"""
		Contains an abstract base class that supports different ML models.
		"""
		__author__ = "Bharath Ramsundar and Joseph Gomes"
		__copyright__ = "Copyright 2016, Stanford University"
		__license__ = "MIT"

		import sys
		import numpy as np
		@@ -15,24 +12,22 @@ import tempfile
		import sklearn
		from sklearn.base import BaseEstimator

		import logging
		from deepchem.data import Dataset, pad_features
		from deepchem.trans import undo_transforms
		from deepchem.utils.save import load_from_disk
		from deepchem.utils.save import save_to_disk
		from deepchem.utils.save import log
		from deepchem.utils.evaluate import Evaluator

		logger = logging.getLogger(__name__)


		class Model(BaseEstimator):
		"""
		Abstract base class for different ML models.
		"""

		def __init__(self,
		model_instance=None,
		model_dir=None,
		verbose=True,
		**kwargs):
		def __init__(self, model_instance=None, model_dir=None, **kwargs):
		"""Abstract class for all models.

		Parameters
		@@ -53,8 +48,6 @@ class Model(BaseEstimator):
		self.model_instance = model_instance
		self.model_class = model_instance.__class__

		self.verbose = verbose

		def __del__(self):
		if 'model_dir_is_temp' in dir(self) and self.model_dir_is_temp:
		shutil.rmtree(self.model_dir)
		@@ -113,13 +106,13 @@ class Model(BaseEstimator):
		# TODO(rbharath/enf): We need a structured way to deal with potential GPU
		# memory overflows.
		for epoch in range(nb_epoch):
		log("Starting epoch %s" % str(epoch + 1), self.verbose)
		logger.info("Starting epoch %s" % str(epoch + 1))
		losses = []
		for (X_batch, y_batch, w_batch,
		ids_batch) in dataset.iterbatches(batch_size):
		losses.append(self.fit_on_batch(X_batch, y_batch, w_batch))
		log("Avg loss for epoch %d: %f" % (epoch + 1, np.array(losses).mean()),
		self.verbose)
		logger.info(
		"Avg loss for epoch %d: %f" % (epoch + 1, np.array(losses).mean()))

		def predict(self, dataset, transformers=[], batch_size=None):
		"""

deepchem/models/progressive_multitask.py

+9 −3

Original line number	Diff line number	Diff line
		@@ -3,7 +3,7 @@ import numpy as np
		import tensorflow as tf
		import collections

		from deepchem.utils.save import log
		import logging
		from deepchem.metrics import to_one_hot
		from deepchem.metrics import from_one_hot
		from deepchem.models import KerasModel, layers
		@@ -11,16 +11,22 @@ from deepchem.models.losses import L2Loss, SparseSoftmaxCrossEntropy
		from deepchem.models.keras_model import _StandardLoss
		from tensorflow.keras.layers import Input, Dense, Dropout, ReLU, Concatenate, Add, Multiply, Softmax

		logger = logging.getLogger(__name__)


		class ProgressiveMultitaskRegressor(KerasModel):
		"""Implements a progressive multitask neural network for regression.

		Progressive Networks: https://arxiv.org/pdf/1606.04671v3.pdf

		Progressive networks allow for multitask learning where each task
		gets a new column of weights. As a result, there is no exponential
		forgetting where previous tasks are ignored.

		References
		----------
		See [1]_ for a full description of the progressive architecture

		.. [1] Rusu, Andrei A., et al. "Progressive neural networks." arXiv preprint
		arXiv:1606.04671 (2016).
		"""

		def __init__(self,

deepchem/models/robust_multitask.py

+21 −4

Original line number	Diff line number	Diff line
		@@ -2,17 +2,28 @@ import numpy as np
		import tensorflow as tf
		import collections

		import logging
		from deepchem.metrics import to_one_hot
		from deepchem.models import KerasModel
		from deepchem.models.layers import Stack
		from deepchem.models.losses import SoftmaxCrossEntropy, L2Loss

		logger = logging.getLogger(__name__)


		class RobustMultitaskClassifier(KerasModel):
		"""Implements a neural network for robust multitasking.

		Key idea is to have bypass layers that feed directly from features to task
		output. Hopefully will allow tasks to route around bad multitasking.
		The key idea of this model is to have bypass layers that feed
		directly from features to task output. This might provide some
		flexibility to route around challenges in multitasking with
		destructive interference.

		References
		----------
		This technique was introduced in [1]_

		.. [1] Ramsundar, Bharath, et al. "Is multitask deep learning practical for pharma?." Journal of chemical information and modeling 57.8 (2017): 2068-2076.

		"""

		@@ -194,8 +205,14 @@ class RobustMultitaskClassifier(KerasModel):
		class RobustMultitaskRegressor(KerasModel):
		"""Implements a neural network for robust multitasking.

		Key idea is to have bypass layers that feed directly from features to task
		output. Hopefully will allow tasks to route around bad multitasking.
		The key idea of this model is to have bypass layers that feed
		directly from features to task output. This might provide some
		flexibility to route around challenges in multitasking with
		destructive interference.

		References
		----------
		.. [1] Ramsundar, Bharath, et al. "Is multitask deep learning practical for pharma?." Journal of chemical information and modeling 57.8 (2017): 2068-2076.

		"""

deepchem/models/sklearn_models/init.py

+4 −8

Original line number	Diff line number	Diff line
		@@ -27,23 +27,19 @@ class SklearnModel(Model):
		Abstract base class for different ML models.
		"""

		def __init__(self,
		model_instance=None,
		model_dir=None,
		verbose=True,
		**kwargs):
		def __init__(self, model_instance=None, model_dir=None, **kwargs):
		"""
		Parameters
		----------
		model_instance: sklearn model
		Instance of model to wrap.
		model_dir: str
		verbose: bool
		If specified, the model will be saved in this directory.
		kwargs: dict
		kwargs['use_weights'] is a bool which determines if we pass weights into
		self.model_instance.fit()
		"""
		super(SklearnModel, self).__init__(model_instance, model_dir, verbose,
		**kwargs)
		super(SklearnModel, self).__init__(model_instance, model_dir, **kwargs)
		if 'use_weights' in kwargs:
		self.use_weights = kwargs['use_weights']
		else:

deepchem/models/xgboost_models/init.py

+4 −9

Original line number	Diff line number	Diff line
		@@ -17,11 +17,7 @@ class XGBoostModel(SklearnModel):
		Abstract base class for XGBoost model.
		"""

		def __init__(self,
		model_instance=None,
		model_dir=None,
		verbose=False,
		**kwargs):
		def __init__(self, model_instance=None, model_dir=None, **kwargs):
		"""Abstract class for XGBoost models.

		Parameters
		@@ -40,7 +36,6 @@ class XGBoostModel(SklearnModel):
		self.model_instance = model_instance
		self.model_class = model_instance.__class__

		self.verbose = verbose
		if 'early_stopping_rounds' in kwargs:
		self.early_stopping_rounds = kwargs['early_stopping_rounds']
		else:
		@@ -77,13 +72,13 @@ class XGBoostModel(SklearnModel):
		y_train,
		early_stopping_rounds=self.early_stopping_rounds,
		eval_metric=xgb_metric,
		eval_set=[(X_train, y_train), (X_test, y_test)],
		verbose=self.verbose)
		eval_set=[(X_train, y_train), (X_test, y_test)])

		# Since the test split is 20%, when retraining the model on the whole data
		# we expect n_estimators to increase by a factor of 1/0.8 = 1.25.
		estimated_best_round = np.round(self.model_instance.best_ntree_limit * 1.25)
		self.model_instance.n_estimators = np.int64(estimated_best_round)
		self.model_instance.fit(X, y, eval_metric=xgb_metric, verbose=self.verbose)
		self.model_instance.fit(X, y, eval_metric=xgb_metric)

		def _search_param(self, metric, X, y):
		'''

Admin message