Unverified Commit 8e069fe2 authored by Bharath Ramsundar's avatar Bharath Ramsundar Committed by GitHub
Browse files

Merge pull request #1994 from deepchem/logging

Remove verbose keyword and switch to logging in model classes
parents 07909475 adeaa4da
Loading
Loading
Loading
Loading
+7 −14
Original line number Original line Diff line number Diff line
"""
"""
Contains an abstract base class that supports different ML models.
Contains an abstract base class that supports different ML models.
"""
"""
__author__ = "Bharath Ramsundar and Joseph Gomes"
__copyright__ = "Copyright 2016, Stanford University"
__license__ = "MIT"


import sys
import sys
import numpy as np
import numpy as np
@@ -15,24 +12,22 @@ import tempfile
import sklearn
import sklearn
from sklearn.base import BaseEstimator
from sklearn.base import BaseEstimator


import logging
from deepchem.data import Dataset, pad_features
from deepchem.data import Dataset, pad_features
from deepchem.trans import undo_transforms
from deepchem.trans import undo_transforms
from deepchem.utils.save import load_from_disk
from deepchem.utils.save import load_from_disk
from deepchem.utils.save import save_to_disk
from deepchem.utils.save import save_to_disk
from deepchem.utils.save import log
from deepchem.utils.evaluate import Evaluator
from deepchem.utils.evaluate import Evaluator


logger = logging.getLogger(__name__)



class Model(BaseEstimator):
class Model(BaseEstimator):
  """
  """
  Abstract base class for different ML models.
  Abstract base class for different ML models.
  """
  """


  def __init__(self,
  def __init__(self, model_instance=None, model_dir=None, **kwargs):
               model_instance=None,
               model_dir=None,
               verbose=True,
               **kwargs):
    """Abstract class for all models.
    """Abstract class for all models.


    Parameters
    Parameters
@@ -53,8 +48,6 @@ class Model(BaseEstimator):
    self.model_instance = model_instance
    self.model_instance = model_instance
    self.model_class = model_instance.__class__
    self.model_class = model_instance.__class__


    self.verbose = verbose

  def __del__(self):
  def __del__(self):
    if 'model_dir_is_temp' in dir(self) and self.model_dir_is_temp:
    if 'model_dir_is_temp' in dir(self) and self.model_dir_is_temp:
      shutil.rmtree(self.model_dir)
      shutil.rmtree(self.model_dir)
@@ -113,13 +106,13 @@ class Model(BaseEstimator):
    # TODO(rbharath/enf): We need a structured way to deal with potential GPU
    # TODO(rbharath/enf): We need a structured way to deal with potential GPU
    #                     memory overflows.
    #                     memory overflows.
    for epoch in range(nb_epoch):
    for epoch in range(nb_epoch):
      log("Starting epoch %s" % str(epoch + 1), self.verbose)
      logger.info("Starting epoch %s" % str(epoch + 1))
      losses = []
      losses = []
      for (X_batch, y_batch, w_batch,
      for (X_batch, y_batch, w_batch,
           ids_batch) in dataset.iterbatches(batch_size):
           ids_batch) in dataset.iterbatches(batch_size):
        losses.append(self.fit_on_batch(X_batch, y_batch, w_batch))
        losses.append(self.fit_on_batch(X_batch, y_batch, w_batch))
      log("Avg loss for epoch %d: %f" % (epoch + 1, np.array(losses).mean()),
      logger.info(
          self.verbose)
          "Avg loss for epoch %d: %f" % (epoch + 1, np.array(losses).mean()))


  def predict(self, dataset, transformers=[], batch_size=None):
  def predict(self, dataset, transformers=[], batch_size=None):
    """
    """
+9 −3
Original line number Original line Diff line number Diff line
@@ -3,7 +3,7 @@ import numpy as np
import tensorflow as tf
import tensorflow as tf
import collections
import collections


from deepchem.utils.save import log
import logging
from deepchem.metrics import to_one_hot
from deepchem.metrics import to_one_hot
from deepchem.metrics import from_one_hot
from deepchem.metrics import from_one_hot
from deepchem.models import KerasModel, layers
from deepchem.models import KerasModel, layers
@@ -11,16 +11,22 @@ from deepchem.models.losses import L2Loss, SparseSoftmaxCrossEntropy
from deepchem.models.keras_model import _StandardLoss
from deepchem.models.keras_model import _StandardLoss
from tensorflow.keras.layers import Input, Dense, Dropout, ReLU, Concatenate, Add, Multiply, Softmax
from tensorflow.keras.layers import Input, Dense, Dropout, ReLU, Concatenate, Add, Multiply, Softmax


logger = logging.getLogger(__name__)



class ProgressiveMultitaskRegressor(KerasModel):
class ProgressiveMultitaskRegressor(KerasModel):
  """Implements a progressive multitask neural network for regression.
  """Implements a progressive multitask neural network for regression.


  Progressive Networks: https://arxiv.org/pdf/1606.04671v3.pdf

  Progressive networks allow for multitask learning where each task
  Progressive networks allow for multitask learning where each task
  gets a new column of weights. As a result, there is no exponential
  gets a new column of weights. As a result, there is no exponential
  forgetting where previous tasks are ignored.
  forgetting where previous tasks are ignored.


  References
  ----------
  See [1]_ for a full description of the progressive architecture.

  .. [1] Rusu, Andrei A., et al. "Progressive neural networks." arXiv preprint
         arXiv:1606.04671 (2016).
  """
  """


  def __init__(self,
  def __init__(self,
+21 −4
Original line number Original line Diff line number Diff line
@@ -2,17 +2,28 @@ import numpy as np
import tensorflow as tf
import tensorflow as tf
import collections
import collections


import logging
from deepchem.metrics import to_one_hot
from deepchem.metrics import to_one_hot
from deepchem.models import KerasModel
from deepchem.models import KerasModel
from deepchem.models.layers import Stack
from deepchem.models.layers import Stack
from deepchem.models.losses import SoftmaxCrossEntropy, L2Loss
from deepchem.models.losses import SoftmaxCrossEntropy, L2Loss


logger = logging.getLogger(__name__)



class RobustMultitaskClassifier(KerasModel):
class RobustMultitaskClassifier(KerasModel):
  """Implements a neural network for robust multitasking.
  """Implements a neural network for robust multitasking.


  Key idea is to have bypass layers that feed directly from features to task
  The key idea of this model is to have bypass layers that feed
  output. Hopefully will allow tasks to route around bad multitasking.
  directly from features to task output. This might provide some
  flexibility to route around challenges in multitasking with
  destructive interference.

  References
  ----------
  This technique was introduced in [1]_

  .. [1] Ramsundar, Bharath, et al. "Is multitask deep learning practical for pharma?." Journal of chemical information and modeling 57.8 (2017): 2068-2076.


  """
  """


@@ -194,8 +205,14 @@ class RobustMultitaskClassifier(KerasModel):
class RobustMultitaskRegressor(KerasModel):
class RobustMultitaskRegressor(KerasModel):
  """Implements a neural network for robust multitasking.
  """Implements a neural network for robust multitasking.


  Key idea is to have bypass layers that feed directly from features to task
  The key idea of this model is to have bypass layers that feed
  output. Hopefully will allow tasks to route around bad multitasking.
  directly from features to task output. This might provide some
  flexibility to route around challenges in multitasking with
  destructive interference.

  References
  ----------
  .. [1] Ramsundar, Bharath, et al. "Is multitask deep learning practical for pharma?." Journal of chemical information and modeling 57.8 (2017): 2068-2076.


  """
  """


+4 −8
Original line number Original line Diff line number Diff line
@@ -27,23 +27,19 @@ class SklearnModel(Model):
  Abstract base class for different ML models.
  Abstract base class for different ML models.
  """
  """


  def __init__(self,
  def __init__(self, model_instance=None, model_dir=None, **kwargs):
               model_instance=None,
               model_dir=None,
               verbose=True,
               **kwargs):
    """
    """
    Parameters
    Parameters
    ----------
    ----------
    model_instance: sklearn model
    model_instance: sklearn model
      Instance of model to wrap.
    model_dir: str
    model_dir: str
    verbose: bool
      If specified, the model will be saved in this directory.
    kwargs: dict
    kwargs: dict
      kwargs['use_weights'] is a bool which determines if we pass weights into
      kwargs['use_weights'] is a bool which determines if we pass weights into
      self.model_instance.fit()
      self.model_instance.fit()
    """
    """
    super(SklearnModel, self).__init__(model_instance, model_dir, verbose,
    super(SklearnModel, self).__init__(model_instance, model_dir, **kwargs)
                                       **kwargs)
    if 'use_weights' in kwargs:
    if 'use_weights' in kwargs:
      self.use_weights = kwargs['use_weights']
      self.use_weights = kwargs['use_weights']
    else:
    else:
+4 −9
Original line number Original line Diff line number Diff line
@@ -17,11 +17,7 @@ class XGBoostModel(SklearnModel):
  Abstract base class for XGBoost model.
  Abstract base class for XGBoost model.
  """
  """


  def __init__(self,
  def __init__(self, model_instance=None, model_dir=None, **kwargs):
               model_instance=None,
               model_dir=None,
               verbose=False,
               **kwargs):
    """Abstract class for XGBoost models.
    """Abstract class for XGBoost models.


    Parameters
    Parameters
@@ -40,7 +36,6 @@ class XGBoostModel(SklearnModel):
    self.model_instance = model_instance
    self.model_instance = model_instance
    self.model_class = model_instance.__class__
    self.model_class = model_instance.__class__


    self.verbose = verbose
    if 'early_stopping_rounds' in kwargs:
    if 'early_stopping_rounds' in kwargs:
      self.early_stopping_rounds = kwargs['early_stopping_rounds']
      self.early_stopping_rounds = kwargs['early_stopping_rounds']
    else:
    else:
@@ -77,13 +72,13 @@ class XGBoostModel(SklearnModel):
        y_train,
        y_train,
        early_stopping_rounds=self.early_stopping_rounds,
        early_stopping_rounds=self.early_stopping_rounds,
        eval_metric=xgb_metric,
        eval_metric=xgb_metric,
        eval_set=[(X_train, y_train), (X_test, y_test)],
        eval_set=[(X_train, y_train), (X_test, y_test)])
        verbose=self.verbose)

    # Since test size is 20%, when retrain model to whole data, expect
    # Since test size is 20%, when retrain model to whole data, expect
    # n_estimator increased to 1/0.8 = 1.25 time.
    # n_estimator increased to 1/0.8 = 1.25 time.
    estimated_best_round = np.round(self.model_instance.best_ntree_limit * 1.25)
    estimated_best_round = np.round(self.model_instance.best_ntree_limit * 1.25)
    self.model_instance.n_estimators = np.int64(estimated_best_round)
    self.model_instance.n_estimators = np.int64(estimated_best_round)
    self.model_instance.fit(X, y, eval_metric=xgb_metric, verbose=self.verbose)
    self.model_instance.fit(X, y, eval_metric=xgb_metric)


  def _search_param(self, metric, X, y):
  def _search_param(self, metric, X, y):
    '''
    '''