Commit b6beb3bf authored by Bharath Ramsundar's avatar Bharath Ramsundar
Browse files

Adding a unit test

parent ae9a19a0
Loading
Loading
Loading
Loading
+29 −14
Original line number Diff line number Diff line
@@ -272,7 +272,8 @@ class KerasModel(Model):
          restore: bool = False,
          variables: Optional[List[tf.Variable]] = None,
          loss: Optional[KerasLossFn] = None,
          callbacks: Union[Callable, List[Callable]] = []) -> List[float]:
          callbacks: Union[Callable, List[Callable]] = [],
          return_loss_curve: bool = False) -> Union[float, List[float]]:
    """Train this model on a dataset.

    Parameters
@@ -302,16 +303,20 @@ class KerasModel(Model):
    callbacks: function or list of functions
      one or more functions of the form f(model, step) that will be invoked after
      every step.  This can be used to perform validation, logging, etc.
    return_loss_curve: bool, optional (default False)
      If `True` return the full set of average losses computed over the
      process of fitting. Else return the last computed average loss.

    Returns
    -------
    the average loss over the most recent checkpoint interval
    Either the average loss over the most recent checkpoint interval or a list
    of all such average losses over the course of fitting.
   """
    return self.fit_generator(
        self.default_generator(
            dataset, epochs=nb_epoch,
            deterministic=deterministic), max_checkpoints_to_keep,
        checkpoint_interval, restore, variables, loss, callbacks)
            dataset, epochs=nb_epoch, deterministic=deterministic),
        max_checkpoints_to_keep, checkpoint_interval, restore, variables, loss,
        callbacks, return_loss_curve)

  def fit_generator(
      self,
@@ -321,7 +326,8 @@ class KerasModel(Model):
      restore: bool = False,
      variables: Optional[List[tf.Variable]] = None,
      loss: Optional[KerasLossFn] = None,
      callbacks: Union[Callable, List[Callable]] = []) -> List[float]:
      callbacks: Union[Callable, List[Callable]] = [],
      return_loss_curve: bool = False) -> Union[float, List[float]]:
    """Train this model on data from a generator.

    Parameters
@@ -347,10 +353,14 @@ class KerasModel(Model):
    callbacks: function or list of functions
      one or more functions of the form f(model, step) that will be invoked after
      every step.  This can be used to perform validation, logging, etc.
    return_loss_curve: bool, optional (default False)
      If `True` return the full set of average losses computed over the
      process of fitting. Else return the last computed average loss.

    Returns
    -------
    the average loss over the most recent checkpoint interval
    Either the average loss over the most recent checkpoint interval or a list
    of all such average losses over the course of fitting.
    """
    if not isinstance(callbacks, SequenceCollection):
      callbacks = [callbacks]
@@ -431,7 +441,13 @@ class KerasModel(Model):

    time2 = time.time()
    logger.info("TIMING: model fitting took %0.3f s" % (time2 - time1))
    if return_loss_curve:
      return avg_losses
    else:
      if len(avg_losses) > 0:
        return avg_losses[-1]
      else:
        return 0.0

  def _create_gradient_fn(self,
                          variables: Optional[List[tf.Variable]]) -> Callable:
@@ -500,18 +516,17 @@ class KerasModel(Model):
    """
    self._ensure_built()
    dataset = NumpyDataset(X, y, w)
    losses = self.fit(
    # We set return_loss_curve=False, so we know this is a float, but mypy
    # can't automatically infer that.
    return self.fit(  # type: ignore
        dataset,
        nb_epoch=1,
        max_checkpoints_to_keep=max_checkpoints_to_keep,
        checkpoint_interval=self._global_step.numpy() + 2 if checkpoint else 0,
        variables=variables,
        loss=loss,
        callbacks=callbacks)
    if len(losses) != 1:
      raise ValueError(
          "Each batch should take only one global step to fit. Unknown error.")
    return losses[0]
        callbacks=callbacks,
        return_loss_curve=False)

  def _predict(
      self, generator: Iterable[Tuple[Any, Any, Any]],
+3 −2
Original line number Diff line number Diff line
@@ -21,7 +21,7 @@ from deepchem.utils.save import load_from_disk
from deepchem.utils.save import save_to_disk
from deepchem.utils.evaluate import Evaluator

from typing import Any, Dict, List, Optional, Sequence
from typing import Any, Dict, List, Optional, Sequence, Union
from deepchem.utils.typing import OneOrMany

logger = logging.getLogger(__name__)
@@ -127,7 +127,8 @@ class Model(BaseEstimator):
    """
    raise NotImplementedError

  def fit(self, dataset: Dataset, nb_epoch: int = 10) -> List[float]:
  def fit(self, dataset: Dataset,
          nb_epoch: int = 10) -> Union[float, List[float]]:
    """
    Fits a model on data in a Dataset object.

+21 −0
Original line number Diff line number Diff line
@@ -58,6 +58,27 @@ def test_overfit_sequential_model():
  assert scores[metric.name] > 0.9


def test_fit_return_loss_curve():
  """Check that fitting with `return_loss_curve=True` yields a loss curve.

  A small random two-feature binary classification dataset is fit with a
  two-layer Keras network wrapped in a `KerasModel`, and the length of the
  returned list of losses is compared with the number of logging intervals
  covered by the run.
  """
  n_samples, n_feats = 10, 2
  n_epochs, log_every = 1000, 10

  features = np.random.rand(n_samples, n_feats)
  # Label is 1.0 where the first feature exceeds the second, else 0.0.
  labels = (features[:, 0] > features[:, 1]).astype(np.float32)
  dataset = dc.data.NumpyDataset(features, labels)

  layers = [
      tf.keras.layers.Dense(10, activation='relu'),
      tf.keras.layers.Dense(1, activation='sigmoid'),
  ]
  model = dc.models.KerasModel(
      tf.keras.Sequential(layers),
      dc.models.losses.BinaryCrossEntropy(),
      learning_rate=0.005,
      log_frequency=log_every)

  loss_curve = model.fit(dataset, nb_epoch=n_epochs, return_loss_curve=True)
  # Each epoch is a single step for this model, so 1000 epochs at a log
  # frequency of 10 are expected to produce 100 entries in the curve.
  assert len(loss_curve) == n_epochs // log_every


def test_fit_on_batch():
  """Test fitting a KerasModel to individual batches."""
  n_data_points = 10