Commit 87aad9d2 authored by Kevin Shen

Fixed unit tests, styling, and docs.

Modified docs.
parent 888f747b
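Most of the changes below are whitespace-only: paren-aligned continuation lines are folded into 4-space hanging indents, consistent with a yapf Google-based style. As a minimal sketch, this reproduces that layout with yapf's Python API; the exact style options are an assumption, not taken from this commit:

```python
# Minimal sketch, assuming yapf drives the formatting in this commit;
# the style options below are a guess at the project config.
from yapf.yapflib.yapf_api import FormatCode

before = (
    "model = dc.models.KerasModel(keras_model,\n"
    "                             dc.models.losses.L2Loss(),\n"
    "                             wandb_logger=wandblogger)\n")
after, changed = FormatCode(
    before, style_config='{based_on_style: google, indent_width: 2}')
print(after)  # arguments fold onto 4-space hanging-indent continuation lines
```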
+58 −67
@@ -204,7 +204,8 @@ class KerasModel(Model):
      self.wandb_logger.setup()

    # Update config with KerasModel params
-    wandb_logger_config = dict(loss=loss,
+    wandb_logger_config = dict(
+        loss=loss,
        output_types=output_types,
        batch_size=batch_size,
        model_dir=model_dir,
@@ -261,8 +262,8 @@ class KerasModel(Model):
    self._built = True
    self._global_step = tf.Variable(0, trainable=False)
    self._tf_optimizer = self.optimizer._create_tf_optimizer(self._global_step)
-    self._checkpoint = tf.train.Checkpoint(optimizer=self._tf_optimizer,
-                                           model=self.model)
+    self._checkpoint = tf.train.Checkpoint(
+        optimizer=self._tf_optimizer, model=self.model)

  def _create_inputs(self, example_inputs: List) -> None:
    """The first time this is called, create tensors representing the inputs and outputs."""
@@ -280,8 +281,8 @@ class KerasModel(Model):
          for x in example_inputs
      ]

-  def _create_training_ops(self, example_batch: Tuple[List, List,
-                                                      List]) -> None:
+  def _create_training_ops(self,
+                           example_batch: Tuple[List, List, List]) -> None:
    """The first time this is called, create tensors used in optimization."""
    if self._training_ops_built:
      return
@@ -346,11 +347,10 @@ class KerasModel(Model):
    The average loss over the most recent checkpoint interval
   """
    return self.fit_generator(
-        self.default_generator(dataset,
-                               epochs=nb_epoch,
-                               deterministic=deterministic),
-        max_checkpoints_to_keep, checkpoint_interval, restore, variables, loss,
-        callbacks, all_losses)
+        self.default_generator(
+            dataset, epochs=nb_epoch,
+            deterministic=deterministic), max_checkpoints_to_keep,
+        checkpoint_interval, restore, variables, loss, callbacks, all_losses)

  def fit_generator(self,
                    generator: Iterable[Tuple[Any, Any, Any]],
@@ -445,8 +445,8 @@ class KerasModel(Model):
      should_log = (current_step % self.log_frequency == 0)
      if should_log:
        avg_loss = float(avg_loss) / averaged_batches
-        logger.info('Ending global_step %d: Average loss %g' %
-                    (current_step, avg_loss))
+        logger.info(
+            'Ending global_step %d: Average loss %g' % (current_step, avg_loss))
        if all_losses is not None:
          all_losses.append(avg_loss)
        # Capture the last avg_loss in case of return since we're resetting to
@@ -465,9 +465,7 @@ class KerasModel(Model):
      if self.wandb and should_log:
        wandb.log({'loss': batch_loss}, step=current_step)
      if (self.wandb_logger is not None) and should_log:
-        all_data = dict({
-            'train/loss': batch_loss
-        })
+        all_data = dict({'train/loss': batch_loss})
        self.wandb_logger.log_data(all_data, step=current_step)

    if self.wandb:
@@ -480,8 +478,8 @@ class KerasModel(Model):
    # Report final results.
    if averaged_batches > 0:
      avg_loss = float(avg_loss) / averaged_batches
-      logger.info('Ending global_step %d: Average loss %g' %
-                  (current_step, avg_loss))
+      logger.info(
+          'Ending global_step %d: Average loss %g' % (current_step, avg_loss))
      if all_losses is not None:
        all_losses.append(avg_loss)
      last_avg_loss = avg_loss
@@ -560,19 +558,19 @@ class KerasModel(Model):
    """
    self._ensure_built()
    dataset = NumpyDataset(X, y, w)
-    return self.fit(dataset,
+    return self.fit(
+        dataset,
        nb_epoch=1,
        max_checkpoints_to_keep=max_checkpoints_to_keep,
-                    checkpoint_interval=self._global_step.numpy() +
-                    2 if checkpoint else 0,
+        checkpoint_interval=self._global_step.numpy() + 2 if checkpoint else 0,
        variables=variables,
        loss=loss,
        callbacks=callbacks)

  def _predict(
-      self, generator: Iterable[Tuple[Any, Any,
-                                      Any]], transformers: List[Transformer],
-      outputs: Optional[OneOrMany[tf.Tensor]], uncertainty: bool,
+      self, generator: Iterable[Tuple[Any, Any, Any]],
+      transformers: List[Transformer], outputs: Optional[OneOrMany[tf.Tensor]],
+      uncertainty: bool,
      other_output_types: Optional[OneOrMany[str]]) -> OneOrMany[np.ndarray]:
    """
    Predict outputs for data provided by a generator.
@@ -767,10 +765,8 @@ class KerasModel(Model):
    dataset = NumpyDataset(X=X, y=None)
    return self.predict(dataset, transformers, outputs)

-  def predict_uncertainty_on_batch(
-      self,
-      X: Sequence,
-      masks: int = 50) -> OneOrMany[Tuple[np.ndarray, np.ndarray]]:
+  def predict_uncertainty_on_batch(self, X: Sequence, masks: int = 50
+                                  ) -> OneOrMany[Tuple[np.ndarray, np.ndarray]]:
    """
    Predict the model's outputs, along with the uncertainty in each one.

@@ -828,11 +824,10 @@ class KerasModel(Model):
    a NumPy array if the model produces a single output, or a list of arrays
    if it produces multiple outputs
    """
-    generator = self.default_generator(dataset,
-                                       mode='predict',
-                                       deterministic=True,
-                                       pad_batches=False)
-    return self.predict_on_generator(generator,
+    generator = self.default_generator(
+        dataset, mode='predict', deterministic=True, pad_batches=False)
+    return self.predict_on_generator(
+        generator,
        transformers=transformers,
        outputs=outputs,
        output_types=output_types)
@@ -853,15 +848,12 @@ class KerasModel(Model):
    a NumPy array of the embeddings the model produces, or a list
    of arrays if it produces multiple embeddings
    """
-    generator = self.default_generator(dataset,
-                                       mode='predict',
-                                       pad_batches=False)
+    generator = self.default_generator(
+        dataset, mode='predict', pad_batches=False)
    return self._predict(generator, [], None, False, ['embedding'])

-  def predict_uncertainty(
-      self,
-      dataset: Dataset,
-      masks: int = 50) -> OneOrMany[Tuple[np.ndarray, np.ndarray]]:
+  def predict_uncertainty(self, dataset: Dataset, masks: int = 50
+                         ) -> OneOrMany[Tuple[np.ndarray, np.ndarray]]:
    """
    Predict the model's outputs, along with the uncertainty in each one.

@@ -889,9 +881,8 @@ class KerasModel(Model):
    sum_sq_pred: List[np.ndarray] = []
    sum_var: List[np.ndarray] = []
    for i in range(masks):
-      generator = self.default_generator(dataset,
-                                         mode='uncertainty',
-                                         pad_batches=False)
+      generator = self.default_generator(
+          dataset, mode='uncertainty', pad_batches=False)
      results = self._predict(generator, [], None, True, None)
      if len(sum_pred) == 0:
        for p, v in results:
@@ -972,8 +963,8 @@ class KerasModel(Model):
    # Use a GradientTape to compute gradients.

    X = tf.constant(X[0])
-    with tf.GradientTape(persistent=True,
-                         watch_accessed_variables=False) as tape:
+    with tf.GradientTape(
+        persistent=True, watch_accessed_variables=False) as tape:
      tape.watch(X)
      outputs = self._compute_model(X)
      if tf.is_tensor(outputs):
@@ -991,8 +982,8 @@ class KerasModel(Model):
      return final_result[0]
    return final_result

-  def _prepare_batch(self, batch: Tuple[Any, Any,
-                                        Any]) -> Tuple[List, List, List]:
+  def _prepare_batch(self,
+                     batch: Tuple[Any, Any, Any]) -> Tuple[List, List, List]:
    inputs, labels, weights = batch
    inputs = [
        x if x.dtype == t else x.astype(t)
@@ -1052,8 +1043,8 @@ class KerasModel(Model):
    ([inputs], [outputs], [weights])
    """
    for epoch in range(epochs):
-      for (X_b, y_b, w_b,
-           ids_b) in dataset.iterbatches(batch_size=self.batch_size,
+      for (X_b, y_b, w_b, ids_b) in dataset.iterbatches(
+          batch_size=self.batch_size,
          deterministic=deterministic,
          pad_batches=pad_batches):
        yield ([X_b], [y_b], [w_b])
@@ -1243,8 +1234,8 @@ class KerasModel(Model):

    if assignment_map is None:
      logger.info("No assignment map provided. Creating custom assignment map.")
-      assignment_map = self._create_assignment_map(source_model=source_model,
-                                                   include_top=include_top)
+      assignment_map = self._create_assignment_map(
+          source_model=source_model, include_top=include_top)

    for source_var, dest_var in assignment_map.items():
      assert source_var.deref().shape == dest_var.shape
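Several hunks above touch the loss-averaging and logging path in fit_generator. For orientation, a minimal sketch of how that path is driven from user code; the toy dataset and model are assumptions for illustration, while the parameter names (all_losses, log_frequency) come from the diff itself:

```python
# Minimal sketch: exercising the fit()/fit_generator() logging path above.
# The toy dataset and model here are illustrative assumptions.
import numpy as np
import tensorflow as tf
import deepchem as dc

X = np.random.rand(10, 3)
y = np.random.rand(10, 1)
dataset = dc.data.NumpyDataset(X, y)

keras_model = tf.keras.Sequential([tf.keras.layers.Dense(1)])
model = dc.models.KerasModel(
    keras_model, dc.models.losses.L2Loss(), log_frequency=10)

all_losses = []  # fit() appends one averaged loss per logging interval
model.fit(dataset, nb_epoch=10, all_losses=all_losses)
print(all_losses)  # e.g. one entry per log_frequency global steps
```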
+43 −42
@@ -4,6 +4,7 @@ import deepchem as dc
import numpy as np
import tensorflow as tf

+
def test_overfit_graph_model():
  """Test fitting a KerasModel defined as a graph."""
  n_data_points = 10
@@ -17,7 +18,8 @@ def test_overfit_graph_model():
  logits = tf.keras.layers.Dense(1)(hidden)
  outputs = tf.keras.layers.Activation('sigmoid')(logits)
  keras_model = tf.keras.Model(inputs=inputs, outputs=[outputs, logits])
-  model = dc.models.KerasModel(keras_model,
+  model = dc.models.KerasModel(
+      keras_model,
      dc.models.losses.SigmoidCrossEntropy(),
      output_types=['prediction', 'loss'],
      learning_rate=0.005)
@@ -45,9 +47,8 @@ def test_overfit_sequential_model():
      tf.keras.layers.Dense(10, activation='relu'),
      tf.keras.layers.Dense(1, activation='sigmoid')
  ])
-  model = dc.models.KerasModel(keras_model,
-                               dc.models.losses.BinaryCrossEntropy(),
-                               learning_rate=0.005)
+  model = dc.models.KerasModel(
+      keras_model, dc.models.losses.BinaryCrossEntropy(), learning_rate=0.005)
  model.fit(dataset, nb_epoch=1000)
  prediction = np.squeeze(model.predict_on_batch(X))
  assert np.array_equal(y, np.round(prediction))
@@ -68,7 +69,8 @@ def test_fit_use_all_losses():
      tf.keras.layers.Dense(10, activation='relu'),
      tf.keras.layers.Dense(1, activation='sigmoid')
  ])
-  model = dc.models.KerasModel(keras_model,
+  model = dc.models.KerasModel(
+      keras_model,
      dc.models.losses.BinaryCrossEntropy(),
      learning_rate=0.005,
      log_frequency=10)
@@ -90,9 +92,8 @@ def test_fit_on_batch():
      tf.keras.layers.Dense(10, activation='relu'),
      tf.keras.layers.Dense(1, activation='sigmoid')
  ])
-  model = dc.models.KerasModel(keras_model,
-                               dc.models.losses.BinaryCrossEntropy(),
-                               learning_rate=0.005)
+  model = dc.models.KerasModel(
+      keras_model, dc.models.losses.BinaryCrossEntropy(), learning_rate=0.005)
  i = 0
  for X, y, w, ids in dataset.iterbatches(model.batch_size, 500):
    i += 1
@@ -112,9 +113,8 @@ def test_checkpointing():
  keras_model1 = tf.keras.Sequential([tf.keras.layers.Dense(10)])
  keras_model2 = tf.keras.Sequential([tf.keras.layers.Dense(10)])
  model1 = dc.models.KerasModel(keras_model1, dc.models.losses.L2Loss())
-  model2 = dc.models.KerasModel(keras_model2,
-                                dc.models.losses.L2Loss(),
-                                model_dir=model1.model_dir)
+  model2 = dc.models.KerasModel(
+      keras_model2, dc.models.losses.L2Loss(), model_dir=model1.model_dir)

  # Check that they produce different results.

@@ -148,9 +148,8 @@ def test_fit_restore():
      tf.keras.layers.Dense(10, activation='relu'),
      tf.keras.layers.Dense(1, activation='sigmoid')
  ])
-  model = dc.models.KerasModel(keras_model,
-                               dc.models.losses.BinaryCrossEntropy(),
-                               learning_rate=0.005)
+  model = dc.models.KerasModel(
+      keras_model, dc.models.losses.BinaryCrossEntropy(), learning_rate=0.005)
  model.fit(dataset, nb_epoch=1000)
  prediction = np.squeeze(model.predict_on_batch(X))
  assert np.array_equal(y, np.round(prediction))
@@ -162,7 +161,8 @@ def test_fit_restore():
      tf.keras.layers.Dense(10, activation='relu'),
      tf.keras.layers.Dense(1, activation='sigmoid')
  ])
-  model2 = dc.models.KerasModel(keras_model2,
+  model2 = dc.models.KerasModel(
+      keras_model2,
      dc.models.losses.BinaryCrossEntropy(),
      model_dir=model.model_dir)
  model2.fit(dataset, nb_epoch=1, restore=True)
@@ -188,8 +188,8 @@ def test_uncertainty():
  output = tf.keras.layers.Dense(n_features)(dropout)
  log_var = tf.keras.layers.Dense(n_features)(dropout)
  var = tf.keras.layers.Activation(tf.exp)(log_var)
-  keras_model = tf.keras.Model(inputs=[inputs, switch],
-                               outputs=[output, var, output, log_var])
+  keras_model = tf.keras.Model(
+      inputs=[inputs, switch], outputs=[output, var, output, log_var])

  def loss(outputs, labels, weights):
    diff = labels[0] - outputs[0]
@@ -206,8 +206,8 @@ def test_uncertainty():
                          deterministic=True,
                          pad_batches=True):
      for epoch in range(epochs):
-        for (X_b, y_b, w_b,
-             ids_b) in dataset.iterbatches(batch_size=self.batch_size,
+        for (X_b, y_b, w_b, ids_b) in dataset.iterbatches(
+            batch_size=self.batch_size,
            deterministic=deterministic,
            pad_batches=pad_batches):
          if mode == 'predict':
@@ -283,7 +283,8 @@ def test_tensorboard():
  keras_model = tf.keras.Sequential([
      tf.keras.layers.Dense(2, activation='softmax'),
  ])
-  model = dc.models.KerasModel(keras_model,
+  model = dc.models.KerasModel(
+      keras_model,
      dc.models.losses.CategoricalCrossEntropy(),
      tensorboard=True,
      log_frequency=1)
@@ -299,8 +300,8 @@ def test_tensorboard():
def test_wandblogger():
  """Test logging to Weights & Biases."""
  # Load dataset and Models
-  tasks, datasets, transformers = dc.molnet.load_delaney(featurizer='ECFP',
-                                                         splitter='random')
+  tasks, datasets, transformers = dc.molnet.load_delaney(
+      featurizer='ECFP', splitter='random')
  train_dataset, valid_dataset, test_dataset = datasets
  metric = dc.metrics.Metric(dc.metrics.pearson_r2_score)
  wandblogger = dc.models.WandbLogger(anonymous="allow", save_run_history=True)
@@ -308,16 +309,16 @@ def test_wandblogger():
  keras_model = tf.keras.Sequential(
      [tf.keras.layers.Dense(10, activation='relu'),
       tf.keras.layers.Dense(1)])
-  model = dc.models.KerasModel(keras_model,
-                               dc.models.losses.L2Loss(),
-                               wandb_logger=wandblogger)
+  model = dc.models.KerasModel(
+      keras_model, dc.models.losses.L2Loss(), wandb_logger=wandblogger)
  vc = dc.models.ValidationCallback(valid_dataset, 1, [metric])
  model.fit(train_dataset, nb_epoch=10, callbacks=[vc])

-  run_data = wandblogger.run_history._data
+  run_data = wandblogger.run_history
  valid_score = model.evaluate(valid_dataset, [metric], transformers)

-  assert math.isclose(valid_score["pearson_r2_score"],
+  assert math.isclose(
+      valid_score["pearson_r2_score"],
      run_data['eval/pearson_r2_score'],
      abs_tol=0.0005)

+14 −12
@@ -334,12 +334,13 @@ def test_tensorboard():
  file_size = os.stat(event_file).st_size
  assert file_size > 0

+
@unittest.skipIf(not has_pytorch, 'PyTorch is not installed')
def test_wandblogger():
  """Test logging to Weights & Biases."""
  # Load dataset and Models
-  tasks, datasets, transformers = dc.molnet.load_delaney(featurizer='ECFP',
-                                                         splitter='random')
+  tasks, datasets, transformers = dc.molnet.load_delaney(
+      featurizer='ECFP', splitter='random')
  train_dataset, valid_dataset, test_dataset = datasets
  metric = dc.metrics.Metric(dc.metrics.pearson_r2_score)
  wandblogger = dc.models.WandbLogger(anonymous="allow", save_run_history=True)
@@ -348,19 +349,20 @@ def test_wandblogger():
      torch.nn.Linear(1024, 1000),
      torch.nn.Dropout(p=0.5),
      torch.nn.Linear(1000, 1))
-  model = dc.models.TorchModel(pytorch_model,
-                               dc.models.losses.L2Loss(),
-                               wandb_logger=wandblogger)
+  model = dc.models.TorchModel(
+      pytorch_model, dc.models.losses.L2Loss(), wandb_logger=wandblogger)
  vc = dc.models.ValidationCallback(valid_dataset, 1, [metric])
  model.fit(train_dataset, nb_epoch=10, callbacks=[vc])

-  run_data = wandblogger.run_history._data
+  run_data = wandblogger.run_history
  valid_score = model.evaluate(valid_dataset, [metric], transformers)

-  assert math.isclose(valid_score["pearson_r2_score"],
+  assert math.isclose(
+      valid_score["pearson_r2_score"],
      run_data['eval/pearson_r2_score'],
      abs_tol=0.0005)

+
@unittest.skipIf(not has_pytorch, 'PyTorch is not installed')
def test_fit_variables():
  """Test training a subset of the variables in a model."""
+10 −11
@@ -196,7 +196,8 @@ class TorchModel(Model):
      self.wandb_logger.setup()

    # Update config with KerasModel params
-    wandb_logger_config = dict(loss=loss,
+    wandb_logger_config = dict(
+        loss=loss,
        output_types=output_types,
        batch_size=batch_size,
        model_dir=model_dir,
@@ -429,9 +430,7 @@ class TorchModel(Model):
      if self.wandb and should_log:
        wandb.log({'loss': batch_loss}, step=current_step)
      if (self.wandb_logger is not None) and should_log:
-        all_data = dict({
-          'train/loss': batch_loss
-        })
+        all_data = dict({'train/loss': batch_loss})
        self.wandb_logger.log_data(all_data, step=current_step)

    if self.wandb:
+11 −10
-import copy
import logging
import importlib.util
from typing import Optional, Union
@@ -70,7 +69,8 @@ class WandbLogger(object):
    self.save_run_history = save_run_history

    # set wandb init arguments
-    self.wandb_init_params = dict(name=name,
+    self.wandb_init_params = dict(
+        name=name,
        project=project,
        entity=entity,
        mode=mode,
@@ -108,7 +108,8 @@ class WandbLogger(object):
    Save run history data as field if configured to do that.
    """
    if self.save_run_history:
-      self.run_history = copy.deepcopy(self.wandb_run.history)
+      history = self.wandb_run.history._data
+      self.run_history = history
    self.wandb_run.finish()

  def update_config(self, config_data):
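The run_history change above explains the matching test edits earlier in this commit: run_history now holds the plain history dict directly, rather than a deep copy of wandb's History object, so the copy import goes away. A minimal sketch mirroring the updated tests; the model and dataset setup is copied from them, and only the final lookup is new:

```python
# Minimal sketch mirroring the updated test_wandblogger tests in this commit.
import deepchem as dc
import tensorflow as tf

wandblogger = dc.models.WandbLogger(anonymous="allow", save_run_history=True)
tasks, datasets, transformers = dc.molnet.load_delaney(
    featurizer='ECFP', splitter='random')
train_dataset, valid_dataset, _ = datasets
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score)

keras_model = tf.keras.Sequential(
    [tf.keras.layers.Dense(10, activation='relu'),
     tf.keras.layers.Dense(1)])
model = dc.models.KerasModel(
    keras_model, dc.models.losses.L2Loss(), wandb_logger=wandblogger)
vc = dc.models.ValidationCallback(valid_dataset, 1, [metric])
model.fit(train_dataset, nb_epoch=10, callbacks=[vc])

# Previously this required reaching into wandb internals
# (wandblogger.run_history._data); the attribute is now the dict itself.
print(wandblogger.run_history['eval/pearson_r2_score'])
```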