Commit 4340cd34 authored by Kevin Shen
Browse files

refactoring: use pre-existing logging api

parent 471faded
Loading
Loading
Loading
Loading
+32 −7
Original line number Diff line number Diff line
@@ -2,16 +2,15 @@
Callback functions that can be invoked while fitting a KerasModel.
"""
import sys
import math


class ValidationCallback(object):
  """Performs validation while training a KerasModel.

  This is a callback that can be passed to fit().  It periodically computes a
  set of metrics over a validation set and writes them to a file.  In addition,
  it can save the best model parameters found so far to a directory on disk,
  updating them every time it finds a new best validation score.

  If Tensorboard logging is enabled on the KerasModel, the metrics are also
  logged to Tensorboard.  This only happens when validation coincides with a
  step on which the model writes to the log.  You should therefore make sure
@@ -26,9 +25,9 @@ class ValidationCallback(object):
               output_file=sys.stdout,
               save_dir=None,
               save_metric=0,
               save_on_minimum=True):
               save_on_minimum=True,
               logging_strategy="step"):
    """Create a ValidationCallback.

    Parameters
    ----------
    dataset: dc.data.Dataset
@@ -49,6 +48,10 @@ class ValidationCallback(object):
      if True, the best model is considered to be the one that minimizes the
      validation metric.  If False, the best model is considered to be the one
      that maximizes it.
    logging_strategy: str
      the logging strategy used for logging (step or epoch). If "step",
      logging interval will be the value provided for `interval`. If "epoch",
      then logging will happen at the end of every training epoch.
    """
    self.dataset = dataset
    self.interval = interval
@@ -58,10 +61,15 @@ class ValidationCallback(object):
    self.save_metric = save_metric
    self.save_on_minimum = save_on_minimum
    self._best_score = None
    if logging_strategy != "step" and logging_strategy != "epoch":
      print(
          "ValidationCallback: `logging_strategy` needs to be either 'step' or 'epoch'. Defaulting to 'step'."
      )
      logging_strategy = "step"
    self.logging_strategy = logging_strategy

  def __call__(self, model, step):
    """This is invoked by the KerasModel after every step of fitting.

    Parameters
    ----------
    model: KerasModel
@@ -69,7 +77,15 @@ class ValidationCallback(object):
    step: int
      the index of the training step that has just completed
    """
    if step % self.interval != 0:

    # Decide whether validation should run on this step, per `logging_strategy`
    steps_per_epoch = math.ceil(len(model.dataset) / model.batch_size)
    should_log = False
    if (self.logging_strategy == "step" and step % self.interval == 0) or \
            (self.logging_strategy == "epoch" and step % steps_per_epoch == 0):
      should_log = True

    if should_log is False:
      return
    scores = model.evaluate(self.dataset, self.metrics)
    message = 'Step %d validation:' % step
@@ -80,6 +96,10 @@ class ValidationCallback(object):
      for key in scores:
        model._log_scalar_to_tensorboard(key, scores[key],
                                         model.get_global_step())
    if model.wandb:
      import wandb
      wandb.log(scores, step=step)

    if self.save_dir is not None:
      score = scores[self.metrics[self.save_metric].name]
      if not self.save_on_minimum:
@@ -87,3 +107,8 @@ class ValidationCallback(object):
      if self._best_score is None or score < self._best_score:
        model.save_checkpoint(model_dir=self.save_dir)
        self._best_score = score

    if model.wandb_logger is not None:
      # Log data to Wandb
      data = {'eval/' + k: v for k, v in scores.items()}
      model.wandb_logger.log_data(data, step)
+108 −11
Original line number Diff line number Diff line
@@ -3,6 +3,7 @@ import tensorflow as tf
import time
import logging
import os
import math

try:
  from collections.abc import Sequence as SequenceCollection
@@ -21,6 +22,19 @@ from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Tupl
from deepchem.utils.typing import ArrayLike, LossFn, OneOrMany
from deepchem.models.wandblogger import WandbLogger

# Probe for an optional, logged-in Weights & Biases installation and record
# the outcome in `_has_wandb`.  A failed import, or a wandb module that lacks
# the attributes queried here, leaves the flag False.
try:
  import wandb
  wandb.ensure_configured()
  # An API key is only available once the user has logged in.
  _has_wandb = wandb.api.api_key is not None
  if not _has_wandb:
    wandb.termwarn(
        "W&B installed but not logged in.  Run `wandb login` or set the WANDB_API_KEY env variable."
    )
except (ImportError, AttributeError):
  _has_wandb = False

logger = logging.getLogger(__name__)


@@ -120,8 +134,10 @@ class KerasModel(Model):
               learning_rate: Union[float, LearningRateSchedule] = 0.001,
               optimizer: Optional[Optimizer] = None,
               tensorboard: bool = False,
               wandb_logger: Optional[WandbLogger] = None,
               wandb: bool = False,
               log_frequency: int = 100,
               logging_strategy: Optional[str] = "step",
               wandb_logger: Optional[WandbLogger] = None,
               **kwargs) -> None:
    """Create a new KerasModel.

@@ -147,8 +163,8 @@ class KerasModel(Model):
      ignored.
    tensorboard: bool
      whether to log progress to TensorBoard during training
    wandb_logger: WandbLogger
      the Weights & Biases logger to log data and metrics
    wandb: bool
      whether to log progress to Weights & Biases during training (deprecated)
    log_frequency: int
      The frequency at which to log data. Data is logged using
      `logging` by default. If `tensorboard` is set, data is also
@@ -157,6 +173,8 @@ class KerasModel(Model):
      a global step corresponds to one batch of training. If you'd
      like a printout every 10 batch steps, you'd set
      `log_frequency=10` for example.
    wandb_logger: WandbLogger
      the Weights & Biases logger to log data and metrics
    """
    super(KerasModel, self).__init__(model=model, model_dir=model_dir, **kwargs)
    if isinstance(loss, Loss):
@@ -170,6 +188,17 @@ class KerasModel(Model):
      self.optimizer = optimizer
    self.tensorboard = tensorboard

    # W&B flag support (DEPRECATED)
    if wandb:
      logger.warning(
          "'wandb' argument is deprecated. Please use wandb_logger instead. "
          "This argument will be removed in a future release of DeepChem.")
    if wandb and not _has_wandb:
      logger.warning(
          "You set wandb to True but W&B is not installed. To use wandb logging, "
          "run `pip install wandb; wandb login`")
    self.wandb = wandb and _has_wandb

    self.wandb_logger = wandb_logger

    # Setup and initialize W&B logging
@@ -190,6 +219,14 @@ class KerasModel(Model):
    if self.wandb_logger is not None:
      self.wandb_logger.update_config(wandb_logger_config)

    # Check for valid logging strategy
    if logging_strategy != "step" and logging_strategy != "epoch":
      logger.warning(
          "Warning: `logging_strategy` needs to be either 'step' or 'epoch'. Defaulting to 'step'."
      )
      logging_strategy = "step"
    self.logging_strategy = logging_strategy

    # Backwards compatibility
    if "tensorboard_log_frequency" in kwargs:
      logger.warning(
@@ -279,7 +316,8 @@ class KerasModel(Model):
          variables: Optional[List[tf.Variable]] = None,
          loss: Optional[LossFn] = None,
          callbacks: Union[Callable, List[Callable]] = [],
          all_losses: Optional[List[float]] = None) -> float:
          all_losses: Optional[List[float]] = None,
          metrics: Optional[List[Metric]] = None) -> float:
    """Train this model on a dataset.

    Parameters
@@ -313,11 +351,16 @@ class KerasModel(Model):
      If specified, all logged losses are appended into this list. Note that
      you can call `fit()` repeatedly with the same list and losses will
      continue to be appended.
    metrics: Optional[List[Metric]], optional (default None)
      metrics to compute on the dataset used during training. If None,
      no metrics and scores will be computed and only training loss will be logged.

    Returns
    -------
    The average loss over the most recent checkpoint interval
   """
    self.dataset = dataset
    self.metrics = metrics
    return self.fit_generator(
        self.default_generator(dataset,
                               epochs=nb_epoch,
@@ -396,9 +439,8 @@ class KerasModel(Model):

    # Main training loop.

    # Warn if both ValidationCallback and WandbLogger present
    if self.wandb_logger is not None:
      self.wandb_logger.check_other_loggers(callbacks)
    # Calculate the number of steps in a training epoch
    steps_per_epoch = math.ceil(len(self.dataset) / self.batch_size)

    for batch in generator:
      self._create_training_ops(batch)
@@ -432,22 +474,45 @@ class KerasModel(Model):
        avg_loss = 0.0
        averaged_batches = 0

      # Calculate epoch number, sample count, and metrics
      epoch_num = self._get_epoch_num(current_step)
      sample_count = self._get_sample_count(current_step)

      # Decide whether to calculate metrics at this current step
      scores = None
      if self.metrics is not None and self.metrics:
        if (self.logging_strategy == "step" and current_step % self.log_frequency == 0) or \
           (self.logging_strategy == "epoch" and current_step % steps_per_epoch == 0):
          scores = self.evaluate(self.dataset, self.metrics)

      if checkpoint_interval > 0 and current_step % checkpoint_interval == checkpoint_interval - 1:
        manager.save()
      for c in callbacks:
        c(self, current_step)
      if self.tensorboard and should_log:
        self._log_scalar_to_tensorboard('loss', batch_loss, current_step)
      # Wandb flag support (DEPRECATED)
      if self.wandb and should_log:
        wandb.log({'loss': batch_loss}, step=current_step)

      if self.wandb_logger is not None:
        # Calculate epoch number, sample count number, and log to wandb
        self.wandb_logger.calculate_epoch_and_sample_count(current_step)
        self.wandb_logger.log(self, {'train/loss': batch_loss},
                              step=current_step)
        all_data = dict({
            'train/epoch': epoch_num,
            'train/sample_count': sample_count,
            'train/loss': batch_loss
        })
        if scores is not None:
          scores = {'train/' + k: v for k, v in scores.items()}
          all_data.update(scores)
        self.wandb_logger.log_data(all_data, step=current_step)

    # Close WandbLogger
    if self.wandb_logger is not None:
      self.wandb_logger.finish()

    if self.wandb:
      wandb.finish()

    # Report final results.
    if averaged_batches > 0:
      avg_loss = float(avg_loss) / averaged_batches
@@ -1094,6 +1159,38 @@ class KerasModel(Model):
    """Get the number of steps of fitting that have been performed."""
    return int(self._global_step)

  def _get_epoch_num(self, step):
    """Get the epoch number corresponding to current step.

    Parameters
    ----------
    step: int
    the current step during training

    Returns
    -------
    the current step's epoch number (does not have to be an int)
    """
    dataset_size = len(self.dataset)
    steps_per_epoch = math.ceil(dataset_size / self.batch_size)
    epoch_num = step / steps_per_epoch
    return epoch_num

  def _get_sample_count(self, step):
    """Get the number of samples seen during training at step.

    Parameters
    ----------
    step: int
    the current step during training

    Returns
    -------
    the number of samples seen by the model by the current step
    """
    sample_count = step * self.batch_size
    return sample_count

  def _log_scalar_to_tensorboard(self, name: str, value: Any, step: int):
    """Log a scalar value to Tensorboard."""
    with self._summary_writer.as_default():
+5 −7
Original line number Diff line number Diff line
@@ -4,6 +4,8 @@ import deepchem as dc
import numpy as np
import tensorflow as tf

from deepchem.models import ValidationCallback


def test_overfit_graph_model():
  """Test fitting a KerasModel defined as a graph."""
@@ -307,16 +309,12 @@ def test_wandblogger():
      [tf.keras.layers.Dense(10, activation='relu'),
       tf.keras.layers.Dense(1)])
  metric = dc.metrics.Metric(dc.metrics.pearson_r2_score)
  wandblogger = dc.models.WandbLogger(train_dataset=train_dataset,
                                      eval_dataset=valid_dataset,
                                      metrics=[metric],
                                      logging_strategy="step",
                                      anonymous="allow",
                                      save_run_history=True)
  wandblogger = dc.models.WandbLogger(anonymous="allow", save_run_history=True)
  model = dc.models.KerasModel(keras_model,
                               dc.models.losses.L2Loss(),
                               wandb_logger=wandblogger)
  model.fit(train_dataset, nb_epoch=10)
  vc = ValidationCallback(valid_dataset, 1, [metric])
  model.fit(train_dataset, nb_epoch=10, metrics=[metric], callbacks=[vc])

  run_data = wandblogger.run_history._data
  valid_score = model.evaluate(valid_dataset, [metric], transformers)
+10 −114
Original line number Diff line number Diff line
import math
import copy
import logging
import importlib.util
from typing import List, Optional, Union
from deepchem.data import Dataset
from deepchem.metrics import Metric
from deepchem.models.callbacks import ValidationCallback
from typing import Optional, Union

logger = logging.getLogger(__name__)

@@ -28,10 +24,6 @@ class WandbLogger(object):
    """

  def __init__(self,
               train_dataset: Dataset,
               eval_dataset: Optional[Dataset] = None,
               metrics: Optional[List[Metric]] = None,
               logging_strategy: Optional[str] = "step",
               name: Optional[str] = None,
               entity: Optional[str] = None,
               project: Optional[str] = None,
@@ -40,20 +32,10 @@ class WandbLogger(object):
               id: Optional[str] = None,
               resume: Optional[Union[bool, str]] = None,
               anonymous: Optional[str] = "never",
               log_model: Optional[bool] = False,
               log_dataset: Optional[bool] = False,
               save_run_history: Optional[bool] = False,
               **kwargs):
    """Parameters
    ----------
    train_dataset: dc.data.Dataset
      the training set on which the model is run on
    eval_dataset: dc.data.Dataset
      the validation set on which to compute the metrics
    metrics: list of dc.metrics.Metric
      metrics to compute on eval_dataset
    logging_strategy: str
      the logging strategy used for logging (step or epoch)
    name: str
      a display name for the run in the W&B dashboard
    entity: str
@@ -70,10 +52,6 @@ class WandbLogger(object):
      sets the resuming behavior
    anonymous: str
      controls anonymous data logging
    log_model: bool
      whether to log the model to W&B
    log_dataset: bool
      whether to log the dataset to W&B
    save_run_history: bool
      whether to save the run history to the logger at the end (for testing purposes)
    """
@@ -83,31 +61,11 @@ class WandbLogger(object):
    import wandb
    self._wandb = wandb

    if mode == "offline" and log_model:
      raise Exception(
          f'Providing log_model={log_model} and mode={mode} is an invalid configuration'
          ' since model checkpoints cannot be uploaded in offline mode.\n'
          'Hint: Set `mode="online"` to log your model.')

    # Check for metrics and logging strategy
    if ((metrics is None) or (not metrics)) and (eval_dataset is not None):
      logger.warning(
          "Warning: No metrics are provided. "
          "Please provide a list of metrics to be calculated on the datasets.")

    if logging_strategy != "step" and logging_strategy != "epoch":
    if mode == "offline":
      logger.warning(
          "Warning: `logging_strategy` needs to be either 'step' or 'epoch'. Defaulting to 'step'."
      )
      logging_strategy = "step"
          'Note: Model checkpoints will not be uploaded to W&B in offline mode.\n'
          'Please set `mode="online"` if you need to log your model.')

    self.datasets = {"train": train_dataset, "eval": eval_dataset}
    self.train_dataset_size = len(self.datasets["train"])
    self.metrics = metrics
    self.logging_strategy = logging_strategy

    self.log_model = log_model
    self.log_dataset = log_dataset
    self.save_dir = save_dir
    self.save_run_history = save_run_history

@@ -133,79 +91,17 @@ class WandbLogger(object):
      self.wandb_run = self._wandb.run
    self.initialized = True

  def check_other_loggers(self, callbacks):
    """Check for different callbacks and warn for redundant logging behaviour.
    Parameters
    ----------
    callbacks: function or list of functions
      one or more functions of the form f(model, step) that will be passed into fit().

    """
    for c in callbacks:
      if isinstance(c, ValidationCallback):
        logger.warning(
            "Note: You are using both WandbLogger and ValidationCallback. "
            "This will result in evaluation metrics being calculated twice and may increase runtime."
        )

  def calculate_epoch_and_sample_count(self, current_step):
    """Calculates the steps per epoch, current epoch number,
    and the number of samples seen by the model.

    Parameters
    ----------
    current_step: int
      the training step of the model

    """
    self.steps_per_epoch = math.ceil(self.train_dataset_size /
                                     self.wandb_run.config.batch_size)
    self.epoch_num = current_step / self.steps_per_epoch
    self.sample_count = current_step * self.wandb_run.config.batch_size

  def log(self, model, extra_data, step):
    """Logs the metrics and other extra data to W&B.
  def log_data(self, data, step):
    """Log data to W&B.

    Parameters
    ----------
    model: tf.keras.Model
     the Keras model implementing the calculation
    extra_data: dict
     extra data to be logged alongside calculated metrics
    data: dict
      the data to be logged to W&B
    step: int
     the step number
      the step number at which the data is to be logged
    """

    all_data = dict({})
    all_data.update(extra_data)
    all_data.update({
        'train/epoch': self.epoch_num,
        'train/sample_count': self.sample_count
    })

    if self.metrics is not None and self.metrics:
      # Get Training Metrics (interval dependent)
      if self.logging_strategy == "step" and step % self.wandb_run.config.log_frequency == 0:
        scores = model.evaluate(self.datasets["train"], self.metrics)
        scores = {'train/' + k: v for k, v in scores.items()}
        all_data.update(scores)
      elif self.logging_strategy == "epoch" and step % self.steps_per_epoch == 0:
        scores = model.evaluate(self.datasets["train"], self.metrics)
        scores = {'train/' + k: v for k, v in scores.items()}
        all_data.update(scores)

      # Get Eval Metrics (interval dependent)
      if self.datasets["eval"] is not None:
        if self.logging_strategy == "step" and step % self.wandb_run.config.log_frequency == 0:
          scores = model.evaluate(self.datasets["eval"], self.metrics)
          scores = {'eval/' + k: v for k, v in scores.items()}
          all_data.update(scores)
        elif self.logging_strategy == "epoch" and step % self.steps_per_epoch == 0:
          scores = model.evaluate(self.datasets["eval"], self.metrics)
          scores = {'eval/' + k: v for k, v in scores.items()}
          all_data.update(scores)

    self.wandb_run.log(all_data, step=step)
    self.wandb_run.log(data, step=step)

  def finish(self):
    """Finishes and closes the W&B run.

wandb/latest-run

0 → 120000
+1 −0
Original line number Diff line number Diff line
run-20210610_120802-3oel4bgb
 No newline at end of file
Loading