Unverified commit 7435b3b1 authored by Bharath Ramsundar, committed by GitHub

Merge pull request #2520 from kshen3778/wandb

Weights & Biases Basic Integration
parents eab6ee63 8d0c6bc9
.gitignore  +3 −0
@@ -67,6 +67,9 @@ target/
 # Vim swap
 *.swp
 
+# Weights & Biases
+wandb/
+
 # Dataset files
 datasets/2008-2011_USPTO_reactionSmiles_filtered.zip
 datasets/2008-2011_USPTO_reactionSmiles_filtered/
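
Note: by default the wandb client writes each run's local files to a wandb/ directory under the working directory, so the new entry keeps those generated run files out of version control.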
deepchem/models/__init__.py  +1 −0
@@ -6,6 +6,7 @@ from deepchem.models.models import Model
 from deepchem.models.keras_model import KerasModel
 from deepchem.models.multitask import SingletaskToMultitask
 from deepchem.models.callbacks import ValidationCallback
+from deepchem.models.wandblogger import WandbLogger
 
 from deepchem.models.IRV import MultitaskIRVClassifier
 from deepchem.models.robust_multitask import RobustMultitaskClassifier
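
Re-exporting WandbLogger from deepchem.models lets users import it alongside the model classes. A minimal sketch of the new import path (assumes the wandb package is installed and uses the anonymous mode exercised in the tests below):

from deepchem.models import WandbLogger

# Same class as deepchem.models.wandblogger.WandbLogger; anonymous="allow"
# permits logging without a prior `wandb login`.
logger = WandbLogger(anonymous="allow")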
deepchem/models/callbacks.py  +4 −3
@@ -80,9 +80,6 @@ class ValidationCallback(object):
       for key in scores:
         model._log_scalar_to_tensorboard(key, scores[key],
                                          model.get_global_step())
-    if model.wandb:
-      import wandb
-      wandb.log(scores, step=step)
     if self.save_dir is not None:
       score = scores[self.metrics[self.save_metric].name]
       if not self.save_on_minimum:
@@ -90,3 +87,7 @@ class ValidationCallback(object):
       if self._best_score is None or score < self._best_score:
         model.save_checkpoint(model_dir=self.save_dir)
         self._best_score = score
+    if model.wandb or (model.wandb_logger is not None):
+      # Log data to Wandb
+      data = {'eval/' + k: v for k, v in scores.items()}
+      model.wandb_logger.log_data(data, step)
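
With this change ValidationCallback no longer imports wandb itself; evaluation scores are prefixed with 'eval/' and routed through the model's wandb_logger. A minimal usage sketch, assuming wandb is installed; the random NumpyDataset toy data is purely illustrative:

import numpy as np
import tensorflow as tf
import deepchem as dc

# Toy regression data, for illustration only
X = np.random.rand(64, 10)
y = np.random.rand(64, 1)
train = dc.data.NumpyDataset(X, y)
valid = dc.data.NumpyDataset(X, y)

metric = dc.metrics.Metric(dc.metrics.pearson_r2_score)
model = dc.models.KerasModel(
    tf.keras.Sequential([tf.keras.layers.Dense(1)]),
    dc.models.losses.L2Loss(),
    wandb_logger=dc.models.WandbLogger(anonymous="allow"))

# Every 10 training steps the callback scores `valid` and, with this patch,
# sends {'eval/pearson_r2_score': ...} through model.wandb_logger.log_data.
vc = dc.models.ValidationCallback(valid, 10, [metric])
model.fit(train, nb_epoch=2, callbacks=[vc])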
deepchem/models/keras_model.py  +43 −6
@@ -3,6 +3,7 @@ import tensorflow as tf
 import time
 import logging
 import os
+
 try:
   from collections.abc import Sequence as SequenceCollection
 except:
@@ -18,6 +19,7 @@ from deepchem.utils.evaluate import GeneratorEvaluator

 from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Tuple, Union
 from deepchem.utils.typing import ArrayLike, LossFn, OneOrMany
+from deepchem.models.wandblogger import WandbLogger
 
 try:
   import wandb
@@ -133,6 +135,7 @@ class KerasModel(Model):
                tensorboard: bool = False,
                wandb: bool = False,
                log_frequency: int = 100,
+               wandb_logger: Optional[WandbLogger] = None,
                **kwargs) -> None:
     """Create a new KerasModel.

@@ -159,7 +162,7 @@ class KerasModel(Model):
     tensorboard: bool
       whether to log progress to TensorBoard during training
     wandb: bool
-      whether to log progress to Weights & Biases during training
+      whether to log progress to Weights & Biases during training (deprecated)
     log_frequency: int
       The frequency at which to log data. Data is logged using
       `logging` by default. If `tensorboard` is set, data is also
@@ -168,6 +171,8 @@ class KerasModel(Model):
       a global step corresponds to one batch of training. If you'd
       like a printout every 10 batch steps, you'd set
       `log_frequency=10` for example.
+    wandb_logger: WandbLogger
+      the Weights & Biases logger object used to log data and metrics
     """
     super(KerasModel, self).__init__(model=model, model_dir=model_dir, **kwargs)
     if isinstance(loss, Loss):
@@ -181,14 +186,41 @@ class KerasModel(Model):
       self.optimizer = optimizer
     self.tensorboard = tensorboard
 
-    # W&B logging
+    # W&B flag support (DEPRECATED)
+    if wandb:
+      logger.warning(
+          "`wandb` argument is deprecated. Please use `wandb_logger` instead. "
+          "This argument will be removed in a future release of DeepChem.")
     if wandb and not _has_wandb:
       logger.warning(
           "You set wandb to True but W&B is not installed. To use wandb logging, "
-          "run `pip install wandb; wandb login` see https://docs.wandb.com/huggingface."
-      )
+          "run `pip install wandb; wandb login`")
     self.wandb = wandb and _has_wandb
 
+    self.wandb_logger = wandb_logger
+    # If `wandb=True` and no logger is provided, initialize default logger
+    if self.wandb and (self.wandb_logger is None):
+      self.wandb_logger = WandbLogger()
+
+    # Setup and initialize W&B logging
+    if (self.wandb_logger is not None) and (not self.wandb_logger.initialized):
+      self.wandb_logger.setup()
+
+    # Update config with KerasModel params
+    wandb_logger_config = dict(
+        loss=loss,
+        output_types=output_types,
+        batch_size=batch_size,
+        model_dir=model_dir,
+        learning_rate=learning_rate,
+        optimizer=optimizer,
+        tensorboard=tensorboard,
+        log_frequency=log_frequency)
+    wandb_logger_config.update(**kwargs)
+
+    if self.wandb_logger is not None:
+      self.wandb_logger.update_config(wandb_logger_config)
+
     # Backwards compatibility
     if "tensorboard_log_frequency" in kwargs:
       logger.warning(
@@ -432,8 +464,13 @@ class KerasModel(Model):
         c(self, current_step)
       if self.tensorboard and should_log:
         self._log_scalar_to_tensorboard('loss', batch_loss, current_step)
-      if self.wandb and should_log:
-        wandb.log({'loss': batch_loss}, step=current_step)
+      if (self.wandb_logger is not None) and should_log:
+        all_data = dict({'train/loss': batch_loss})
+        self.wandb_logger.log_data(all_data, step=current_step)
 
+    # Close WandbLogger
+    if self.wandb_logger is not None:
+      self.wandb_logger.finish()
+
     # Report final results.
     if averaged_batches > 0:
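
The boolean wandb flag is kept for backwards compatibility but deprecated: passing wandb=True now warns and falls back to a default WandbLogger. A sketch of the two constructor styles under this patch, using a throwaway Keras network:

import tensorflow as tf
import deepchem as dc

keras_net = tf.keras.Sequential([tf.keras.layers.Dense(1)])

# Deprecated style: emits a warning, then builds a default WandbLogger internally
model = dc.models.KerasModel(keras_net, dc.models.losses.L2Loss(), wandb=True)

# Preferred style: pass a configured logger explicitly
logger = dc.models.WandbLogger(anonymous="allow")
model = dc.models.KerasModel(keras_net, dc.models.losses.L2Loss(), wandb_logger=logger)

With either style, fit() logs the batch loss as 'train/loss' every log_frequency steps and calls wandb_logger.finish() when training completes, closing the W&B run.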
+35 −1
 import os
+import unittest
+import math
 import deepchem as dc
 import numpy as np
 import tensorflow as tf
-import unittest
 
+try:
+  import wandb
+  has_wandb = True
+except:
+  has_wandb = False
+
 
 def test_overfit_graph_model():
@@ -297,6 +304,33 @@ def test_tensorboard():
   assert file_size > 0
 
 
+@unittest.skipIf(not has_wandb, 'Wandb is not installed')
+def test_wandblogger():
+  """Test logging to Weights & Biases."""
+  # Load dataset and Models
+  tasks, datasets, transformers = dc.molnet.load_delaney(
+      featurizer='ECFP', splitter='random')
+  train_dataset, valid_dataset, test_dataset = datasets
+  metric = dc.metrics.Metric(dc.metrics.pearson_r2_score)
+  wandblogger = dc.models.WandbLogger(anonymous="allow", save_run_history=True)
+
+  keras_model = tf.keras.Sequential(
+      [tf.keras.layers.Dense(10, activation='relu'),
+       tf.keras.layers.Dense(1)])
+  model = dc.models.KerasModel(
+      keras_model, dc.models.losses.L2Loss(), wandb_logger=wandblogger)
+  vc = dc.models.ValidationCallback(valid_dataset, 1, [metric])
+  model.fit(train_dataset, nb_epoch=10, callbacks=[vc])
+
+  run_data = wandblogger.run_history
+  valid_score = model.evaluate(valid_dataset, [metric], transformers)
+
+  assert math.isclose(
+      valid_score["pearson_r2_score"],
+      run_data['eval/pearson_r2_score'],
+      abs_tol=0.0005)
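
The save_run_history=True flag used above keeps the finished run's logged metrics available on the logger as run_history, which is what allows the test to compare the last 'eval/pearson_r2_score' entry against a fresh model.evaluate() score.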


def test_fit_variables():
  """Test training a subset of the variables in a model."""
