Commit fc242f71 authored by Kevin Shen
Browse files

Added tests for wandblogger

parent 4e6ce062
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -6,6 +6,7 @@ from deepchem.models.models import Model
from deepchem.models.keras_model import KerasModel
from deepchem.models.multitask import SingletaskToMultitask
from deepchem.models.callbacks import ValidationCallback
from deepchem.models.wandblogger import WandbLogger

from deepchem.models.fcnet import MultitaskRegressor
from deepchem.models.fcnet import MultitaskClassifier
+63 −34
Original line number Diff line number Diff line
import os
import unittest
import math
import deepchem as dc
import numpy as np
import tensorflow as tf
@@ -18,8 +18,7 @@ def test_overfit_graph_model():
  logits = tf.keras.layers.Dense(1)(hidden)
  outputs = tf.keras.layers.Activation('sigmoid')(logits)
  keras_model = tf.keras.Model(inputs=inputs, outputs=[outputs, logits])
  model = dc.models.KerasModel(
      keras_model,
  model = dc.models.KerasModel(keras_model,
                               dc.models.losses.SigmoidCrossEntropy(),
                               output_types=['prediction', 'loss'],
                               learning_rate=0.005)
@@ -47,8 +46,9 @@ def test_overfit_sequential_model():
      tf.keras.layers.Dense(10, activation='relu'),
      tf.keras.layers.Dense(1, activation='sigmoid')
  ])
  model = dc.models.KerasModel(
      keras_model, dc.models.losses.BinaryCrossEntropy(), learning_rate=0.005)
  model = dc.models.KerasModel(keras_model,
                               dc.models.losses.BinaryCrossEntropy(),
                               learning_rate=0.005)
  model.fit(dataset, nb_epoch=1000)
  prediction = np.squeeze(model.predict_on_batch(X))
  assert np.array_equal(y, np.round(prediction))
@@ -69,8 +69,7 @@ def test_fit_use_all_losses():
      tf.keras.layers.Dense(10, activation='relu'),
      tf.keras.layers.Dense(1, activation='sigmoid')
  ])
  model = dc.models.KerasModel(
      keras_model,
  model = dc.models.KerasModel(keras_model,
                               dc.models.losses.BinaryCrossEntropy(),
                               learning_rate=0.005,
                               log_frequency=10)
@@ -92,8 +91,9 @@ def test_fit_on_batch():
      tf.keras.layers.Dense(10, activation='relu'),
      tf.keras.layers.Dense(1, activation='sigmoid')
  ])
  model = dc.models.KerasModel(
      keras_model, dc.models.losses.BinaryCrossEntropy(), learning_rate=0.005)
  model = dc.models.KerasModel(keras_model,
                               dc.models.losses.BinaryCrossEntropy(),
                               learning_rate=0.005)
  i = 0
  for X, y, w, ids in dataset.iterbatches(model.batch_size, 500):
    i += 1
@@ -113,8 +113,9 @@ def test_checkpointing():
  keras_model1 = tf.keras.Sequential([tf.keras.layers.Dense(10)])
  keras_model2 = tf.keras.Sequential([tf.keras.layers.Dense(10)])
  model1 = dc.models.KerasModel(keras_model1, dc.models.losses.L2Loss())
  model2 = dc.models.KerasModel(
      keras_model2, dc.models.losses.L2Loss(), model_dir=model1.model_dir)
  model2 = dc.models.KerasModel(keras_model2,
                                dc.models.losses.L2Loss(),
                                model_dir=model1.model_dir)

  # Check that they produce different results.

@@ -148,8 +149,9 @@ def test_fit_restore():
      tf.keras.layers.Dense(10, activation='relu'),
      tf.keras.layers.Dense(1, activation='sigmoid')
  ])
  model = dc.models.KerasModel(
      keras_model, dc.models.losses.BinaryCrossEntropy(), learning_rate=0.005)
  model = dc.models.KerasModel(keras_model,
                               dc.models.losses.BinaryCrossEntropy(),
                               learning_rate=0.005)
  model.fit(dataset, nb_epoch=1000)
  prediction = np.squeeze(model.predict_on_batch(X))
  assert np.array_equal(y, np.round(prediction))
@@ -161,8 +163,7 @@ def test_fit_restore():
      tf.keras.layers.Dense(10, activation='relu'),
      tf.keras.layers.Dense(1, activation='sigmoid')
  ])
  model2 = dc.models.KerasModel(
      keras_model2,
  model2 = dc.models.KerasModel(keras_model2,
                                dc.models.losses.BinaryCrossEntropy(),
                                model_dir=model.model_dir)
  model2.fit(dataset, nb_epoch=1, restore=True)
@@ -188,8 +189,8 @@ def test_uncertainty():
  output = tf.keras.layers.Dense(n_features)(dropout)
  log_var = tf.keras.layers.Dense(n_features)(dropout)
  var = tf.keras.layers.Activation(tf.exp)(log_var)
  keras_model = tf.keras.Model(
      inputs=[inputs, switch], outputs=[output, var, output, log_var])
  keras_model = tf.keras.Model(inputs=[inputs, switch],
                               outputs=[output, var, output, log_var])

  def loss(outputs, labels, weights):
    diff = labels[0] - outputs[0]
@@ -206,8 +207,8 @@ def test_uncertainty():
                          deterministic=True,
                          pad_batches=True):
      for epoch in range(epochs):
        for (X_b, y_b, w_b, ids_b) in dataset.iterbatches(
            batch_size=self.batch_size,
        for (X_b, y_b, w_b,
             ids_b) in dataset.iterbatches(batch_size=self.batch_size,
                                           deterministic=deterministic,
                                           pad_batches=pad_batches):
          if mode == 'predict':
@@ -283,8 +284,7 @@ def test_tensorboard():
  keras_model = tf.keras.Sequential([
      tf.keras.layers.Dense(2, activation='softmax'),
  ])
  model = dc.models.KerasModel(
      keras_model,
  model = dc.models.KerasModel(keras_model,
                               dc.models.losses.CategoricalCrossEntropy(),
                               tensorboard=True,
                               log_frequency=1)
@@ -297,6 +297,35 @@ def test_tensorboard():
  assert file_size > 0


def test_wandblogger():
  """Check that the metric logged to Weights & Biases matches a fresh evaluation.

  A small dense regressor is trained on the Delaney dataset with a
  `WandbLogger` attached. The `eval/pearson_r2_score` entry found in the
  logger's saved run history is then compared (within an absolute tolerance)
  against the score returned by `model.evaluate` on the validation split.
  """
  # Delaney data, ECFP-featurized and randomly split.
  _, datasets, transformers = dc.molnet.load_delaney(featurizer='ECFP',
                                                     splitter='random')
  train_dataset, valid_dataset, _ = datasets

  # Two-layer dense network producing a single regression output.
  layers = [
      tf.keras.layers.Dense(10, activation='relu'),
      tf.keras.layers.Dense(1),
  ]
  keras_model = tf.keras.Sequential(layers)

  r2_metric = dc.metrics.Metric(dc.metrics.pearson_r2_score)

  # save_run_history=True asks the logger to keep a copy of the run history
  # so it can be inspected after training.
  wandblogger = dc.models.WandbLogger(train_dataset=train_dataset,
                                      eval_dataset=valid_dataset,
                                      metrics=[r2_metric],
                                      logging_strategy="step",
                                      anonymous="allow",
                                      save_run_history=True)
  l2_loss = dc.models.losses.L2Loss()
  model = dc.models.KerasModel(keras_model, l2_loss, wandb_logger=wandblogger)
  model.fit(train_dataset, nb_epoch=10)

  # Read the logged history first, then evaluate the trained model directly.
  logged = wandblogger.run_history._data
  scores = model.evaluate(valid_dataset, [r2_metric], transformers)

  expected_r2 = scores["pearson_r2_score"]
  logged_r2 = logged['eval/pearson_r2_score']
  assert math.isclose(expected_r2, logged_r2, abs_tol=0.0005)


def test_fit_variables():
  """Test training a subset of the variables in a model."""

+9 −0
Original line number Diff line number Diff line
import math
import copy
import logging
import importlib.util
from typing import List, Optional, Union
@@ -8,6 +9,7 @@ from deepchem.models.callbacks import ValidationCallback

logger = logging.getLogger(__name__)


def is_wandb_available():
  """Return True if an import spec for the `wandb` package can be found."""
  wandb_spec = importlib.util.find_spec("wandb")
  return wandb_spec is not None

@@ -40,6 +42,7 @@ class WandbLogger(object):
               anonymous: Optional[str] = "never",
               log_model: Optional[bool] = False,
               log_dataset: Optional[bool] = False,
               save_run_history: Optional[bool] = False,
               **kwargs):
    """Parameters
    ----------
@@ -71,6 +74,8 @@ class WandbLogger(object):
      whether to log the model to W&B
    log_dataset: bool
      whether to log the dataset to W&B
    save_run_history: bool
      whether to save the run history to the logger at the end (for testing purposes)
    """

    assert is_wandb_available(
@@ -104,6 +109,7 @@ class WandbLogger(object):
    self.log_model = log_model
    self.log_dataset = log_dataset
    self.save_dir = save_dir
    self.save_run_history = save_run_history

    # set wandb init arguments
    self.wandb_init_params = dict(name=name,
@@ -199,7 +205,10 @@ class WandbLogger(object):

  def finish(self):
    """Finish and close the W&B run.

    If `save_run_history` is set, a deep copy of `self.wandb_run.history` is
    stored on `self.run_history` before the run is finished.
    """
    keep_history = self.save_run_history
    if keep_history:
      # Snapshot the history before calling finish() on the run.
      history_snapshot = copy.deepcopy(self.wandb_run.history)
      self.run_history = history_snapshot
    active_run = self.wandb_run
    active_run.finish()

  def update_config(self, config_data):