Unverified Commit 4123f02b authored by hsjang001205's avatar hsjang001205 Committed by GitHub
Browse files

Update test_reload.py

parent 23747136
Loading
Loading
Loading
Loading
+69 −85
Original line number Diff line number Diff line
@@ -522,6 +522,75 @@ def test_progressivemultitaskregressor_reload():
  assert scores[regression_metric.name] < 0.1

  
def test_DAG_regression_reload():
  """Test that a trained DAG regressor can be reloaded from its model_dir.

  Fits a `DAGModel` on a handful of small molecules with random regression
  targets, then constructs a second model with the exact same configuration
  and `model_dir`, calls `restore()` on it, and asserts that:

  * its predictions on unseen molecules equal the original model's, and
  * its score on the training data clears the same threshold.
  """
  np.random.seed(123)
  tf.random.set_seed(123)
  n_tasks = 1
  # Shared by the transformer and both models so they cannot drift apart.
  max_atoms = 50

  # Build a tiny in-memory dataset: featurized SMILES with random labels.
  featurizer = dc.feat.ConvMolFeaturizer()
  mols = ["CC", "CCO", "CC", "CCC", "CCCCO", "CO", "CC", "CCCCC", "CCC", "CCCO"]
  X = featurizer(mols)
  y = np.random.rand(len(mols), n_tasks)
  dataset = dc.data.NumpyDataset(X, y)

  regression_metric = dc.metrics.Metric(
      dc.metrics.pearson_r2_score, task_averager=np.mean)

  # The same transformer instance is reused below for the prediction set.
  transformer = dc.trans.DAGTransformer(max_atoms=max_atoms)
  dataset = transformer.transform(dataset)

  model_dir = tempfile.mkdtemp()
  # One configuration for both the trained and the reloaded model: the test
  # is only meaningful if the two are built identically.
  model_kwargs = dict(
      max_atoms=max_atoms,
      # presumably the per-atom feature width emitted by ConvMolFeaturizer
      # -- TODO confirm
      n_atom_feat=75,
      batch_size=10,
      learning_rate=0.001,
      use_queue=False,
      mode="regression",
      model_dir=model_dir)

  # Train the original model.
  model = dc.models.DAGModel(n_tasks, **model_kwargs)
  model.fit(dataset, nb_epoch=100)

  # Eval trained model on the training data.
  scores = model.evaluate(dataset, [regression_metric])
  assert scores[regression_metric.name] > .1

  # Build a fresh model over the same model_dir and restore into it.
  reloaded_model = dc.models.DAGModel(n_tasks, **model_kwargs)
  reloaded_model.restore()

  # Check predictions match on molecules that were not in the training set.
  predmols = ["CCCC", "CCCCCO", "CCCCC"]
  Xpred = featurizer(predmols)
  predset = dc.data.NumpyDataset(Xpred)
  predset = transformer.transform(predset)
  origpred = model.predict(predset)
  reloadpred = reloaded_model.predict(predset)

  assert np.all(origpred == reloadpred)

  # Eval reloaded model on the training data.
  scores = reloaded_model.evaluate(dataset, [regression_metric])
  assert scores[regression_metric.name] > .1
  
  
def test_weave_classification_reload():
  """Test weave model can be reloaded."""
  np.random.seed(123)
@@ -586,91 +655,6 @@ def test_weave_classification_reload():
  assert scores[classification_metric.name] > .6


## TODO: THIS IS FAILING!
#@pytest.mark.slow
#def test_weave_classification_reload():
#  """Test weave model can be reloaded."""
#  np.random.seed(123)
#  tf.random.set_seed(123)
#  n_tasks = 1
#
#  # Load mini log-solubility dataset.
#  featurizer = dc.feat.WeaveFeaturizer()
#  tasks = ["outcome"]
#  mols = ["C", "CO", "CC"]
#  n_samples = len(mols)
#  X = featurizer(mols)
#  y = np.random.randint(2, size=(n_samples, n_tasks))
#  dataset = dc.data.NumpyDataset(X, y)
#
#  classification_metric = dc.metrics.Metric(dc.metrics.roc_auc_score)
#
#  batch_size = 3
#
#  model_dir = tempfile.mkdtemp()
#  model = dc.models.WeaveModel(
#      n_tasks,
#      batch_size=batch_size,
#      learning_rate=0.0003,
#      mode="classification",
#      dropouts=0.0,
#      model_dir=model_dir)
#
#  # Fit trained model
#  model.fit(dataset, nb_epoch=3)
#
#  # Eval model on train
#  scores = model.evaluate(dataset, [classification_metric])
#  assert scores[classification_metric.name] > .9
#
#  # Check predictions match on random sample
#  predmols = ["CCCC", "CCCCCO", "CCCCC"]
#  Xpred = featurizer(predmols)
#
#  predset = dc.data.NumpyDataset(Xpred)
#  origpred = model.predict(predset)
#  origpred2 = model.predict(predset)
#  assert np.all(origpred == origpred2)
#
#  reloaded_model = dc.models.WeaveModel(
#      n_tasks,
#      batch_size=batch_size,
#      learning_rate=0.0003,
#      mode="classification",
#      dropouts=0.0,
#      model_dir=model_dir)
#  reloaded_model.restore()
#
#  Xproc = reloaded_model.compute_features_on_batch(Xpred)
#  reloadout = reloaded_model.model(Xproc)
#  print("reloadout")
#  print(reloadout)
#
#  reloadpred = reloaded_model.predict(predset)
#  print("reloadpred")
#  print(reloadpred)
#
#  print("origpred")
#  print(origpred)
#
#  ## Try re-restore
#  #reloaded_model.restore()
#  #reloadpred = reloaded_model.predict(predset)
#
#  #assert np.all(origpred == reloadpred)
#  print("np.amax(origpred - reloadpred)")
#  print(np.amax(origpred - reloadpred))
#  print("np.allclose(origpred, reloadpred)")
#  print(np.allclose(origpred, reloadpred))
#
#  # Eval model on train
#  scores = reloaded_model.evaluate(dataset, [classification_metric])
#  print("scores")
#  print(scores)
#  assert scores[classification_metric.name] > .9
#
#  assert np.all(origpred == reloadpred)

# TODO: THIS IS FAILING!
#def test_MPNN_regression_reload():
#  """Test MPNN can reload datasets."""