Unverified Commit 29d01b5e authored by Bharath Ramsundar's avatar Bharath Ramsundar Committed by GitHub
Browse files

Merge pull request #2228 from hsjang001205/DAG_reload

Fix directed acyclic graph network bug
parents 2b792b4f e94b9db0
Loading
Loading
Loading
Loading
+48 −18
Original line number Diff line number Diff line
@@ -2935,22 +2935,37 @@ class DAGLayer(tf.keras.layers.Layer):
    self.W_list = []
    self.b_list = []
    self.dropouts = []
    init = initializers.get(self.init)
    prev_layer_size = self.n_inputs
    for layer_size in self.layer_sizes:
      self.W_list.append(init([prev_layer_size, layer_size]))
      self.b_list.append(backend.zeros(shape=[
          layer_size,
      ]))
      self.W_list.append(
          self.add_weight(
              name='kernel',
              shape=(prev_layer_size, layer_size),
              initializer=self.init,
              trainable=True))
      self.b_list.append(
          self.add_weight(
              name='bias',
              shape=(layer_size,),
              initializer='zeros',
              trainable=True))
      if self.dropout is not None and self.dropout > 0.0:
        self.dropouts.append(Dropout(rate=self.dropout))
      else:
        self.dropouts.append(None)
      prev_layer_size = layer_size
    self.W_list.append(init([prev_layer_size, self.n_outputs]))
    self.b_list.append(backend.zeros(shape=[
        self.n_outputs,
    ]))
    self.W_list.append(
        self.add_weight(
            name='kernel',
            shape=(prev_layer_size, self.n_outputs),
            initializer=self.init,
            trainable=True))
    self.b_list.append(
        self.add_weight(
            name='bias',
            shape=(self.n_outputs,),
            initializer='zeros',
            trainable=True))
    if self.dropout is not None and self.dropout > 0.0:
      self.dropouts.append(Dropout(rate=self.dropout))
    else:
@@ -3068,22 +3083,37 @@ class DAGGather(tf.keras.layers.Layer):
    self.W_list = []
    self.b_list = []
    self.dropouts = []
    init = initializers.get(self.init)
    prev_layer_size = self.n_graph_feat
    for layer_size in self.layer_sizes:
      self.W_list.append(init([prev_layer_size, layer_size]))
      self.b_list.append(backend.zeros(shape=[
          layer_size,
      ]))
      self.W_list.append(
          self.add_weight(
              name='kernel',
              shape=(prev_layer_size, layer_size),
              initializer=self.init,
              trainable=True))
      self.b_list.append(
          self.add_weight(
              name='bias',
              shape=(layer_size,),
              initializer='zeros',
              trainable=True))
      if self.dropout is not None and self.dropout > 0.0:
        self.dropouts.append(Dropout(rate=self.dropout))
      else:
        self.dropouts.append(None)
      prev_layer_size = layer_size
    self.W_list.append(init([prev_layer_size, self.n_outputs]))
    self.b_list.append(backend.zeros(shape=[
        self.n_outputs,
    ]))
    self.W_list.append(
        self.add_weight(
            name='kernel',
            shape=(prev_layer_size, self.n_outputs),
            initializer=self.init,
            trainable=True))
    self.b_list.append(
        self.add_weight(
            name='bias',
            shape=(self.n_outputs,),
            initializer='zeros',
            trainable=True))
    if self.dropout is not None and self.dropout > 0.0:
      self.dropouts.append(Dropout(rate=self.dropout))
    else:
+68 −66
Original line number Diff line number Diff line
@@ -522,72 +522,74 @@ def test_progressivemultitaskregressor_reload():
  assert scores[regression_metric.name] < 0.1


## TODO: THIS IS FAILING!
#def test_DAG_regression_reload():
#  """Test DAG regressor reloads."""
#  np.random.seed(123)
#  tf.random.set_seed(123)
#  n_tasks = 1
#  #current_dir = os.path.dirname(os.path.abspath(__file__))
#
#  # Load mini log-solubility dataset.
#  featurizer = dc.feat.ConvMolFeaturizer()
#  tasks = ["outcome"]
#  mols = ["C", "CO", "CC"]
#  n_samples = len(mols)
#  X = featurizer(mols)
#  y = np.random.rand(n_samples, n_tasks)
#  dataset = dc.data.NumpyDataset(X, y)
#
#  regression_metric = dc.metrics.Metric(
#      dc.metrics.pearson_r2_score, task_averager=np.mean)
#
#  n_feat = 75
#  batch_size = 10
#  transformer = dc.trans.DAGTransformer(max_atoms=50)
#  dataset = transformer.transform(dataset)
#
#  model_dir = tempfile.mkdtemp()
#  model = dc.models.DAGModel(
#      n_tasks,
#      max_atoms=50,
#      n_atom_feat=n_feat,
#      batch_size=batch_size,
#      learning_rate=0.001,
#      use_queue=False,
#      mode="regression",
#      model_dir=model_dir)
#
#  # Fit trained model
#  model.fit(dataset, nb_epoch=1200)
#
#  # Eval model on train
#  scores = model.evaluate(dataset, [regression_metric])
#  assert scores[regression_metric.name] > .8
#
#  reloaded_model = dc.models.DAGModel(
#      n_tasks,
#      max_atoms=50,
#      n_atom_feat=n_feat,
#      batch_size=batch_size,
#      learning_rate=0.001,
#      use_queue=False,
#      mode="regression",
#      model_dir=model_dir)
#  reloaded_model.restore()
#
#  # Check predictions match on random sample
#  predmols = ["CCCC", "CCCCCO", "CCCCC"]
#  Xpred = featurizer(predmols)
#  predset = dc.data.NumpyDataset(Xpred)
#  predset = transformer.transform(predset)
#  origpred = model.predict(predset)
#  reloadpred = reloaded_model.predict(predset)
#  assert np.all(origpred == reloadpred)
#
#  # Eval model on train
#  scores = reloaded_model.evaluate(dataset, [classification_metric])
#  assert scores[classification_metric.name] > .9
def test_DAG_regression_reload():
  """Test that a trained DAGModel for regression can be reloaded from disk.

  Trains a small DAG regression model on a toy SMILES dataset, then builds a
  second model pointing at the same ``model_dir``, restores its weights, and
  checks that the reloaded model's predictions are identical to the
  original's and that it still scores acceptably on the training data.
  """
  # Fix both RNGs so the dataset labels and model init are reproducible.
  np.random.seed(123)
  tf.random.set_seed(123)
  n_tasks = 1

  # Load mini log-solubility dataset.
  featurizer = dc.feat.ConvMolFeaturizer()
  mols = ["CC", "CCO", "CC", "CCC", "CCCCO", "CO", "CC", "CCCCC", "CCC", "CCCO"]
  n_samples = len(mols)
  X = featurizer(mols)
  # Random regression targets -- this test checks reload fidelity, not fit quality.
  y = np.random.rand(n_samples, n_tasks)
  dataset = dc.data.NumpyDataset(X, y)

  regression_metric = dc.metrics.Metric(
      dc.metrics.pearson_r2_score, task_averager=np.mean)

  n_feat = 75
  batch_size = 10
  # DAG models require the DAGTransformer to lay out per-atom calculation orders.
  transformer = dc.trans.DAGTransformer(max_atoms=50)
  dataset = transformer.transform(dataset)

  model_dir = tempfile.mkdtemp()
  model = dc.models.DAGModel(
      n_tasks,
      max_atoms=50,
      n_atom_feat=n_feat,
      batch_size=batch_size,
      learning_rate=0.001,
      use_queue=False,
      mode="regression",
      model_dir=model_dir)

  # Fit trained model
  model.fit(dataset, nb_epoch=100)

  # Eval model on train; loose bound since targets are random.
  scores = model.evaluate(dataset, [regression_metric])
  assert scores[regression_metric.name] > .1

  # Build an identically-configured model over the same model_dir and restore.
  reloaded_model = dc.models.DAGModel(
      n_tasks,
      max_atoms=50,
      n_atom_feat=n_feat,
      batch_size=batch_size,
      learning_rate=0.001,
      use_queue=False,
      mode="regression",
      model_dir=model_dir)

  reloaded_model.restore()

  # Check predictions match exactly on held-out molecules.
  predmols = ["CCCC", "CCCCCO", "CCCCC"]
  Xpred = featurizer(predmols)
  predset = dc.data.NumpyDataset(Xpred)
  predset = transformer.transform(predset)
  origpred = model.predict(predset)
  reloadpred = reloaded_model.predict(predset)

  assert np.all(origpred == reloadpred)

  # Eval reloaded model on train; should score the same as the original.
  scores = reloaded_model.evaluate(dataset, [regression_metric])
  assert scores[regression_metric.name] > .1


## TODO: THIS IS FAILING!
#def test_weave_classification_reload_alt():