Commit 23e59acb authored by Bharath Ramsundar
Browse files

Core changes

parent 1cbf4dfb
Loading
Loading
Loading
Loading
+7 −2
Original line number Diff line number Diff line
@@ -438,6 +438,11 @@ class DiskDataset(Dataset):
    save_to_disk(
        (self.tasks, self.metadata_df), self._get_metadata_filename())

  def move(self, new_data_dir):
    """Moves dataset to new directory.

    Parameters
    ----------
    new_data_dir: str
      Destination path. If it names an existing directory, shutil.move
      places the dataset directory inside it; otherwise the dataset
      directory is moved/renamed to this exact path.
    """
    # shutil.move nests the source inside an already-existing destination
    # directory, so `new_data_dir` is not always where the data ends up.
    # On Python 3 the call returns the real destination; it returns None on
    # Python 2 (and when source and destination are the same file), in which
    # case the requested path is the correct location to record.
    moved_to = shutil.move(self.data_dir, new_data_dir)
    self.data_dir = new_data_dir if moved_to is None else moved_to

  def get_task_names(self):
    """
    Gets learning tasks associated with this dataset.
+23 −13
Original line number Diff line number Diff line
@@ -145,16 +145,28 @@ class Metric(object):
    # user-space as a custom task_averager function.
    self.compute_energy_metric = compute_energy_metric

  def compute_metric(self, y_true, y_pred, w=None, n_classes=2, filter_nans=True):
  def compute_metric(self, y_true, y_pred, w=None, n_classes=2, filter_nans=True,
                     per_task_metrics=False):
    """Compute a performance metric for each task.

    Args:
      y_true: A list of arrays containing true values for each task.
      y_pred: A list of arrays containing predicted values for each task.
      metric: Must be a class that inherits from Metric 
    Parameters
    ----------
    y_true: np.ndarray
      An np.ndarray containing true values for each task.
    y_pred: np.ndarray
      An np.ndarray containing predicted values for each task.
    w: np.ndarray, optional
      An np.ndarray containing weights for each datapoint.
    n_classes: int, optional
      Number of classes in data for classification tasks.
    filter_nans: bool, optional
      Remove NaN values in computed metrics
    per_task_metrics: bool, optional
      If true, return computed metric for each task on multitask dataset.

    Returns:
      A numpy array containing metric values for each task.
    Returns
    -------
    A numpy nd.array containing metric values for each task.
    """
    if len(y_true.shape) > 1:
      n_samples, n_tasks = y_true.shape[0], y_true.shape[1] 
@@ -189,13 +201,11 @@ class Metric(object):
      if filter_nans:
        computed_metrics = np.array(computed_metrics)
        computed_metrics = computed_metrics[~np.isnan(computed_metrics)]
      if self.compute_energy_metric:
        # TODO(rbharath, joegomes): What is this magic number?
        force_error = self.task_averager(computed_metrics[1:])*4961.47596096
        print("Force error (metric: np.mean(%s)): %f kJ/mol/A" % (self.name, force_error))
        return computed_metrics[0]
      else:
      if not per_task_metrics:
        return self.task_averager(computed_metrics)
      else:
        return self.task_averager(computed_metrics), computed_metrics


  def compute_singletask_metric(self, y_true, y_pred, w):
    """Compute a metric value.
+11 −3
Original line number Diff line number Diff line
@@ -155,7 +155,8 @@ class Model(object):
      y_pred = np.reshape(y_pred, (n_samples,)) 
    return y_pred

  def evaluate(self, dataset, metrics, transformers=[]):
  def evaluate(self, dataset, metrics, transformers=[],
               per_task_metrics=False):
    """
    Evaluates the performance of this model on specified dataset.
  
@@ -167,6 +168,8 @@ class Model(object):
      Evaluation metric
    transformers: list
      List of deepchem.transformers.Transformer
    per_task_metrics: bool
      If True, return per-task scores.

    Returns
    -------
@@ -174,8 +177,13 @@ class Model(object):
      Maps tasks to scores under metric.
    """
    evaluator = Evaluator(self, dataset, transformers)
    if not per_task_metrics:
      scores = evaluator.compute_model_performance(metrics)
      return scores
    else:
      scores, per_task_scores = evaluator.compute_model_performance(
          metrics, per_task_metrics=per_task_metrics)
      return scores, per_task_scores

  def predict_proba(self, dataset, transformers=[], batch_size=None,
                    n_classes=2):
+22 −21
Original line number Diff line number Diff line
@@ -511,27 +511,28 @@ class TensorflowGraphModel(Model):
    y_pred = np.reshape(y_pred, (n_samples, n_tasks, n_classes))
    return y_pred

  def evaluate(self, dataset, metrics, transformers=[]):
    """
    Scores this model on a dataset using the supplied metrics.

    Parameters
    ----------
    dataset: dc.data.Dataset
      Dataset to evaluate on.
    metrics: deepchem.metrics.Metric
      Evaluation metric(s), handed unchanged to the Evaluator.
    transformers: list
      List of deepchem.transformers.Transformer, passed to the Evaluator.

    Returns
    -------
    dict
      Result of Evaluator.compute_model_performance; described by the
      original documentation as mapping tasks to scores under metric.
    """
    # Evaluation is delegated entirely to an Evaluator bound to this model.
    return Evaluator(self, dataset, transformers).compute_model_performance(
        metrics)
  # TODO(rbharath): Verify this can be safely removed.
  #def evaluate(self, dataset, metrics, transformers=[]):
  #  """
  #  Evaluates the performance of this model on specified dataset.
  #
  #  Parameters
  #  ----------
  #  dataset: dc.data.Dataset
  #    Dataset object.
  #  metric: deepchem.metrics.Metric
  #    Evaluation metric
  #  transformers: list
  #    List of deepchem.transformers.Transformer

  #  Returns
  #  -------
  #  dict
  #    Maps tasks to scores under metric.
  #  """
  #  evaluator = Evaluator(self, dataset, transformers)
  #  scores = evaluator.compute_model_performance(metrics)
  #  return scores

  def _find_last_checkpoint(self):
    """Finds last saved checkpoint."""
+1 −1
Original line number Diff line number Diff line
@@ -749,7 +749,7 @@ def weight_decay(penalty_type, penalty):
    else:
      raise NotImplementedError('Unsupported penalty_type %s' % penalty_type)
    cost *= penalty
    tf.scalar_summary('Weight Decay Cost', cost)
    #tf.scalar_summary('Weight Decay Cost', cost)
  return cost


Loading