Commit 34ae6aa0 authored by Bharath Ramsundar's avatar Bharath Ramsundar Committed by GitHub
Browse files

Merge pull request #374 from rbharath/factors_uv_kinase

Fixes and updates to factors/uv/kinase examples
parents 0ccfdf41 dbc6b94e
Loading
Loading
Loading
Loading
+7 −2
Original line number Diff line number Diff line
@@ -451,6 +451,11 @@ class DiskDataset(Dataset):
    save_to_disk(
        (self.tasks, self.metadata_df), self._get_metadata_filename())

  def move(self, new_data_dir):
    """Moves dataset to a new directory.

    Parameters
    ----------
    new_data_dir: str
      Destination path for the dataset directory.

    Note
    ----
    If new_data_dir already exists as a directory, shutil.move relocates
    the dataset *inside* it (as new_data_dir/<basename>), so the final
    location may differ from new_data_dir itself.
    """
    # Use shutil.move's return value: it is the actual final destination
    # path, which equals new_data_dir only when that path did not already
    # exist as a directory. Assigning new_data_dir blindly would leave
    # self.data_dir pointing at the wrong location in that case.
    self.data_dir = shutil.move(self.data_dir, new_data_dir)

  def get_task_names(self):
    """
    Gets learning tasks associated with this dataset.
+1 −1
Original line number Diff line number Diff line
@@ -19,7 +19,7 @@ import deepchem as dc
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor

class TestHyperparamOptAPI(unittest.TestCase):
class TestHyperparamOpt(unittest.TestCase):
  """
  Test hyperparameter optimization API.
  """
+26 −11
Original line number Diff line number Diff line
@@ -145,16 +145,28 @@ class Metric(object):
    # user-space as a custom task_averager function.
    self.compute_energy_metric = compute_energy_metric

  def compute_metric(self, y_true, y_pred, w=None, n_classes=2, filter_nans=True):
  def compute_metric(self, y_true, y_pred, w=None, n_classes=2, filter_nans=True,
                     per_task_metrics=False):
    """Compute a performance metric for each task.

    Args:
      y_true: A list of arrays containing true values for each task.
      y_pred: A list of arrays containing predicted values for each task.
      metric: Must be a class that inherits from Metric 
    Parameters
    ----------
    y_true: np.ndarray
      An np.ndarray containing true values for each task.
    y_pred: np.ndarray
      An np.ndarray containing predicted values for each task.
    w: np.ndarray, optional
      An np.ndarray containing weights for each datapoint.
    n_classes: int, optional
      Number of classes in data for classification tasks.
    filter_nans: bool, optional
      Remove NaN values in computed metrics
    per_task_metrics: bool, optional
      If true, return computed metric for each task on multitask dataset.

    Returns:
      A numpy array containing metric values for each task.
    Returns
    -------
    A numpy ndarray containing metric values for each task.
    """
    if len(y_true.shape) > 1:
      n_samples, n_tasks = y_true.shape[0], y_true.shape[1] 
@@ -194,8 +206,11 @@ class Metric(object):
        force_error = self.task_averager(computed_metrics[1:])*4961.47596096    
        print("Force error (metric: np.mean(%s)): %f kJ/mol/A" % (self.name, force_error))    
        return computed_metrics[0]
      else:
      elif not per_task_metrics:
        return self.task_averager(computed_metrics)
      else:
        return self.task_averager(computed_metrics), computed_metrics


  def compute_singletask_metric(self, y_true, y_pred, w):
    """Compute a metric value.
+11 −3
Original line number Diff line number Diff line
@@ -155,7 +155,8 @@ class Model(object):
      y_pred = np.reshape(y_pred, (n_samples,)) 
    return y_pred

  def evaluate(self, dataset, metrics, transformers=[]):
  def evaluate(self, dataset, metrics, transformers=[],
               per_task_metrics=False):
    """
    Evaluates the performance of this model on specified dataset.
  
@@ -167,6 +168,8 @@ class Model(object):
      Evaluation metric
    transformers: list
      List of deepchem.transformers.Transformer
    per_task_metrics: bool
      If True, return per-task scores.

    Returns
    -------
@@ -174,8 +177,13 @@ class Model(object):
      Maps tasks to scores under metric.
    """
    evaluator = Evaluator(self, dataset, transformers)
    if not per_task_metrics:
      scores = evaluator.compute_model_performance(metrics)
      return scores
    else:
      scores, per_task_scores = evaluator.compute_model_performance(
          metrics, per_task_metrics=per_task_metrics)
      return scores, per_task_scores

  def predict_proba(self, dataset, transformers=[], batch_size=None,
                    n_classes=2):
+1 −0
Original line number Diff line number Diff line
@@ -9,6 +9,7 @@ import os
import sklearn
import tempfile
import numpy as np
import shutil
from deepchem.utils.save import log
from deepchem.models import Model
from deepchem.data import DiskDataset
Loading