Commit 0917ec2e authored by Bharath Ramsundar's avatar Bharath Ramsundar
Browse files

First run-through to completion.

parent c9a61899
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -21,7 +21,7 @@ from deep_chem.utils.preprocess import train_test_random_split
from deep_chem.utils.preprocess import train_test_scaffold_split
from deep_chem.utils.preprocess import scaffold_separate
from deep_chem.utils.preprocess import multitask_to_singletask
from deep_chem.utils.preprocess import get_default_task_types_and_transforms
from deep_chem.utils.load import get_default_task_types_and_transforms
from deep_chem.utils.preprocess import get_default_descriptor_transforms

def parse_args(input_args=None):
+2 −0
Original line number Diff line number Diff line
@@ -5,6 +5,8 @@ __author__ = "Bharath Ramsundar"
__copyright__ = "Copyright 2015, Stanford University"
__license__ = "LGPL"

import numpy as np

def summarize_distribution(y):
  """Analyzes regression dataset.

+5 −0
Original line number Diff line number Diff line
@@ -5,6 +5,11 @@ __author__ = "Bharath Ramsundar"
__copyright__ = "Copyright 2015, Stanford University"
__license__ = "LGPL"

import numpy as np
from deep_chem.utils.preprocess import dataset_to_numpy
from deep_chem.utils.preprocess import labels_to_weights
from sklearn.metrics import roc_auc_score

def model_predictions(test_set, model, n_targets, n_descriptors=0,
    add_descriptors=False, modeltype="sklearn"):
  """Obtains predictions of provided model on test_set.
+23 −0
Original line number Diff line number Diff line
@@ -11,6 +11,29 @@ import os
import cPickle as pickle
from deep_chem.utils.preprocess import transform_outputs

def get_default_task_types_and_transforms(dataset_specs):
  """Provides default task types and transforms for provided datasets.

  Targets of the "muv", "dude" and "pcba" datasets are marked as
  "classification" with an empty transform list. Targets of the "pfizer"
  dataset are marked as "regression" with the transforms
  ["log", "normalize"]. Datasets with any other name contribute no entries
  to the returned dicts.

  Parameters
  ----------
  dataset_specs: dict
    Maps name of datasets to filepath.

  Returns
  -------
  task_types: dict
    Maps each target (as returned by get_target_names) to either
    "classification" or "regression".
  task_transforms: dict
    Maps each target to its list of transform names.

  Raises
  ------
  ValueError
    If dataset_specs contains the "pdbbind" dataset.
  """
  task_types, task_transforms = {}, {}
  # items() instead of iteritems() so the loop runs on Python 2 and Python 3.
  for name, path in dataset_specs.items():
    if name == "pdbbind":
      # Reject the unsupported dataset before loading its target names.
      raise ValueError("pdbbind not yet supported!")
    targets = get_target_names([path])
    if name in ("muv", "dude", "pcba"):
      for target in targets:
        task_types[target] = "classification"
        # Fresh list per target so entries never share one list object.
        task_transforms[target] = []
    elif name == "pfizer":
      for target in targets:
        task_types[target] = "regression"
        task_transforms[target] = ["log", "normalize"]
  return task_types, task_transforms

def load_descriptors(paths, descriptor_dir_name="descriptors"):
  """Load dataset descriptors and return.

+1 −24
Original line number Diff line number Diff line
@@ -6,7 +6,7 @@ __copyright__ = "Copyright 2015, Stanford University"
__license__ = "LGPL"

import numpy as np
from deep_chem.utils.load import get_target_names
from deep_chem.utils.analysis import summarize_distribution

def get_default_descriptor_transforms():
  """Provides default descriptor transforms for rdkit descriptors."""
@@ -17,29 +17,6 @@ def get_default_descriptor_transforms():
    desc_transforms[desc] = ["normalize"]
  return desc_transforms

def get_default_task_types_and_transforms(dataset_specs):
  """Provides default task types and transforms for provided datasets.

  Targets of the "muv", "dude" and "pcba" datasets are marked as
  "classification" with an empty transform list. Targets of the "pfizer"
  dataset are marked as "regression" with the transforms
  ["log", "normalize"]. Datasets with any other name contribute no entries
  to the returned dicts.

  Parameters
  ----------
  dataset_specs: dict
    Maps name of datasets to filepath.

  Returns
  -------
  task_types: dict
    Maps each target (as returned by get_target_names) to either
    "classification" or "regression".
  task_transforms: dict
    Maps each target to its list of transform names.

  Raises
  ------
  ValueError
    If dataset_specs contains the "pdbbind" dataset.
  """
  task_types, task_transforms = {}, {}
  # items() instead of iteritems() so the loop runs on Python 2 and Python 3.
  for name, path in dataset_specs.items():
    if name == "pdbbind":
      # Reject the unsupported dataset before loading its target names.
      raise ValueError("pdbbind not yet supported!")
    targets = get_target_names([path])
    if name in ("muv", "dude", "pcba"):
      for target in targets:
        task_types[target] = "classification"
        # Fresh list per target so entries never share one list object.
        task_transforms[target] = []
    elif name == "pfizer":
      for target in targets:
        task_types[target] = "regression"
        task_transforms[target] = ["log", "normalize"]
  return task_types, task_transforms

def transform_outputs(dataset, task_transforms, desc_transforms={},
    add_descriptors=False):
  """Tranform the provided outputs