Commit 8b3e3088 authored by nitinprakash96's avatar nitinprakash96
Browse files

Merge branch 'master' of https://github.com/deepchem/deepchem

parents 241dc427 6f7b8ec7
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -59,7 +59,7 @@ git clone https://github.com/deepchem/deepchem.git # Clone deepchem source
cd deepchem
bash scripts/install_deepchem_conda.sh deepchem
source activate deepchem
yes | pip install tensorflow-gpu==1.5.0      # If you want GPU support
yes | pip install tensorflow-gpu==1.6.0      # If you want GPU support
python setup.py install                                # Manual install
nosetests -a '!slow' -v deepchem --nologcapture        # Run tests
```
+6 −6
Original line number Diff line number Diff line
@@ -200,8 +200,8 @@ class Dataset(object):
    >>> dataset = NumpyDataset(np.ones((2,2)))
    >>> for x, y, w, id in dataset.itersamples():
    ...   print(x.tolist(), y.tolist(), w.tolist(), id)
    [1.0 1.0] [0.0] [0.0] 0
    [1.0 1.0] [0.0] [0.0] 1
    [1.0, 1.0] [0.0] [0.0] 0
    [1.0, 1.0] [0.0] [0.0] 1
    """
    raise NotImplementedError()

@@ -409,8 +409,8 @@ class NumpyDataset(Dataset):
    >>> dataset = NumpyDataset(np.ones((2,2)))
    >>> for x, y, w, id in dataset.itersamples():
    ...   print(x.tolist(), y.tolist(), w.tolist(), id)
    [1.0 1.0] [0.0] [0.0] 0
    [1.0 1.0] [0.0] [0.0] 1
    [1.0, 1.0] [0.0] [0.0] 0
    [1.0, 1.0] [0.0] [0.0] 1
    """
    n_samples = self._X.shape[0]
    return ((self._X[i], self._y[i], self._w[i], self._ids[i])
@@ -889,8 +889,8 @@ class DiskDataset(Dataset):
    >>> dataset = DiskDataset.from_numpy(np.ones((2,2)), np.ones((2,1)), verbose=False)
    >>> for x, y, w, id in dataset.itersamples():
    ...   print(x.tolist(), y.tolist(), w.tolist(), id)
    [1.0 1.0] [0.0] [0.0] 0
    [1.0 1.0] [0.0] [0.0] 1
    [1.0, 1.0] [1.0] [1.0] 0
    [1.0, 1.0] [1.0] [1.0] 1
    """

    def iterate(dataset):
+50 −7
Original line number Diff line number Diff line
@@ -3,7 +3,6 @@ from __future__ import unicode_literals

import numpy as np
from rdkit import Chem
import itertools, operator

from deepchem.feat import Featurizer
from deepchem.feat.mol_graphs import ConvMol, WeaveMol
@@ -199,8 +198,8 @@ def bond_features(bond, use_chirality=False):
  ]
  if use_chirality:
    bond_feats = bond_feats + one_of_k_encoding_unk(
        str(bond.GetStereo(),
            ["STEREONONE", "STEREOANY", "STEREOZ", "STEREOE"]))
        str(bond.GetStereo()),
        ["STEREONONE", "STEREOANY", "STEREOZ", "STEREOE"])
  return np.array(bond_feats)


@@ -264,7 +263,8 @@ def find_distance(a1, num_atoms, canon_adj_list, max_distance=7):
class ConvMolFeaturizer(Featurizer):
  name = ['conv_mol']

  def __init__(self, master_atom=False, use_chirality=False):
  def __init__(self, master_atom=False, use_chirality=False,
               atom_properties=[]):
    """
    Parameters
    ----------
@@ -274,7 +274,20 @@ class ConvMolFeaturizer(Featurizer):
      the molecule.  This technique is briefly discussed in
      Neural Message Passing for Quantum Chemistry
      https://arxiv.org/pdf/1704.01212.pdf

    use_chirality: Boolean
      if true then make the resulting atom features aware of the 
      chirality of the molecules in question
    atom_properties: list of string or None
      properties in the RDKit Mol object to use as additional
      atom-level features in the larger molecular feature.  If None,
      then no atom-level properties are used.  Properties should be in the 
      RDKit mol object should be in the form 
      atom XXXXXXXX NAME
      where XXXXXXXX is a zero-padded 8 digit number coresponding to the
      zero-indexed atom index of each atom and NAME is the name of the property
      provided in atom_properties.  So "atom 00000000 sasa" would be the
      name of the molecule level property in mol where the solvent 
      accessible surface area of atom 0 would be stored. 

    Since ConvMol is an object and not a numpy array, need to set dtype to
    object.
@@ -282,12 +295,39 @@ class ConvMolFeaturizer(Featurizer):
    self.dtype = object
    self.master_atom = master_atom
    self.use_chirality = use_chirality
    self.atom_properties = list(atom_properties)

  def _get_atom_properties(self, atom):
    """
    For a given input RDKit atom return the values of the properties
    requested when initializing the featurize.  See the __init__ of the
    class for a full description of the names of the properties
    
    Parameters
    ----------
    atom: RDKit.rdchem.Atom
      Atom to get the properties of
    returns a numpy lists of floats of the same size as self.atom_properties
    """
    values = []
    for prop in self.atom_properties:
      mol_prop_name = str("atom %08d %s" % (atom.GetIdx(), prop))
      try:
        values.append(float(atom.GetOwningMol().GetProp(mol_prop_name)))
      except KeyError:
        raise KeyError("No property %s found in %s in %s" %
                       (mol_prop_name, atom.GetOwningMol(), self))
    return np.array(values)

  def _featurize(self, mol):
    """Encodes mol as a ConvMol object."""
    # Get the node features
    idx_nodes = [(a.GetIdx(), atom_features(
        a, use_chirality=self.use_chirality)) for a in mol.GetAtoms()]
    idx_nodes = [(a.GetIdx(),
                  np.concatenate((atom_features(
                      a, use_chirality=self.use_chirality),
                                  self._get_atom_properties(a))))
                 for a in mol.GetAtoms()]

    idx_nodes.sort()  # Sort by ind to ensure same order as rd_kit
    idx, nodes = list(zip(*idx_nodes))

@@ -315,6 +355,9 @@ class ConvMolFeaturizer(Featurizer):

    return ConvMol(nodes, canon_adj_list)

  def feature_length(self):
    return 75 + len(self.atom_properties)


class WeaveFeaturizer(Featurizer):
  name = ['weave_mol']
+6 −2
Original line number Diff line number Diff line
@@ -16,8 +16,8 @@ from deepchem.models.tensorgraph.fcnet import MultiTaskFitTransformRegressor
from deepchem.models.tensorgraph.IRV import TensorflowMultiTaskIRVClassifier
from deepchem.models.tensorgraph.robust_multitask import RobustMultitaskClassifier
from deepchem.models.tensorgraph.robust_multitask import RobustMultitaskRegressor
from deepchem.models.tensorgraph.progressive_multitask import ProgressiveMultitaskRegressor
from deepchem.models.tensorgraph.models.graph_models import WeaveTensorGraph, DTNNTensorGraph, DAGTensorGraph, GraphConvTensorGraph, MPNNTensorGraph
from deepchem.models.tensorgraph.progressive_multitask import ProgressiveMultitaskRegressor, ProgressiveMultitaskClassifier
from deepchem.models.tensorgraph.models.graph_models import WeaveTensorGraph, DTNNTensorGraph, DAGTensorGraph, GraphConvModel, MPNNTensorGraph
from deepchem.models.tensorgraph.models.symmetry_function_regression import BPSymmetryFunctionRegression, ANIRegression

from deepchem.models.tensorgraph.models.seqtoseq import SeqToSeq
@@ -25,3 +25,7 @@ from deepchem.models.tensorgraph.models.gan import GAN, WGAN
from deepchem.models.tensorgraph.models.text_cnn import TextCNNTensorGraph
from deepchem.models.tensorgraph.sequential import Sequential
from deepchem.models.tensorgraph.models.sequence_dnn import SequenceDNN

#################### Compatibility imports for renamed TensorGraph models. Remove below with DeepChem 3.0. ####################

from deepchem.models.tensorgraph.models.graph_models import GraphConvTensorGraph
+10 −36
Original line number Diff line number Diff line
@@ -8,7 +8,7 @@ import tensorflow as tf
from deepchem.utils.save import log
from deepchem.models.tensorgraph.tensor_graph import TensorGraph
from deepchem.models.tensorgraph.layers import Layer, SigmoidCrossEntropy, \
    Sigmoid, Feature, Label, Weights, Concat, WeightedError
    Sigmoid, Feature, Label, Weights, Concat, WeightedError, Stack
from deepchem.models.tensorgraph.layers import convert_to_layers
from deepchem.trans import undo_transforms

@@ -178,59 +178,33 @@ class TensorflowMultiTaskIRVClassifier(TensorGraph):
       https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2750043/
    """
    self.mol_features = Feature(shape=(None, self.n_features))
    self._labels = Label(shape=(None, self.n_tasks))
    self._weights = Weights(shape=(None, self.n_tasks))
    predictions = IRVLayer(self.n_tasks, self.K, in_layers=[self.mol_features])
    costs = []
    self.labels_fd = []
    outputs = []
    for task in range(self.n_tasks):
      task_output = Slice(task, 1, in_layers=[predictions])
      sigmoid = Sigmoid(in_layers=[task_output])
      self.add_output(sigmoid)
      outputs.append(sigmoid)

      label = Label(shape=(None, 1))
      self.labels_fd.append(label)
      label = Slice(task, axis=1, in_layers=[self._labels])
      cost = SigmoidCrossEntropy(in_layers=[label, task_output])
      costs.append(cost)
    all_cost = Concat(in_layers=costs, axis=1)
    self.weights = Weights(shape=(None, self.n_tasks))
    loss = WeightedError(in_layers=[all_cost, self.weights]) + \
    loss = WeightedError(in_layers=[all_cost, self._weights]) + \
        IRVRegularize(predictions, self.penalty, in_layers=[predictions])
    self.set_loss(loss)

  def default_generator(self,
                        dataset,
                        epochs=1,
                        predict=False,
                        deterministic=True,
                        pad_batches=True):
    """TensorGraph style implementation """
    for epoch in range(epochs):
      if not predict:
        logger.info('Starting epoch %i' % epoch)
      for (X_b, y_b, w_b, ids_b) in dataset.iterbatches(
          batch_size=self.batch_size,
          deterministic=deterministic,
          pad_batches=pad_batches):

        feed_dict = dict()
        if y_b is not None:
          for index, label in enumerate(self.labels_fd):
            feed_dict[label] = y_b[:, index:index + 1]
        if w_b is not None:
          feed_dict[self.weights] = w_b
        feed_dict[self.mol_features] = X_b

        yield feed_dict
    outputs = Stack(axis=1, in_layers=outputs)
    self.add_output(outputs)

  def predict(self, dataset, transformers=[], outputs=None):
    out = super(TensorflowMultiTaskIRVClassifier, self).predict(
        dataset, transformers=transformers, outputs=outputs)
    out = np.concatenate(out, axis=1)
    out = np.round(out).astype(int)
    return out

  def predict_proba(self, dataset, transformers=[], outputs=None):
    out = super(TensorflowMultiTaskIRVClassifier, self).predict_proba(
        dataset, transformers=transformers, outputs=outputs)
    out = np.concatenate(out, axis=1)
    out = np.stack([1 - out, out], axis=2)
    return out
    return np.concatenate([1 - out, out], axis=2)
Loading