Commit 4419b3b6 authored by nd-02110114's avatar nd-02110114
Browse files

Merge branch 'master' into update-script

parents 68b9b783 48d40fc9
Loading
Loading
Loading
Loading
+12 −6
Original line number Original line Diff line number Diff line
import numpy as np
import numpy as np
from rdkit import Chem


import deepchem as dc
import deepchem as dc
from deepchem.feat import Featurizer
from deepchem.feat import Featurizer
@@ -55,11 +54,9 @@ possible_atom_list = [
possible_numH_list = [0, 1, 2, 3, 4]
possible_numH_list = [0, 1, 2, 3, 4]
possible_valence_list = [0, 1, 2, 3, 4, 5, 6]
possible_valence_list = [0, 1, 2, 3, 4, 5, 6]
possible_formal_charge_list = [-3, -2, -1, 0, 1, 2, 3]
possible_formal_charge_list = [-3, -2, -1, 0, 1, 2, 3]
possible_hybridization_list = [
# To avoid importing rdkit, this is a placeholder list of the correct
    Chem.rdchem.HybridizationType.SP, Chem.rdchem.HybridizationType.SP2,
# length. These will be replaced with rdkit HybridizationType below
    Chem.rdchem.HybridizationType.SP3, Chem.rdchem.HybridizationType.SP3D,
possible_hybridization_list = ["SP", "SP2", "SP3", "SP3D", "SP3D2"]
    Chem.rdchem.HybridizationType.SP3D2
]
possible_number_radical_e_list = [0, 1, 2]
possible_number_radical_e_list = [0, 1, 2]
possible_chirality_list = ['R', 'S']
possible_chirality_list = ['R', 'S']


@@ -84,6 +81,14 @@ def get_feature_list(atom):
  atom: RDKit.rdchem.Atom
  atom: RDKit.rdchem.Atom
    Atom to get features for 
    Atom to get features for 
  """
  """
  # Replace the hybridization
  from rdkit import Chem
  global possible_hybridization_list
  possible_hybridization_list = [
      Chem.rdchem.HybridizationType.SP, Chem.rdchem.HybridizationType.SP2,
      Chem.rdchem.HybridizationType.SP3, Chem.rdchem.HybridizationType.SP3D,
      Chem.rdchem.HybridizationType.SP3D2
  ]
  features = 6 * [0]
  features = 6 * [0]
  features[0] = safe_index(possible_atom_list, atom.GetSymbol())
  features[0] = safe_index(possible_atom_list, atom.GetSymbol())
  features[1] = safe_index(possible_numH_list, atom.GetTotalNumHs())
  features[1] = safe_index(possible_numH_list, atom.GetTotalNumHs())
@@ -91,6 +96,7 @@ def get_feature_list(atom):
  features[3] = safe_index(possible_formal_charge_list, atom.GetFormalCharge())
  features[3] = safe_index(possible_formal_charge_list, atom.GetFormalCharge())
  features[4] = safe_index(possible_number_radical_e_list,
  features[4] = safe_index(possible_number_radical_e_list,
                           atom.GetNumRadicalElectrons())
                           atom.GetNumRadicalElectrons())

  features[5] = safe_index(possible_hybridization_list, atom.GetHybridization())
  features[5] = safe_index(possible_hybridization_list, atom.GetHybridization())
  return features
  return features


+2 −2
Original line number Original line Diff line number Diff line
@@ -22,7 +22,7 @@ class TestConvMolFeaturizer(unittest.TestCase):
    # Note there is a central nitrogen of degree 4, with 4 carbons
    # Note there is a central nitrogen of degree 4, with 4 carbons
    # of degree 1 (connected only to central nitrogen).
    # of degree 1 (connected only to central nitrogen).
    raw_smiles = ['C[N+](C)(C)C']
    raw_smiles = ['C[N+](C)(C)C']
    import rdkit
    import rdkit.Chem
    mols = [rdkit.Chem.MolFromSmiles(s) for s in raw_smiles]
    mols = [rdkit.Chem.MolFromSmiles(s) for s in raw_smiles]
    featurizer = ConvMolFeaturizer()
    featurizer = ConvMolFeaturizer()
    mols = featurizer.featurize(mols)
    mols = featurizer.featurize(mols)
@@ -70,7 +70,7 @@ class TestConvMolFeaturizer(unittest.TestCase):
  def test_alkane(self):
  def test_alkane(self):
    """Test on simple alkane"""
    """Test on simple alkane"""
    raw_smiles = ['CCC']
    raw_smiles = ['CCC']
    import rdkit
    import rdkit.Chem
    mols = [rdkit.Chem.MolFromSmiles(s) for s in raw_smiles]
    mols = [rdkit.Chem.MolFromSmiles(s) for s in raw_smiles]
    featurizer = ConvMolFeaturizer()
    featurizer = ConvMolFeaturizer()
    mol_list = featurizer.featurize(mols)
    mol_list = featurizer.featurize(mols)
+158 −86
Original line number Original line Diff line number Diff line
@@ -2,8 +2,8 @@


import numpy as np
import numpy as np
import warnings
import warnings
from deepchem.utils.save import log
import sklearn.metrics
import sklearn.metrics
import logging
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics import recall_score
from sklearn.metrics import recall_score
from sklearn.metrics import r2_score
from sklearn.metrics import r2_score
@@ -16,14 +16,23 @@ from sklearn.metrics import jaccard_score
from sklearn.metrics import f1_score
from sklearn.metrics import f1_score
from scipy.stats import pearsonr
from scipy.stats import pearsonr


logger = logging.getLogger(__name__)



def to_one_hot(y, n_classes=2):
def to_one_hot(y, n_classes=2):
  """Transforms label vector into one-hot encoding.
  """Transforms label vector into one-hot encoding.


    Turns y into vector of shape [n_samples, 2] (assuming binary labels).
  Turns y into vector of shape `(n_samples, n_classes)` with a one-hot
  encoding. 


  Parameters
  ----------
  y: np.ndarray
  y: np.ndarray
      A vector of shape [n_samples, 1]
    A vector of shape `(n_samples, 1)`

  Returns
  -------
  A numpy.ndarray of shape `(n_samples, n_classes)`.
  """
  """
  n_samples = np.shape(y)[0]
  n_samples = np.shape(y)[0]
  y_hot = np.zeros((n_samples, n_classes))
  y_hot = np.zeros((n_samples, n_classes))
@@ -34,8 +43,16 @@ def to_one_hot(y, n_classes=2):
def from_one_hot(y, axis=1):
def from_one_hot(y, axis=1):
  """Transforms label vector from one-hot encoding.
  """Transforms label vector from one-hot encoding.


  Parameters
  ----------
  y: np.ndarray
  y: np.ndarray
      A vector of shape [n_samples, num_classes]
    A vector of shape `(n_samples, num_classes)`
  axis: int, optional (default 1)
    The axis with one-hot encodings to reduce on.

  Returns
  -------
  A numpy.ndarray of shape `(n_samples,)`
  """
  """
  return np.argmax(y, axis=axis)
  return np.argmax(y, axis=axis)


@@ -62,6 +79,24 @@ def roc_auc_score(y, y_pred):




def accuracy_score(y, y_pred):
def accuracy_score(y, y_pred):
  """Compute accuracy score

  Computes accuracy score for classification tasks. Works for both
  binary and multiclass classification.

  Parameters
  ----------
  y: np.ndarray
    Of shape `(N_samples,)`
  y_pred: np.ndarray
    Of shape `(N_samples,)`

  Returns
  -------
  score: float
    The fraction of correctly classified samples. A number between 0
    and 1.
  """
  y = _ensure_class_labels(y)
  y = _ensure_class_labels(y)
  y_pred = _ensure_class_labels(y_pred)
  y_pred = _ensure_class_labels(y_pred)
  return sklearn.metrics.accuracy_score(y, y_pred)
  return sklearn.metrics.accuracy_score(y, y_pred)
@@ -83,8 +118,7 @@ def pearson_r2_score(y, y_pred):




def jaccard_index(y, y_pred):
def jaccard_index(y, y_pred):
  """Computes Jaccard Index which is the Intersection Over Union metric
  """Computes the Jaccard Index, the Intersection over Union metric commonly used in image segmentation tasks
       which is commonly used in image segmentation tasks


  Parameters
  Parameters
  ----------
  ----------
@@ -95,13 +129,17 @@ def jaccard_index(y, y_pred):




def pixel_error(y, y_pred):
def pixel_error(y, y_pred):
  """defined as 1 - the maximal F-score of pixel similarity,
  """An error metric in case y, y_pred are images.
       or squared Euclidean distance between the original and the result labels.

  Defined as 1 - the maximal F-score of pixel similarity, or squared
  Euclidean distance between the original and the result labels.


  Parameters
  Parameters
  ----------
  ----------
      y: ground truth array
  y: np.ndarray
      y_pred: predicted array
    ground truth array
  y_pred: np.ndarray
    predicted array
  """
  """
  return 1 - f1_score(y, y_pred)
  return 1 - f1_score(y, y_pred)


@@ -133,16 +171,22 @@ def kappa_score(y_true, y_pred):


  Note that this implementation of Cohen's kappa expects binary labels.
  Note that this implementation of Cohen's kappa expects binary labels.


    Args:
  Parameters
      y_true: Numpy array containing true values.
  ----------
      y_pred: Numpy array containing predicted values.
  y_true: np.ndarray
    Numpy array containing true values.
  y_pred: np.ndarray
    Numpy array containing predicted values.


    Returns:
  Returns
      kappa: Numpy array containing kappa for each classification task.
  -------
  kappa: np.ndarray
    Numpy array containing kappa for each classification task.


    Raises:
  Raises
      AssertionError: If y_true and y_pred are not the same size, or if class
  ------
        labels are not in [0, 1].
  AssertionError: If y_true and y_pred are not the same size, or if
  class labels are not in [0, 1].
  """
  """
  assert len(y_true) == len(y_pred), 'Number of examples does not match.'
  assert len(y_true) == len(y_pred), 'Number of examples does not match.'
  yt = np.asarray(y_true, dtype=int)
  yt = np.asarray(y_true, dtype=int)
@@ -165,11 +209,8 @@ def bedroc_score(y_true, y_pred, alpha=20.0):
  """BEDROC metric implemented according to Truchon and Bayley that modifies
  """BEDROC metric implemented according to Truchon and Bayley that modifies
  the ROC score by allowing for a factor of early recognition
  the ROC score by allowing for a factor of early recognition


    References:
  Parameters
      The original paper by Truchon et al. is located at
  ----------
      https://pubs.acs.org/doi/pdf/10.1021/ci600426e

    Args:
  y_true (array_like):
  y_true (array_like):
    Binary class labels. 1 for positive class, 0 otherwise
    Binary class labels. 1 for positive class, 0 otherwise
  y_pred (array_like):
  y_pred (array_like):
@@ -177,9 +218,14 @@ def bedroc_score(y_true, y_pred, alpha=20.0):
  alpha (float), default 20.0:
  alpha (float), default 20.0:
    Early recognition parameter
    Early recognition parameter


    Returns:
  Returns
  -------
  float: Value in [0, 1] that indicates the degree of early recognition
  float: Value in [0, 1] that indicates the degree of early recognition


  Notes
  -----
  The original paper by Truchon et al. is located at
  https://pubs.acs.org/doi/pdf/10.1021/ci600426e
  """
  """


  assert len(y_true) == len(y_pred), 'Number of examples do not match'
  assert len(y_true) == len(y_pred), 'Number of examples do not match'
@@ -203,23 +249,45 @@ def bedroc_score(y_true, y_pred, alpha=20.0):




class Metric(object):
class Metric(object):
  """Wrapper class for computing user-defined metrics."""
  """Wrapper class for computing user-defined metrics.

  There are a variety of different metrics this class aims to support.
  At the most simple, metrics for classification and regression that
  assume that values to compare are scalars. More complicated, there
  may perhaps be two image arrays that need to be compared.

  The `Metric` class provides a wrapper for standardizing the API
  around different classes of metrics that may be useful for DeepChem
  models. The implementation provides a few non-standard conveniences
  such as built-in support for multitask and multiclass metrics, and
  support for multidimensional outputs.
  """


  def __init__(self,
  def __init__(self,
               metric,
               metric,
               task_averager=None,
               task_averager=None,
               name=None,
               name=None,
               threshold=None,
               threshold=None,
               verbose=True,
               mode=None,
               mode=None,
               compute_energy_metric=False):
               compute_energy_metric=False):
    """
    """
        Args:
    Parameters
          metric: function that takes args y_true, y_pred (in that order) and
    ----------
    metric: function
      function that takes args y_true, y_pred (in that order) and
      computes desired score.
      computes desired score.
          task_averager: If not None, should be a function that averages metrics
    task_averager: function, optional
                  across tasks. For example, task_averager=np.mean. If task_averager
      If not None, should be a function that averages metrics across
                  is provided, this task will be inherited as a multitask metric.
      tasks. For example, task_averager=np.mean. If task_averager is
      provided, this task will be inherited as a multitask metric.
    name: str, optional
      Name of this metric
    threshold: float, optional
      Used for binary metrics and is the threshold for the positive
      class
    mode: str, optional
      Must be either classification or regression.
    compute_energy_metric: TODO(rbharath): Should this be removed? 
    """
    """
    self.metric = metric
    self.metric = metric
    self.task_averager = task_averager
    self.task_averager = task_averager
@@ -231,13 +299,12 @@ class Metric(object):
        self.name = self.task_averager.__name__ + "-" + self.metric.__name__
        self.name = self.task_averager.__name__ + "-" + self.metric.__name__
    else:
    else:
      self.name = name
      self.name = name
    self.verbose = verbose
    self.threshold = threshold
    self.threshold = threshold
    if mode is None:
    if mode is None:
      if self.metric.__name__ in [
      if self.metric.__name__ in [
          "roc_auc_score", "matthews_corrcoef", "recall_score",
          "roc_auc_score", "matthews_corrcoef", "recall_score",
          "accuracy_score", "kappa_score", "precision_score",
          "accuracy_score", "kappa_score", "precision_score",
          "balanced_accuracy_score", "prc_auc_score", "f1_score"
          "balanced_accuracy_score", "prc_auc_score", "f1_score", "bedroc_score"
      ]:
      ]:
        mode = "classification"
        mode = "classification"
      elif self.metric.__name__ in [
      elif self.metric.__name__ in [
@@ -311,7 +378,7 @@ class Metric(object):


      metric_value = self.compute_singletask_metric(y_task, y_pred_task, w_task)
      metric_value = self.compute_singletask_metric(y_task, y_pred_task, w_task)
      computed_metrics.append(metric_value)
      computed_metrics.append(metric_value)
    log("computed_metrics: %s" % str(computed_metrics), self.verbose)
    logger.info("computed_metrics: %s" % str(computed_metrics))
    if n_tasks == 1:
    if n_tasks == 1:
      computed_metrics = computed_metrics[0]
      computed_metrics = computed_metrics[0]
    if not self.is_multitask:
    if not self.is_multitask:
@@ -334,14 +401,19 @@ class Metric(object):
  def compute_singletask_metric(self, y_true, y_pred, w):
  def compute_singletask_metric(self, y_true, y_pred, w):
    """Compute a metric value.
    """Compute a metric value.


    Args:
    Parameters
      y_true: A list of arrays containing true values for each task.
    ----------
      y_pred: A list of arrays containing predicted values for each task.
    y_true: list
      A list of arrays containing true values for each task.
    y_pred: list
      A list of arrays containing predicted values for each task.


    Returns:
    Returns
    -------
    Float metric value.
    Float metric value.


    Raises:
    Raises
    ------
    NotImplementedError: If metric_str is not in METRICS.
    NotImplementedError: If metric_str is not in METRICS.
    """
    """


+155 −46
Original line number Original line Diff line number Diff line
@@ -24,6 +24,13 @@ class InteratomicL2Distances(tf.keras.layers.Layer):
    return config
    return config


  def call(self, inputs):
  def call(self, inputs):
    """Invokes this layer.

    Parameters
    ----------
    inputs: list
      Should be of form `inputs=[coords, nbr_list]` where `coords` is a tensor of shape `(None, N, 3)` and `nbr_list` is a list.
    """
    if len(inputs) != 2:
    if len(inputs) != 2:
      raise ValueError("InteratomicDistances requires coords,nbr_list")
      raise ValueError("InteratomicDistances requires coords,nbr_list")
    coords, nbr_list = (inputs[0], inputs[1])
    coords, nbr_list = (inputs[0], inputs[1])
@@ -38,6 +45,16 @@ class InteratomicL2Distances(tf.keras.layers.Layer):




class GraphConv(tf.keras.layers.Layer):
class GraphConv(tf.keras.layers.Layer):
  """Graph Convolutional Layers
  
  This layer implements the graph convolution introduced in 

  Duvenaud, David K., et al. "Convolutional networks on graphs for learning molecular fingerprints." Advances in neural information processing systems. 2015. https://arxiv.org/abs/1509.09292
  
  The graph convolution combines per-node feature vectors in a
  nonlinear fashion with the feature vectors for neighboring nodes.
  This "blends" information in local neighborhoods of a graph.
  """


  def __init__(self,
  def __init__(self,
               out_channel,
               out_channel,
@@ -45,6 +62,24 @@ class GraphConv(tf.keras.layers.Layer):
               max_deg=10,
               max_deg=10,
               activation_fn=None,
               activation_fn=None,
               **kwargs):
               **kwargs):
    """Initialize a graph convolutional layer.

    Parameters
    ----------
    out_channel: int
      The number of output channels per graph node.
    min_deg: int, optional (default 0)
      The minimum allowed degree for each graph node.
    max_deg: int, optional (default 10)
      The maximum allowed degree for each graph node. Note that this
      is set to 10 to handle complex molecules (some organometallic
      compounds have strange structures). If you're using this for
      non-molecular applications, you may need to set this much higher
      depending on your dataset.
    activation_fn: function
      A nonlinear activation function to apply. If you're not sure,
      `tf.nn.relu` is probably a good default for your application.
    """
    super(GraphConv, self).__init__(**kwargs)
    super(GraphConv, self).__init__(**kwargs)
    self.out_channel = out_channel
    self.out_channel = out_channel
    self.min_degree = min_deg
    self.min_degree = min_deg
@@ -143,8 +178,27 @@ class GraphConv(tf.keras.layers.Layer):




class GraphPool(tf.keras.layers.Layer):
class GraphPool(tf.keras.layers.Layer):
  """A GraphPool gathers data from local neighborhoods of a graph.

  This layer does a max-pooling over the feature vectors of atoms in a
  neighborhood. You can think of this layer as analogous to a max-pooling layer
  for 2D convolutions but which operates on graphs instead.
  """


  def __init__(self, min_degree=0, max_degree=10, **kwargs):
  def __init__(self, min_degree=0, max_degree=10, **kwargs):
    """Initialize this layer

    Parameters
    ----------
    min_degree: int, optional (default 0)
      The minimum allowed degree for each graph node.
    max_degree: int, optional (default 10)
      The maximum allowed degree for each graph node. Note that this
      is set to 10 to handle complex molecules (some organometallic
      compounds have strange structures). If you're using this for
      non-molecular applications, you may need to set this much higher
      depending on your dataset.
    """
    super(GraphPool, self).__init__(**kwargs)
    super(GraphPool, self).__init__(**kwargs)
    self.min_degree = min_degree
    self.min_degree = min_degree
    self.max_degree = max_degree
    self.max_degree = max_degree
@@ -195,8 +249,36 @@ class GraphPool(tf.keras.layers.Layer):




class GraphGather(tf.keras.layers.Layer):
class GraphGather(tf.keras.layers.Layer):
  """A GraphGather layer pools node-level feature vectors to create a graph feature vector.

  Many graph convolutional networks manipulate feature vectors per
  graph-node. For a molecule for example, each node might represent an
  atom, and the network would manipulate atomic feature vectors that
  summarize the local chemistry of the atom. However, at the end of
  the application, we will likely want to work with a molecule level
  feature representation. The `GraphGather` layer creates a graph level
  feature vector by combining all the node-level feature vectors.

  One subtlety about this layer is that it depends on the
  `batch_size`. This is done for internal implementation reasons. The
  `GraphConv`, and `GraphPool` layers pool all nodes from all graphs
  in a batch that's being processed. The `GraphGather` reassembles
  these jumbled node feature vectors into per-graph feature vectors.
  """


  def __init__(self, batch_size, activation_fn=None, **kwargs):
  def __init__(self, batch_size, activation_fn=None, **kwargs):
    """Initialize this layer.

    Parameters
    ----------
    batch_size: int
      The batch size for this layer. Note that the layer's behavior
      changes depending on the batch size.
    activation_fn: function
      A nonlinear activation function to apply. If you're not sure,
      `tf.nn.relu` is probably a good default for your application.
    """

    super(GraphGather, self).__init__(**kwargs)
    super(GraphGather, self).__init__(**kwargs)
    self.batch_size = batch_size
    self.batch_size = batch_size
    self.activation_fn = activation_fn
    self.activation_fn = activation_fn
@@ -208,7 +290,15 @@ class GraphGather(tf.keras.layers.Layer):
    return config
    return config


  def call(self, inputs):
  def call(self, inputs):
    # x = [atom_features, deg_slice, membership, deg_adj_list placeholders...]
    """Invokes this layer.

    Parameters
    ----------
    inputs: list
      This list should consist of `inputs = [atom_features, deg_slice,
      membership, deg_adj_list placeholders...]`. These are all
      tensors that are created/processed by `GraphConv` and `GraphPool`.
    """
    atom_features = inputs[0]
    atom_features = inputs[0]


    # Extract graph topology
    # Extract graph topology
@@ -507,16 +597,15 @@ class IterRefLSTMEmbedding(tf.keras.layers.Layer):
    Parameters
    Parameters
    ----------
    ----------
    inputs: list
    inputs: list
      List of two tensors (X, Xp). X should be of shape (n_test, n_feat) and
      List of two tensors (X, Xp). X should be of shape (n_test,
      Xp should be of shape (n_support, n_feat) where n_test is the size of
      n_feat) and Xp should be of shape (n_support, n_feat) where
      the test set, n_support that of the support set, and n_feat is the number
      n_test is the size of the test set, n_support that of the
      of per-atom features.
      support set, and n_feat is the number of per-atom features.


    Returns
    Returns
    -------
    -------
    list
    Returns two tensors of same shape as input. Namely the output
      Returns two tensors of same shape as input. Namely the output shape will
    shape will be [(n_test, n_feat), (n_support, n_feat)]
      be [(n_test, n_feat), (n_support, n_feat)]
    """
    """
    if len(inputs) != 2:
    if len(inputs) != 2:
      raise ValueError(
      raise ValueError(
@@ -560,10 +649,11 @@ class IterRefLSTMEmbedding(tf.keras.layers.Layer):
class SwitchedDropout(tf.keras.layers.Layer):
class SwitchedDropout(tf.keras.layers.Layer):
  """Apply dropout based on an input.
  """Apply dropout based on an input.


  This is required for uncertainty prediction.  The standard Keras Dropout
  This is required for uncertainty prediction.  The standard Keras
  layer only performs dropout during training, but we sometimes need to do it
  Dropout layer only performs dropout during training, but we
  during prediction.  The second input to this layer should be a scalar equal to
  sometimes need to do it during prediction.  The second input to this
  0 or 1, indicating whether to perform dropout.
  layer should be a scalar equal to 0 or 1, indicating whether to
  perform dropout.
  """
  """


  def __init__(self, rate, **kwargs):
  def __init__(self, rate, **kwargs):
@@ -584,6 +674,13 @@ class WeightedLinearCombo(tf.keras.layers.Layer):
  """Computes a weighted linear combination of input layers, with the weights defined by trainable variables."""
  """Computes a weighted linear combination of input layers, with the weights defined by trainable variables."""


  def __init__(self, std=0.3, **kwargs):
  def __init__(self, std=0.3, **kwargs):
    """Initialize this layer.

    Parameters
    ----------
    std: float, optional (default 0.3)
      The standard deviation to use when randomly initializing weights.
    """
    super(WeightedLinearCombo, self).__init__(**kwargs)
    super(WeightedLinearCombo, self).__init__(**kwargs)
    self.std = std
    self.std = std


@@ -617,17 +714,18 @@ class CombineMeanStd(tf.keras.layers.Layer):
  def __init__(self, training_only=False, noise_epsilon=1.0, **kwargs):
  def __init__(self, training_only=False, noise_epsilon=1.0, **kwargs):
    """Create a CombineMeanStd layer.
    """Create a CombineMeanStd layer.


    This layer should have two inputs with the same shape, and its output also has the
    This layer should have two inputs with the same shape, and its
    same shape.  Each element of the output is a Gaussian distributed random number
    output also has the same shape.  Each element of the output is a
    whose mean is the corresponding element of the first input, and whose standard
    Gaussian distributed random number whose mean is the corresponding
    deviation is the corresponding element of the second input.
    element of the first input, and whose standard deviation is the
    corresponding element of the second input.


    Parameters
    Parameters
    ----------
    ----------
    training_only: bool
    training_only: bool
      if True, noise is only generated during training.  During prediction, the output
      if True, noise is only generated during training.  During
      is simply equal to the first input (that is, the mean of the distribution used
      prediction, the output is simply equal to the first input (that
      during training).
      is, the mean of the distribution used during training).
    noise_epsilon: float
    noise_epsilon: float
      The noise is scaled by this factor
      The noise is scaled by this factor
    """
    """
@@ -671,10 +769,10 @@ class Stack(tf.keras.layers.Layer):
class Variable(tf.keras.layers.Layer):
class Variable(tf.keras.layers.Layer):
  """Output a trainable value.
  """Output a trainable value.


  Due to a quirk of Keras, you must pass an input value when invoking this layer.
  Due to a quirk of Keras, you must pass an input value when invoking
  It doesn't matter what value you pass.  Keras assumes every layer that is not
  this layer.  It doesn't matter what value you pass.  Keras assumes
  an Input will have at least one parent, and violating this assumption causes
  every layer that is not an Input will have at least one parent, and
  errors during evaluation.
  violating this assumption causes errors during evaluation.
  """
  """


  def __init__(self, initial_value, **kwargs):
  def __init__(self, initial_value, **kwargs):
@@ -830,8 +928,11 @@ class VinaFreeEnergy(tf.keras.layers.Layer):
class NeighborList(tf.keras.layers.Layer):
class NeighborList(tf.keras.layers.Layer):
  """Computes a neighbor-list in Tensorflow.
  """Computes a neighbor-list in Tensorflow.


  Neighbor-lists (also called Verlet Lists) are a tool for grouping atoms which
  Neighbor-lists (also called Verlet Lists) are a tool for grouping
  are close to each other spatially
  atoms which are close to each other spatially. This layer computes a
  Neighbor List from a provided tensor of atomic coordinates. You can
  think of this as a general "k-means" layer, but optimized for the
  case `k==3`.


  TODO(rbharath): Make this layer support batching.
  TODO(rbharath): Make this layer support batching.
  """
  """
@@ -1121,9 +1222,12 @@ class NeighborList(tf.keras.layers.Layer):
class AtomicConvolution(tf.keras.layers.Layer):
class AtomicConvolution(tf.keras.layers.Layer):
  """Implements the atomic convolutional transform introduced in
  """Implements the atomic convolutional transform introduced in


  Gomes, Joseph, et al. "Atomic convolutional networks for predicting protein-ligand binding affinity." arXiv preprint arXiv:1703.10603 (2017).
  Gomes, Joseph, et al. "Atomic convolutional networks for predicting
  protein-ligand binding affinity." arXiv preprint arXiv:1703.10603
  (2017).


  At a high level, this transform performs a sort of graph convolution on the nearest neighbors graph in 3D space.
  At a high level, this transform performs a graph convolution
  on the nearest neighbors graph in 3D space.
  """
  """


  def __init__(self,
  def __init__(self,
@@ -1433,7 +1537,8 @@ class BetaShare(tf.keras.layers.Layer):
  Parameters
  Parameters
  ----------
  ----------
  in_layers: list of Layers or tensors
  in_layers: list of Layers or tensors
    tensors in list must be the same size and list must include two or more tensors
    tensors in list must be the same size and list must include two or
    more tensors


  Returns
  Returns
  -------
  -------
@@ -1656,15 +1761,15 @@ class GraphEmbedPoolLayer(tf.keras.layers.Layer):
  GraphCNNPool Layer from Robust Spatial Filtering with Graph Convolutional Neural Networks
  GraphCNNPool Layer from Robust Spatial Filtering with Graph Convolutional Neural Networks
  https://arxiv.org/abs/1703.00792
  https://arxiv.org/abs/1703.00792


  This is a learnable pool operation
  This is a learnable pool operation. It constructs a new adjacency
  It constructs a new adjacency matrix for a graph of specified number of nodes.
  matrix for a graph of specified number of nodes.


  This differs from our other pool opertions which set vertices to a function value
  This differs from our other pool operations which set vertices to a
  without altering the adjacency matrix.
  function value without altering the adjacency matrix.


  $V_{emb} = SpatialGraphCNN({V_{in}})$\\
  .. math:: V_{emb} = SpatialGraphCNN({V_{in}})
  $V_{out} = \sigma(V_{emb})^{T} * V_{in}$
  .. math:: V_{out} = \sigma(V_{emb})^{T} * V_{in}
  $A_{out} = V_{emb}^{T} * A_{in} * V_{emb}$
  .. math:: A_{out} = V_{emb}^{T} * A_{in} * V_{emb}
  """
  """


  def __init__(self, num_vertices, **kwargs):
  def __init__(self, num_vertices, **kwargs):
@@ -1693,7 +1798,6 @@ class GraphEmbedPoolLayer(tf.keras.layers.Layer):
    ----------
    ----------
    num_filters: int
    num_filters: int
      Number of filters to have in the output
      Number of filters to have in the output

    in_layers: list of Layers or tensors
    in_layers: list of Layers or tensors
      [V, A, mask]
      [V, A, mask]
      V are the vertex features must be of shape (batch, vertex, channel)
      V are the vertex features must be of shape (batch, vertex, channel)
@@ -1704,9 +1808,10 @@ class GraphEmbedPoolLayer(tf.keras.layers.Layer):
      mask is optional, to be used when not every graph has the
      mask is optional, to be used when not every graph has the
      same number of vertices
      same number of vertices


    Returns: tf.tensor
    Returns
    Returns a tf.tensor with a graph convolution applied
    -------
    The shape will be (batch, vertex, self.num_filters)
    Returns a `tf.tensor` with a graph convolution applied.
    The shape will be `(batch, vertex, self.num_filters)`.
    """
    """
    if len(inputs) == 3:
    if len(inputs) == 3:
      V, A, mask = inputs
      V, A, mask = inputs
@@ -2761,7 +2866,9 @@ class GatedRecurrentUnit(tf.keras.layers.Layer):


class SetGather(tf.keras.layers.Layer):
class SetGather(tf.keras.layers.Layer):
  """set2set gather layer for graph-based model
  """set2set gather layer for graph-based model
  model using this layer must set pad_batches=True """

  Models using this layer must set `pad_batches=True`.
  """


  def __init__(self, M, batch_size, n_hidden=100, init='orthogonal', **kwargs):
  def __init__(self, M, batch_size, n_hidden=100, init='orthogonal', **kwargs):
    """
    """
@@ -2799,7 +2906,9 @@ class SetGather(tf.keras.layers.Layer):


  def call(self, inputs):
  def call(self, inputs):
    """Perform M steps of set2set gather,
    """Perform M steps of set2set gather,
        detailed descriptions in: https://arxiv.org/abs/1511.06391 """

    Detailed descriptions in: https://arxiv.org/abs/1511.06391
    """
    atom_features, atom_split = inputs
    atom_features, atom_split = inputs
    c = tf.zeros((self.batch_size, self.n_hidden))
    c = tf.zeros((self.batch_size, self.n_hidden))
    h = tf.zeros((self.batch_size, self.n_hidden))
    h = tf.zeros((self.batch_size, self.n_hidden))
+27 −1
Original line number Original line Diff line number Diff line
@@ -18,7 +18,33 @@ def load_clintox(featurizer='ECFP',
                 data_dir=None,
                 data_dir=None,
                 save_dir=None,
                 save_dir=None,
                 **kwargs):
                 **kwargs):
  """Load clintox datasets."""
  """Load clintox datasets.

  The ClinTox dataset compares drugs approved by the FDA and
  drugs that have failed clinical trials for toxicity reasons.
  The dataset includes two classification tasks for 1491 drug
  compounds with known chemical structures: (1) clinical trial
  toxicity (or absence of toxicity) and (2) FDA approval status.
  The list of FDA-approved drugs is compiled from the SWEETLEAD
  database, and the list of drugs that failed clinical trials for
  toxicity reasons is compiled from the Aggregate Analysis of
  ClinicalTrials.gov (AACT) database.

  The data file contains a csv table, in which columns below are
  used:
     "smiles" - SMILES representation of the molecular structure
     "FDA_APPROVED" - FDA approval status
     "CT_TOX" - Clinical trial results

  References:
  Gayvert, Kaitlyn M., Neel S. Madhukar, and Olivier Elemento. "A data-driven approach to predicting successes and failures of clinical trials." Cell chemical biology 23.10 (2016): 1294-1301.

  Artemov, Artem V., et al. "Integrated deep learned transcriptomic and structure-based predictor of clinical trials outcomes." bioRxiv (2016): 095653.

  Novick, Paul A., et al. "SWEETLEAD: an in silico database of approved drugs, regulated chemicals, and herbal isolates for computer-aided drug discovery." PloS one 8.11 (2013): e79568.

  Aggregate Analysis of ClinicalTrials.gov (AACT) Database. https://www.ctti-clinicaltrials.org/aact-database
  """
  if data_dir is None:
  if data_dir is None:
    data_dir = DEFAULT_DIR
    data_dir = DEFAULT_DIR
  if save_dir is None:
  if save_dir is None:
Loading