Commit 53db71a4 authored by Bharath Ramsundar's avatar Bharath Ramsundar
Browse files

cleanup

parent 64a57200
Loading
Loading
Loading
Loading
+24 −30
Original line number Diff line number Diff line
@@ -190,36 +190,30 @@ class Smiles2Vec(KerasModel):

class ChemCeption(KerasModel):
  """
  Implements the ChemCeption model that leverages the
  representational capacities of convolutional neural networks
  (CNNs) to predict molecular properties.

  The model is based on the description in Goh et al.,
  "Chemception: A Deep Neural Network with Minimal Chemistry
  Knowledge Matches the Performance of Expert-developed
  QSAR/QSPR Models" (https://arxiv.org/pdf/1706.06689.pdf).  The
  authors use an image based representation of the molecule,
  where pixels encode different atomic and bond properties. More
  details on the image representations can be found at
  https://arxiv.org/abs/1710.02238

  The model consists of a Stem Layer that reduces the image
  resolution for the layers to follow. The output of the Stem
  Layer is followed by a series of Inception-Resnet blocks & a
  Reduction layer. Layers in the Inception-Resnet blocks process
  image tensors at multiple resolutions and use a ResNet style
  skip-connection, combining features from different
  resolutions. The Reduction layers reduce the spatial extent of
  the image by max-pooling and 2-strided convolutions. More
  details on these layers can be found in the ChemCeption paper
  referenced above. The output of the final Reduction layer is
  subject to a Global Average Pooling, and a fully-connected
  layer maps the features to downstream outputs.

  In the ChemCeption paper, the authors perform real-time image
  augmentation by rotating images between 0 to 180 degrees. This
  can be done during model training by setting the augment
  argument to True.
  Implements the ChemCeption model that leverages the representational capacities
  of convolutional neural networks (CNNs) to predict molecular properties.

  The model is based on the description in Goh et al., "Chemception: A Deep
  Neural Network with Minimal Chemistry Knowledge Matches the Performance of
  Expert-developed QSAR/QSPR Models" (https://arxiv.org/pdf/1706.06689.pdf).
  The authors use an image based representation of the molecule, where pixels
  encode different atomic and bond properties. More details on the image
  representations can be found at https://arxiv.org/abs/1710.02238

  The model consists of a Stem Layer that reduces the image resolution for the
  layers to follow. The output of the Stem Layer is followed by a series of
  Inception-Resnet blocks & a Reduction layer. Layers in the Inception-Resnet
  blocks process image tensors at multiple resolutions and use a ResNet style
  skip-connection, combining features from different resolutions. The Reduction
  layers reduce the spatial extent of the image by max-pooling and 2-strided
  convolutions. More details on these layers can be found in the ChemCeption
  paper referenced above. The output of the final Reduction layer is subject to
  a Global Average Pooling, and a fully-connected layer maps the features to
  downstream outputs.

  In the ChemCeption paper, the authors perform real-time image augmentation by
  rotating images between 0 to 180 degrees. This can be done during model
  training by setting the augment argument to True.
  """

  def __init__(self,

deepchem/utils/data.py

deleted100644 → 0
+0 −52
Original line number Diff line number Diff line
"""Utilities for handling datasets."""
import numpy as np
import deepchem as dc

def datasetify(data_like):
  """Convert a dataset-like object into a DeepChem dataset.

  This utility function attempts to convert its input into a
  DeepChem dataset object. These are the conversions it applies:

  - `dc.data.Dataset`: If the input is already a
    `dc.data.Dataset`, it is returned unmodified.
  - List of strings: The strings are assumed to be unique
    identifiers. They are packaged into a `dc.data.NumpyDataset`
    as follows.

  >>> import deepchem as dc
  >>> import numpy as np
  >>> l = ["C", "CC"]
  >>> dataset = dc.data.NumpyDataset(X=np.array(l), ids=np.array(l))

  The double packaging as `X` and `ids` is awkward, but it's
  currently not feasible to create a `dc.data.NumpyDataset`
  without `X` specified. Only the first element of the list is
  inspected to decide whether it is a list of strings.

  - Numpy array: This array is assumed to be the `X` feature
    array. It is packaged as follows.

  >>> import deepchem as dc
  >>> import numpy as np
  >>> X = np.random.rand(5, 5)
  >>> dataset = dc.data.NumpyDataset(X)

  Parameters
  ----------
  data_like: object
    Some object which will attempt to be converted to a
    `dc.data.Dataset` object.

  Returns
  -------
  `data_like` itself if it is already a `dc.data.Dataset`;
  otherwise a new `dc.data.NumpyDataset` wrapping the input.

  Raises
  ------
  ValueError
    If `data_like` is not one of the supported inputs. This
    includes empty lists and lists whose first element is not a
    string.
  """
  if isinstance(data_like, dc.data.Dataset):
    return data_like
  if isinstance(data_like, list):
    # An empty list or a list of non-strings must not fall through
    # and silently return None; it is reported by the ValueError below.
    if data_like and isinstance(data_like[0], str):
      return dc.data.NumpyDataset(
          X=np.array(data_like), ids=np.array(data_like))
  elif isinstance(data_like, np.ndarray):
    return dc.data.NumpyDataset(data_like)
  raise ValueError("Cannot convert into Dataset object.")