Commit c52454c8 authored by arunppsg's avatar arunppsg
Browse files

Added documentation examples for featurizers

Examples were added to atomic conformation featurizer,
circular fingerprint featurizer, molgan featurizer.
parent 7435b3b1
Loading
Loading
Loading
Loading
+19 −0
Original line number Diff line number Diff line
@@ -125,6 +125,25 @@ class AtomicConformationFeaturizer(Featurizer):

  Otherwise, it is assumed to be a SMILES string.  RDKit is used to generate a
  3D conformation and to compute formal and partial charges.

  Examples
  --------
  >>> import deepchem as dc
  >>> smiles = ['CCC']
  >>> featurizer = dc.feat.AtomicConformationFeaturizer()
  >>> features = featurizer.featurize(smiles)
  >>> features[0].num_atoms
  11
  >>> sum(features[0].atomic_number == 6)
  3
  >>> sum(features[0].atomic_number == 1)
  8
  >>> features[0].formal_charge
  array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)
  >>> features[0].partial_charge
  array([-0.06564544, -0.06564544, -0.05903836,  0.02302528,  0.02302528,
        0.02302528,  0.02302528,  0.02302528,  0.02302528,  0.0260888 ,
        0.0260888 ], dtype=float32)
  """

  def _featurize(self, datapoint: str) -> AtomicConformation:
+1 −1
Original line number Diff line number Diff line
@@ -107,7 +107,7 @@ def safe_index(l, e):
  """
  try:
    return l.index(e)
  except:
  except ValueError:
    return len(l)


+23 −1
Original line number Diff line number Diff line
@@ -14,7 +14,8 @@ class CircularFingerprint(MolecularFeaturizer):

  Extended Connectivity Circular Fingerprints compute a bag-of-words style
  representation of a molecule by breaking it into local neighborhoods and
  hashing into a bit vector of the specified size. See [1]_ for more details.
  hashing into a bit vector of the specified size. It is used specifically
  for structure-activity modelling. See [1]_ for more details.

  References
  ----------
@@ -24,6 +25,27 @@ class CircularFingerprint(MolecularFeaturizer):
  Note
  ----
  This class requires RDKit to be installed.

  Examples
  --------
  >>> import deepchem as dc
  >>> from rdkit import Chem
  >>> smiles = 'C1=CC=CC=C1'
  >>> mol = Chem.MolFromSmiles(smiles)

  >>> # Example 1: (size = 2048, radius = 4)
  >>> featurizer = dc.feat.CircularFingerprint(size=2048, radius=4)
  >>> fingerprint = featurizer([mol])
  >>> fingerprint.shape
  (1, 2048)

  >>> # Example 2: (size = 2048, radius = 8, sparse = True, smiles = True)
  >>> featurizer = dc.feat.CircularFingerprint(size=2048, radius=8,
  ...                                          sparse=True, smiles=True)
  >>> featurizer([mol])
  array([{98513984: {'smiles': 'ccc', 'count': 6}, 2763854213: {'smiles':
    'ccccc', 'count': 6}, 3218693969: {'smiles': '', 'count': 6}, 3741631696:
    {'smiles': 'c1ccccc1', 'count': 1}}], dtype=object)
  """

  def __init__(self,
+17 −2
Original line number Diff line number Diff line
@@ -36,7 +36,22 @@ class MolGanFeaturizer(MolecularFeaturizer):
  Featurizer for MolGAN de-novo molecular generation [1]_.
  The default representation is in form of GraphMatrix object.
  It is wrapper for two matrices containing atom and bond type information.
  The class also provides reverse capabilities."""
  The class also provides reverse capabilities.

  Examples
  --------
  >>> import deepchem as dc
  >>> from rdkit import Chem
  >>> rdkit_mol, smiles_mol = Chem.MolFromSmiles('CCC'), 'C1=CC=CC=C1'
  >>> molecules = [rdkit_mol, smiles_mol]
  >>> featurizer = dc.feat.MolGanFeaturizer()
  >>> features = featurizer.featurize(molecules)
  >>> type(features[0])
  <class 'deepchem.feat.molecule_featurizers.molgan_featurizer.GraphMatrix'>
  >>> molecules = featurizer.defeaturize(features)
  >>> type(molecules[0])
  <class 'rdkit.Chem.rdchem.Mol'>
  """

  def __init__(
      self,
@@ -63,7 +78,7 @@ class MolGanFeaturizer(MolecularFeaturizer):
    References
    ----------
    .. [1] Nicola De Cao et al. "MolGAN: An implicit generative model
    for small molecular graphs`<https://arxiv.org/abs/1805.11973>`"
    for small molecular graphs `<https://arxiv.org/abs/1805.11973>`_"
    """

    self.max_atom_count = max_atom_count