Commit a650b1d0 authored by arunppsg
Browse files

Added examples in documentation for featurizers

Added examples for the MACCSKeysFingerprint and PubChemFingerprint
featurizers and for functions in graph_features.py.
parent c52454c8
Loading
Loading
Loading
Loading
+34 −3
Original line number Diff line number Diff line
@@ -149,7 +149,7 @@ def get_feature_list(atom):

  Parameters
  ----------
  atom: RDKit.rdchem.Atom
  atom: rdkit.Chem.rdchem.Atom
    Atom to get features for

  Examples
@@ -259,7 +259,7 @@ def atom_to_id(atom):

  Parameters
  ----------
  atom: RDKit.rdchem.Atom
  atom: rdkit.Chem.rdchem.Atom
    Atom to convert to ids.

  Returns
@@ -281,6 +281,8 @@ def atom_features(atom,

  Parameters
  ----------
  atom: rdkit.Chem.rdchem.Atom
    Atom to compute features on.
  bool_id_feat: bool, optional
    Return an array of unique identifiers corresponding to atom type.
  explicit_H: bool, optional
@@ -288,6 +290,17 @@ def atom_features(atom,
  use_chirality: bool, optional
    If true, use chirality information.

  Examples
  --------
  >>> import deepchem as dc
  >>> from rdkit import Chem
  >>> mol = Chem.MolFromSmiles('CCC')
  >>> atom = mol.GetAtoms()[0]
  >>> features = dc.feat.graph_features.atom_features(atom)
  >>> type(features)
  <class 'numpy.ndarray'>
  >>> features.shape
  (75,)

  Returns
  -------
  np.ndarray of per-atom features.
@@ -376,9 +389,20 @@ def bond_features(bond, use_chirality=False):

  Parameters
  ----------
  bond: rdkit.Chem.rdchem.Bond
    Bond to compute features on.
  use_chirality: bool, optional
    If true, use chirality information.

  Examples
  --------
  >>> import deepchem as dc
  >>> from rdkit import Chem
  >>> mol = Chem.MolFromSmiles('CCC')
  >>> bond = mol.GetBonds()[0]
  >>> bond_features = dc.feat.graph_features.bond_features(bond)
  >>> bond_features
  array([ True, False, False, False, False, False])

  Note
  ----
  This method requires RDKit to be installed.
@@ -407,7 +431,7 @@ def bond_features(bond, use_chirality=False):


def max_pair_distance_pairs(mol: RDKitMol,
                            max_pair_distance: Optional[int]) -> np.ndarray:
                            max_pair_distance: Optional[int] = None) -> np.ndarray:
  """Helper method which finds atom pairs within max_pair_distance graph distance.

  This helper method is used to find atoms which are within max_pair_distance
@@ -434,6 +458,13 @@ def max_pair_distance_pairs(mol: RDKitMol,
    distance 2 apart. If `max_pair_distance` is `None`, all pairs are
    considered (effectively infinite `max_pair_distance`)

  Examples
  --------
  >>> import deepchem as dc
  >>> from rdkit import Chem
  >>> mol = Chem.MolFromSmiles('CCC')
  >>> dc.feat.graph_features.max_pair_distance_pairs(mol, 1)
  array([[0, 0, 1, 1, 1, 2, 2],
       [0, 1, 0, 1, 2, 1, 2]])

  Returns
  -------
+10 −0
Original line number Diff line number Diff line
@@ -19,6 +19,16 @@ class MACCSKeysFingerprint(MolecularFeaturizer):
  Note
  ----
  This class requires RDKit to be installed.

  Examples
  --------
  >>> import deepchem as dc
  >>> smiles = 'CC(=O)OC1=CC=CC=C1C(=O)O'
  >>> featurizer = dc.feat.MACCSKeysFingerprint()
  >>> features = featurizer.featurize([smiles])
  >>> features.shape
  (1, 167)

  """

  def __init__(self):
+11 −0
Original line number Diff line number Diff line
@@ -19,6 +19,17 @@ class PubChemFingerprint(MolecularFeaturizer):
  -----
  This class requires RDKit and PubChemPy to be installed.
  PubChemPy uses the PubChem REST API to get the fingerprint, so internet access is required.

  Examples
  --------
  >>> import deepchem as dc
  >>> smiles = 'CCC'
  >>> featurizer = dc.feat.PubChemFingerprint()
  >>> features = featurizer.featurize([smiles])
  >>> features.shape
  (1, 881)
  >>> type(features)
  <class 'numpy.ndarray'>
  """

  def __init__(self):