Unverified Commit 7dc98630 authored by Daiki Nishikawa's avatar Daiki Nishikawa Committed by GitHub
Browse files

Merge pull request #2243 from nd-02110114/maccs-key

Implement MACCS key and PubChem fingerprint featurizer
parents 7e745b93 6ef4c4bd
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -21,10 +21,12 @@ from deepchem.feat.molecule_featurizers import BPSymmetryFunctionInput
from deepchem.feat.molecule_featurizers import CircularFingerprint
from deepchem.feat.molecule_featurizers import CoulombMatrix
from deepchem.feat.molecule_featurizers import CoulombMatrixEig
from deepchem.feat.molecule_featurizers import MACCSKeysFingerprint
from deepchem.feat.molecule_featurizers import MordredDescriptors
from deepchem.feat.molecule_featurizers import Mol2VecFingerprint
from deepchem.feat.molecule_featurizers import MolGraphConvFeaturizer
from deepchem.feat.molecule_featurizers import OneHotFeaturizer
from deepchem.feat.molecule_featurizers import PubChemFingerprint
from deepchem.feat.molecule_featurizers import RawFeaturizer
from deepchem.feat.molecule_featurizers import RDKitDescriptors
from deepchem.feat.molecule_featurizers import SmilesToImage
+2 −0
Original line number Diff line number Diff line
@@ -4,9 +4,11 @@ from deepchem.feat.molecule_featurizers.bp_symmetry_function_input import BPSymm
from deepchem.feat.molecule_featurizers.circular_fingerprint import CircularFingerprint
from deepchem.feat.molecule_featurizers.coulomb_matrices import CoulombMatrix
from deepchem.feat.molecule_featurizers.coulomb_matrices import CoulombMatrixEig
from deepchem.feat.molecule_featurizers.maccs_keys_fingerprint import MACCSKeysFingerprint
from deepchem.feat.molecule_featurizers.mordred_descriptors import MordredDescriptors
from deepchem.feat.molecule_featurizers.mol2vec_fingerprint import Mol2VecFingerprint
from deepchem.feat.molecule_featurizers.one_hot_featurizer import OneHotFeaturizer
from deepchem.feat.molecule_featurizers.pubchem_fingerprint import PubChemFingerprint
from deepchem.feat.molecule_featurizers.raw_featurizer import RawFeaturizer
from deepchem.feat.molecule_featurizers.rdkit_descriptors import RDKitDescriptors
from deepchem.feat.molecule_featurizers.smiles_to_image import SmilesToImage
+47 −0
Original line number Diff line number Diff line
import numpy as np

from deepchem.utils.typing import RDKitMol
from deepchem.feat.base_classes import MolecularFeaturizer


class MACCSKeysFingerprint(MolecularFeaturizer):
  """MACCS Keys Fingerprint.

  The MACCS (Molecular ACCess System) keys are one of the most commonly used
  structural keys. Each molecule is encoded as a fixed-length bit vector.
  See [1]_ and [2]_ for details.

  References
  ----------
  .. [1] Durant, Joseph L., et al. "Reoptimization of MDL keys for use in drug discovery."
     Journal of chemical information and computer sciences 42.6 (2002): 1273-1280.
  .. [2] https://github.com/rdkit/rdkit/blob/master/rdkit/Chem/MACCSkeys.py

  Notes
  -----
  This class requires RDKit to be installed.
  """

  def __init__(self):
    """Initialize this featurizer.

    Raises
    ------
    ValueError
      If RDKit is not installed.
    """
    # Import lazily so that the package can be imported without RDKit;
    # the dependency is only required once this featurizer is instantiated.
    try:
      from rdkit.Chem.AllChem import GetMACCSKeysFingerprint  # noqa
    except ModuleNotFoundError:
      raise ValueError("This class requires RDKit to be installed.")

    self.calculator = GetMACCSKeysFingerprint

  def _featurize(self, mol: RDKitMol) -> np.ndarray:
    """
    Calculate MACCS keys fingerprint.

    Parameters
    ----------
    mol: rdkit.Chem.rdchem.Mol
      RDKit Mol object

    Returns
    -------
    np.ndarray
      1D array holding the MACCS keys fingerprint bits of `mol`.
      The length is 167.
    """
    # The calculator returns an RDKit bit-vector object; convert it so the
    # result matches the annotated and documented `np.ndarray` return type.
    return np.asarray(self.calculator(mol))
+52 −0
Original line number Diff line number Diff line
import numpy as np

from deepchem.utils.typing import RDKitMol
from deepchem.feat.base_classes import MolecularFeaturizer


class PubChemFingerprint(MolecularFeaturizer):
  """PubChem Fingerprint.

  The PubChem fingerprint is an 881-bit structural key,
  which is used by PubChem for similarity searching.
  See [1]_ for details.

  References
  ----------
  .. [1] ftp://ftp.ncbi.nlm.nih.gov/pubchem/specifications/pubchem_fingerprints.pdf

  Notes
  -----
  This class requires RDKit and PubChemPy to be installed.
  PubChemPy uses a REST API to get the fingerprint, so internet access is
  required, and every featurized molecule triggers a remote lookup.
  """

  def __init__(self):
    """Initialize this featurizer.

    Raises
    ------
    ValueError
      If RDKit or PubChemPy is not installed.
    """
    # Both packages are needed: RDKit converts the molecule to SMILES and
    # PubChemPy performs the lookup. Importing here fails fast when either
    # one is missing.
    try:
      from rdkit import Chem  # noqa
      import pubchempy as pcp  # noqa
    except ModuleNotFoundError:
      raise ValueError(
          "This class requires RDKit and PubChemPy to be installed.")

    self.get_pubchem_compounds = pcp.get_compounds

  def _featurize(self, mol: RDKitMol) -> np.ndarray:
    """
    Calculate PubChem fingerprint.

    Parameters
    ----------
    mol: rdkit.Chem.rdchem.Mol
      RDKit Mol object

    Returns
    -------
    np.ndarray
      1D array holding the PubChem fingerprint bits of `mol`.
      The length is 881.

    Raises
    ------
    IndexError
      If the PubChem lookup returns no compound for the molecule.
    """
    from rdkit import Chem
    smiles = Chem.MolToSmiles(mol)
    compounds = self.get_pubchem_compounds(smiles, 'smiles')
    if not compounds:
      # Same exception type as the previous bare `[0]` lookup, but with a
      # message that identifies the molecule that could not be resolved.
      raise IndexError(
          "PubChem returned no compound for SMILES '{}'.".format(smiles))

    # Only the first match is used; its fingerprint is a string of
    # '0'/'1' characters that is converted to one integer per bit.
    fingerprint_bits = compounds[0].cactvs_fingerprint
    return np.asarray([int(bit) for bit in fingerprint_bits])
+25 −0
Original line number Diff line number Diff line
import unittest

from deepchem.feat import MACCSKeysFingerprint


class TestMACCSKeysFingerprint(unittest.TestCase):
  """
  Unit tests for the MACCSKeysFingerprint featurizer.
  """

  def setUp(self):
    """
    Build the RDKit molecule shared by the tests.
    """
    from rdkit.Chem import MolFromSmiles
    self.mol = MolFromSmiles('CC(=O)OC1=CC=CC=C1C(=O)O')

  def test_maccs_key_fingerprint(self):
    """
    Featurizing a single molecule yields one row of 167 values.
    """
    expected_shape = (1, 167)
    features = MACCSKeysFingerprint()([self.mol])
    assert features.shape == expected_shape
Loading