Commit 823e1bfe authored by nd-02110114's avatar nd-02110114
Browse files

Merge branch 'master' into fix-docs-build

parents 720d38b7 72659d83
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -21,10 +21,12 @@ from deepchem.feat.molecule_featurizers import BPSymmetryFunctionInput
from deepchem.feat.molecule_featurizers import CircularFingerprint
from deepchem.feat.molecule_featurizers import CoulombMatrix
from deepchem.feat.molecule_featurizers import CoulombMatrixEig
from deepchem.feat.molecule_featurizers import MACCSKeysFingerprint
from deepchem.feat.molecule_featurizers import MordredDescriptors
from deepchem.feat.molecule_featurizers import Mol2VecFingerprint
from deepchem.feat.molecule_featurizers import MolGraphConvFeaturizer
from deepchem.feat.molecule_featurizers import OneHotFeaturizer
from deepchem.feat.molecule_featurizers import PubChemFingerprint
from deepchem.feat.molecule_featurizers import RawFeaturizer
from deepchem.feat.molecule_featurizers import RDKitDescriptors
from deepchem.feat.molecule_featurizers import SmilesToImage
+2 −0
Original line number Diff line number Diff line
@@ -4,9 +4,11 @@ from deepchem.feat.molecule_featurizers.bp_symmetry_function_input import BPSymm
from deepchem.feat.molecule_featurizers.circular_fingerprint import CircularFingerprint
from deepchem.feat.molecule_featurizers.coulomb_matrices import CoulombMatrix
from deepchem.feat.molecule_featurizers.coulomb_matrices import CoulombMatrixEig
from deepchem.feat.molecule_featurizers.maccs_keys_fingerprint import MACCSKeysFingerprint
from deepchem.feat.molecule_featurizers.mordred_descriptors import MordredDescriptors
from deepchem.feat.molecule_featurizers.mol2vec_fingerprint import Mol2VecFingerprint
from deepchem.feat.molecule_featurizers.one_hot_featurizer import OneHotFeaturizer
from deepchem.feat.molecule_featurizers.pubchem_fingerprint import PubChemFingerprint
from deepchem.feat.molecule_featurizers.raw_featurizer import RawFeaturizer
from deepchem.feat.molecule_featurizers.rdkit_descriptors import RDKitDescriptors
from deepchem.feat.molecule_featurizers.smiles_to_image import SmilesToImage
+47 −0
Original line number Diff line number Diff line
import numpy as np

from deepchem.utils.typing import RDKitMol
from deepchem.feat.base_classes import MolecularFeaturizer


class MACCSKeysFingerprint(MolecularFeaturizer):
  """MACCS Keys Fingerprint.

  The MACCS (Molecular ACCess System) keys are one of the most commonly used
  structural keys. See [1]_ and [2]_ for details.

  References
  ----------
  .. [1] Durant, Joseph L., et al. "Reoptimization of MDL keys for use in drug discovery."
     Journal of chemical information and computer sciences 42.6 (2002): 1273-1280.
  .. [2] https://github.com/rdkit/rdkit/blob/master/rdkit/Chem/MACCSkeys.py

  Notes
  -----
  This class requires RDKit to be installed.
  """

  def __init__(self):
    """Initialize this featurizer.

    Raises
    ------
    ValueError
      If RDKit cannot be imported.
    """
    try:
      from rdkit.Chem.AllChem import GetMACCSKeysFingerprint  # noqa
    except ModuleNotFoundError:
      raise ValueError("This class requires RDKit to be installed.")

    # Keep a reference to the RDKit function so `_featurize` does not need
    # to repeat the import on every call.
    self.calculator = GetMACCSKeysFingerprint

  def _featurize(self, mol: RDKitMol) -> np.ndarray:
    """
    Calculate MACCS keys fingerprint.

    Parameters
    ----------
    mol: rdkit.Chem.rdchem.Mol
      RDKit Mol object

    Returns
    -------
    np.ndarray
      1D array of MACCS keys fingerprint bits for `mol`. The length is 167.
    """
    # The calculator hands back RDKit's own bit-vector object; convert it so
    # the return value actually matches the annotated `np.ndarray` contract.
    return np.asarray(self.calculator(mol))
+2 −15
Original line number Diff line number Diff line
@@ -42,8 +42,7 @@ class Mol2VecFingerprint(MolecularFeaturizer):
  def __init__(self,
               pretrain_model_path: Optional[str] = None,
               radius: int = 1,
               unseen: str = 'UNK',
               gather_method: str = 'sum'):
               unseen: str = 'UNK'):
    """
    Parameters
    ----------
@@ -56,9 +55,6 @@ class Mol2VecFingerprint(MolecularFeaturizer):
      github repository.
    unseen: str, optional (default 'UNK')
      The string to used to replace uncommon words/identifiers while training.
    gather_method: str, optional (default 'sum')
      How to aggregate vectors of identifiers are extracted from Mol2vec.
      'sum' or 'mean' is supported.
    """
    try:
      from gensim.models import word2vec
@@ -68,7 +64,6 @@ class Mol2VecFingerprint(MolecularFeaturizer):

    self.radius = radius
    self.unseen = unseen
    self.gather_method = gather_method
    self.sentences2vec = sentences2vec
    self.mol2alt_sentence = mol2alt_sentence
    if pretrain_model_path is None:
@@ -98,13 +93,5 @@ class Mol2VecFingerprint(MolecularFeaturizer):
      1D array of mol2vec fingerprint. The default length is 300.
    """
    sentence = self.mol2alt_sentence(mol, self.radius)
    vec_identifiers = self.sentences2vec(
        sentence, self.model, unseen=self.unseen)
    if self.gather_method == 'sum':
      feature = np.sum(vec_identifiers, axis=0)
    elif self.gather_method == 'mean':
      feature = np.mean(vec_identifiers, axis=0)
    else:
      raise ValueError(
          'Not supported gather_method type. Please set "sum" or "mean"')
    feature = self.sentences2vec([sentence], self.model, unseen=self.unseen)[0]
    return feature
+52 −0
Original line number Diff line number Diff line
import numpy as np

from deepchem.utils.typing import RDKitMol
from deepchem.feat.base_classes import MolecularFeaturizer


class PubChemFingerprint(MolecularFeaturizer):
  """PubChem Fingerprint.

  The PubChem fingerprint is an 881-bit structural key,
  which is used by PubChem for similarity searching.
  See [1]_ for details.

  References
  ----------
  .. [1] ftp://ftp.ncbi.nlm.nih.gov/pubchem/specifications/pubchem_fingerprints.pdf

  Notes
  -----
  This class requires RDKit and PubChemPy to be installed.
  PubChemPy uses a REST API to get the fingerprint, so internet access is
  required.
  """

  def __init__(self):
    """Initialize this featurizer.

    Raises
    ------
    ValueError
      If RDKit or PubChemPy cannot be imported.
    """
    try:
      from rdkit import Chem  # noqa
      import pubchempy as pcp  # noqa
    except ModuleNotFoundError:
      # Either of the two imports above may be the one that failed, so the
      # message has to name both packages.
      raise ValueError(
          "This class requires RDKit and PubChemPy to be installed.")

    self.get_pubchem_compounds = pcp.get_compounds

  def _featurize(self, mol: RDKitMol) -> np.ndarray:
    """
    Calculate PubChem fingerprint.

    Parameters
    ----------
    mol: rdkit.Chem.rdchem.Mol
      RDKit Mol object

    Returns
    -------
    np.ndarray
      1D array of PubChem fingerprint bits for `mol`. The length is 881.
    """
    from rdkit import Chem

    # PubChem is queried by SMILES string, so serialize the molecule first.
    smiles = Chem.MolToSmiles(mol)
    # Only the first compound returned by the lookup is used.
    pubchem_compound = self.get_pubchem_compounds(smiles, 'smiles')[0]
    # Each element of the fingerprint is converted to an int bit value.
    feature = [int(bit) for bit in pubchem_compound.cactvs_fingerprint]
    return np.asarray(feature)
Loading