Unverified Commit 7c392f6f authored by Bharath Ramsundar's avatar Bharath Ramsundar Committed by GitHub
Browse files

Merge pull request #2288 from aksub99/Add_composition_featurizer

Add `ElemNetFeaturizer`
parents 7177c685 b9edb908
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -42,6 +42,7 @@ from deepchem.feat.complex_featurizers import ComplexNeighborListFragmentAtomicC
from deepchem.feat.material_featurizers import ElementPropertyFingerprint
from deepchem.feat.material_featurizers import SineCoulombMatrix
from deepchem.feat.material_featurizers import CGCNNFeaturizer
from deepchem.feat.material_featurizers import ElemNetFeaturizer

try:
  import transformers
+1 −0
Original line number Diff line number Diff line
@@ -5,3 +5,4 @@ Featurizers for inorganic crystals.
from deepchem.feat.material_featurizers.element_property_fingerprint import ElementPropertyFingerprint
from deepchem.feat.material_featurizers.sine_coulomb_matrix import SineCoulombMatrix
from deepchem.feat.material_featurizers.cgcnn_featurizer import CGCNNFeaturizer
from deepchem.feat.material_featurizers.elemnet_featurizer import ElemNetFeaturizer
+83 −0
Original line number Diff line number Diff line
import numpy as np
from typing import DefaultDict, Union

from deepchem.utils.typing import PymatgenComposition
from deepchem.feat import MaterialCompositionFeaturizer

# Element symbols supported by the featurizer. The position of a symbol in
# this list is the index of that element in the feature vector, so the order
# must not be changed. Rows below follow the periodic-table periods.
elements_tl = [
    # Period 1
    'H',
    # Period 2
    'Li', 'Be', 'B', 'C', 'N', 'O', 'F',
    # Period 3
    'Na', 'Mg', 'Al', 'Si', 'P', 'S', 'Cl',
    # Period 4
    'K', 'Ca', 'Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn',
    'Ga', 'Ge', 'As', 'Se', 'Br', 'Kr',
    # Period 5
    'Rb', 'Sr', 'Y', 'Zr', 'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd',
    'In', 'Sn', 'Sb', 'Te', 'I', 'Xe',
    # Period 6 (including the lanthanides La-Lu)
    'Cs', 'Ba', 'La', 'Ce', 'Pr', 'Nd', 'Pm', 'Sm', 'Eu', 'Gd', 'Tb', 'Dy',
    'Ho', 'Er', 'Tm', 'Yb', 'Lu', 'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir', 'Pt',
    'Au', 'Hg', 'Tl', 'Pb', 'Bi',
    # Period 7
    'Ac', 'Th', 'Pa', 'U', 'Np', 'Pu',
]


class ElemNetFeaturizer(MaterialCompositionFeaturizer):
  """
  Fixed size vector of length 86 containing raw fractional elemental
  compositions in the compound. The 86 chosen elements are based on the
  original implementation at https://github.com/NU-CUCIS/ElemNet.

  Returns a vector containing fractional compositions of each element
  in the compound. If the compound contains an element that is not one
  of the 86 supported elements, `_featurize` returns `None` instead of
  a vector.

  References
  ----------
  .. [1] Jha, D., Ward, L., Paul, A. et al. Sci Rep 8, 17593 (2018).
     https://doi.org/10.1038/s41598-018-35934-y

  Examples
  --------
  >>> import pymatgen as mg
  >>> comp = "Fe2O3"
  >>> featurizer = ElemNetFeaturizer()
  >>> features = featurizer.featurize([comp])

  Notes
  -----
  This class requires Pymatgen to be installed.
  """

  def get_vector(self, comp: DefaultDict) -> Union[np.ndarray, None]:
    """
    Converts a dictionary containing element names and corresponding
    compositional fractions into a vector of fractions.

    Parameters
    ----------
    comp: collections.defaultdict object
      Dictionary mapping element names to fractional compositions.

    Returns
    -------
    fractions: np.ndarray or None
      Vector (dtype float32) of fractional compositions, one entry per
      element of `elements_tl` in that order; elements missing from `comp`
      get 0. `None` is returned if `comp` contains any element that is
      not listed in `elements_tl`.
    """
    # A single unsupported element makes the whole composition
    # unrepresentable, so bail out before building the vector.
    if any(e not in elements_tl for e in comp):
      return None
    # `get` is used (not indexing) so that a defaultdict is not mutated
    # by lookups of absent elements.
    return np.array([comp.get(e, 0) for e in elements_tl], np.float32)

  def _featurize(self,
                 composition: PymatgenComposition) -> Union[np.ndarray, None]:
    """
    Calculate 86 dimensional vector containing fractional compositions of
    each element in the compound.

    Parameters
    ----------
    composition: pymatgen.Composition object
      Composition object.

    Returns
    -------
    feats: np.ndarray or None
      86 dimensional vector containing fractional compositions of elements,
      or `None` if the composition contains an element that is not in
      `elements_tl`.
    """
    fractions = composition.fractional_composition.get_el_amt_dict()
    return self.get_vector(fractions)
+14 −1
Original line number Diff line number Diff line
@@ -4,7 +4,7 @@ Test featurizers for inorganic crystals.
import unittest
import numpy as np

from deepchem.feat import ElementPropertyFingerprint, SineCoulombMatrix, CGCNNFeaturizer
from deepchem.feat import ElementPropertyFingerprint, SineCoulombMatrix, CGCNNFeaturizer, ElemNetFeaturizer


class TestMaterialFeaturizers(unittest.TestCase):
@@ -83,3 +83,16 @@ class TestMaterialFeaturizers(unittest.TestCase):
    assert graph_features[0].node_features.shape == (1, 92)
    assert graph_features[0].edge_index.shape == (2, 6)
    assert graph_features[0].edge_features.shape == (6, 11)

  def test_elemnet_featurizer(self):
    """
    ElemNetFeaturizer should emit an 86-wide vector of element fractions.
    """
    # NOTE(review): self.formula is set outside this view; the expected
    # values below (1/3 and 2/3) presumably match a MoS2-like formula.
    features = ElemNetFeaturizer().featurize([self.formula])
    first_row = features[0]

    # One column per supported element.
    assert features.shape[1] == 86
    # Indices 13 and 38 are 'S' and 'Mo' in the featurizer's element list.
    assert np.isclose(first_row[13], 0.6666667, atol=0.01)
    assert np.isclose(first_row[38], 0.33333334, atol=0.01)
    # Fractions of a single composition add up to one.
    assert np.isclose(features.sum(), 1.0, atol=0.01)
+6 −0
Original line number Diff line number Diff line
@@ -229,6 +229,12 @@ ElementPropertyFingerprint
.. autoclass:: deepchem.feat.ElementPropertyFingerprint
  :members:

ElemNetFeaturizer
^^^^^^^^^^^^^^^^^^^^^

.. autoclass:: deepchem.feat.ElemNetFeaturizer
  :members:

BindingPocketFeaturizer
-----------------------