Commit 99fe8027 authored by Nathan Frey
Browse files

Expand docstrings and tests

parent 0b62f07d
Loading
Loading
Loading
Loading
+102 −30
Original line number Diff line number Diff line
@@ -7,24 +7,44 @@ import numpy as np
from deepchem.feat import Featurizer
from deepchem.utils import pad_array

from matminer.featurizers.composition import ElementProperty

from pymatgen import Composition, Structure
from pymatgen.analysis.graphs import StructureGraph
from pymatgen.analysis.local_env import MinimumDistanceNN


class ChemicalFingerprint(Featurizer):
  """
  Chemical fingerprint of elemental properties from composition.

  Based on the data source chosen, returns properties and statistics
  (min, max, range, mean, standard deviation, mode) for a compound
  based on elemental stoichiometry. E.g., the average electronegativity
  of atoms in a crystal structure. The chemical fingerprint is a 
  vector of these statistics. For a full list of properties and statistics,
  see ElementProperty.from_preset(data_source).feature_labels().

  This featurizer requires the optional dependencies pymatgen and
  matminer. It may be useful when only crystal compositions are available
  (and not 3D coordinates).

  References are given for each data source:
    MagPie data: Ward, L. et al. npj Comput Mater 2, 16028 (2016).
    https://doi.org/10.1038/npjcompumats.2016.28

    Deml data: Deml, A. et al. Physical Review B 93, 085142 (2016).
    https://doi.org/10.1103/PhysRevB.93.085142

    Matminer: Ward, L. et al. Comput. Mater. Sci. 152, 60-69 (2018).

    Pymatgen: Ong, S.P. et al. Comput. Mater. Sci. 68, 314-319 (2013). 

  """

  def __init__(self, data_source='matminer'):
    """
    Parameters
    ----------
  data_source : str, optional (default "matminer")
      Source for element property data ("matminer", "magpie", "deml")
    data_source : {"matminer", "magpie", "deml"}
      Source for element property data.

    """

  def __init__(self, data_source='matminer'):
    self.data_source = data_source

  def _featurize(self, comp):
@@ -33,9 +53,14 @@ class ChemicalFingerprint(Featurizer):

    Parameters
    ----------
    comp : Reduced formula of crystal.
    comp : str
      Reduced formula of crystal.

    """

    from pymatgen import Composition
    from matminer.featurizers.composition import ElementProperty

    # Get pymatgen Composition object
    c = Composition(comp)

@@ -53,21 +78,36 @@ class SineCoulombMatrix(Featurizer):
  """
  Calculate sine Coulomb matrix for crystals.

  Variant of Coulomb matrix for periodic crystals
  Faber et al. (Inter. J. Quantum Chem.
  115, 16, 2015).
  A variant of the Coulomb matrix for periodic crystals, based on
  Faber et al. Int. J. Quantum Chem. 115, 16 (2015).

  The sine Coulomb matrix is identical to the Coulomb matrix, except
  that the inverse distance function is replaced by the inverse of a
  sin**2 function of the vector between sites, which is periodic in
  the dimensions of the crystal lattice.

  Features are flattened into a vector of matrix eigenvalues by default
  for ML-readiness. To ensure that all feature vectors are equal
  length, the maximum number of atoms (eigenvalues) in the input
  dataset must be specified.

  This featurizer requires the optional dependency pymatgen. It may be
  useful when crystal structures with 3D coordinates are available.

  """

  def __init__(self, max_atoms, eig=True):
    """
    Parameters
    ----------
    max_atoms : int
      Maximum number of atoms for any crystal in the dataset. Used to
      pad the Coulomb matrix.
    eig : bool (default True)
      Return flattened vector of matrix eigenvalues
      Return flattened vector of matrix eigenvalues.

    """

  def __init__(self, max_atoms, eig=True):
    self.max_atoms = int(max_atoms)
    self.eig = eig

@@ -77,9 +117,13 @@ class SineCoulombMatrix(Featurizer):

    Parameters
    ----------
    struct : pymatgen structure dictionary
    struct : dict
      pymatgen structure dictionary

    """

    from pymatgen import Structure

    s = Structure.from_dict(struct)
    features = self.sine_coulomb_matrix(s)
    features = np.asarray(features)
@@ -93,6 +137,7 @@ class SineCoulombMatrix(Featurizer):
    Parameters
    ----------
    s : pymatgen structure

    """

    sites = s.sites
@@ -132,15 +177,36 @@ class StructureGraphFeaturizer(Featurizer):
  """
  Calculate structure graph for crystals.

  Create a graph representation of a crystal structure where atoms
  are nodes and connections between atoms (bonds) are edges. Bonds
  are determined by choosing a strategy for finding nearest neighbors
  from pymatgen.analysis.local_env. For periodic
  graphs, each edge belongs to a lattice image.

  The NetworkX package is used for graph representations.
  Hagberg, A. et al. SciPy2008, 11-15 (2008).

  This featurizer requires the optional dependency pymatgen. It may
  be useful when using graph network models and crystal graph
  convolutional networks.

  #TODO (@ncfrey) process graph features for models

  """

  def __init__(self, strategy=None):
    """
    Parameters
    ----------
    strategy : pymatgen.analysis.local_env.NearNeighbors
      An instance of NearNeighbors that determines how graph is constructed.

  #TODO (@ncfrey) process graph features for models
    """

  def __init__(self, strategy=MinimumDistanceNN()):
    if not strategy:
      from pymatgen.analysis.local_env import MinimumDistanceNN
      strategy = MinimumDistanceNN()

    self.strategy = strategy

  def _featurize(self, struct):
@@ -149,9 +215,13 @@ class StructureGraphFeaturizer(Featurizer):

    Parameters
    ----------
    struct : pymatgen structure dictionary.
    struct : dict
      pymatgen structure dictionary.

    """

    from pymatgen import Structure

    # Get pymatgen structure object
    s = Structure.from_dict(struct)

@@ -166,10 +236,12 @@ class StructureGraphFeaturizer(Featurizer):
    Parameters
    ----------
    struct : pymatgen structure
    
    """

    atom_features = np.array([site.specie.Z for site in struct],
                        dtype='int32')
    from pymatgen.analysis.graphs import StructureGraph

    atom_features = np.array([site.specie.Z for site in struct], dtype='int32')

    sg = StructureGraph.with_local_env_strategy(struct, self.strategy)
    nodes = np.asarray(sg.graph.nodes)
+5 −1
Original line number Diff line number Diff line
@@ -52,8 +52,11 @@ class TestMaterialFeaturizers(unittest.TestCase):
    """

    featurizer = ChemicalFingerprint(data_source='matminer')
    features = featurizer.featurize([self.formula])

    assert isinstance(featurizer, ChemicalFingerprint)
    assert len(features[0]) == 65
    assert np.allclose(
        features[0][:5], [2.16, 2.58, 0.42, 2.44, 0.29698485], atol=0.1)

  def testSCM(self):
    """
@@ -63,4 +66,5 @@ class TestMaterialFeaturizers(unittest.TestCase):
    featurizer = SineCoulombMatrix(1)
    features = featurizer.featurize([self.struct_dict])

    assert len(features) == 1
    assert np.isclose(features[0], 1244, atol=.5)
+17 −0
Original line number Diff line number Diff line
@@ -116,6 +116,23 @@ AtomConvFeaturizer
.. autoclass:: deepchem.feat.NeighborListComplexAtomicCoordinates
  :members:

ChemicalFingerprint
^^^^^^^^^^^^^^^^^^^

.. autoclass:: deepchem.feat.ChemicalFingerprint
  :members:

SineCoulombMatrix
^^^^^^^^^^^^^^^^^

.. autoclass:: deepchem.feat.SineCoulombMatrix
  :members:

StructureGraphFeaturizer
^^^^^^^^^^^^^^^^^^^^^^^^

.. autoclass:: deepchem.feat.StructureGraphFeaturizer
  :members:

BindingPocketFeaturizer
-----------------------