Commit 173ccbdb authored by Nathan Frey's avatar Nathan Frey
Browse files

Init commit on crystal featurizer

parent 344b1ff7
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -3,6 +3,7 @@ Making it easy to import in classes.
"""
from deepchem.feat.base_classes import Featurizer
from deepchem.feat.base_classes import MolecularFeaturizer
from deepchem.feat.base_classes import CrystalFeaturizer
from deepchem.feat.base_classes import ComplexFeaturizer
from deepchem.feat.base_classes import UserDefinedFeaturizer
from deepchem.feat.graph_features import ConvMolFeaturizer
+89 −1
Original line number Diff line number Diff line
@@ -5,6 +5,7 @@ import logging
import types
import numpy as np
import multiprocessing
from typing import Iterable, Union

logger = logging.getLogger(__name__)

@@ -165,7 +166,7 @@ class MolecularFeaturizer(Featurizer):
      molecules = [molecules]
    else:
      # Convert iterables to list
      molecutes = list(molecules)
      molecules = list(molecules)
    features = []
    for i, mol in enumerate(molecules):
      if i % log_every_n == 0:
@@ -207,6 +208,93 @@ class MolecularFeaturizer(Featurizer):
    return self.featurize(molecules)


class CrystalFeaturizer(Featurizer):
  """
  Abstract class for calculating a set of features for a
  crystal structure.

  The defining feature of a `CrystalFeaturizer` is that it
  operates on 3D crystals with periodic boundary conditions. Inorganic
  crystal structures are represented by Pymatgen composition and structure
  objects. Featurizers for inorganic crystal structures that are subclasses of
  this class should plan to process input which comes as composition
  strings or pymatgen structure dictionaries.

  Child classes need to implement the _featurize method for
  calculating features for a single crystal.

  Notes
  -----
  Some subclasses of this class will require pymatgen and matminer to be
  installed.

  """

  def featurize(self, crystals: Iterable,
                log_every_n: int = 1000) -> np.ndarray:
    """Calculate features for crystals.

    Parameters
    ----------
    crystals: Iterable
      Iterable sequence of composition strings, pymatgen structure
      dictionaries, or another crystal representation. A single
      composition string, a single structure dictionary, or any
      non-iterable object is treated as one crystal.
    log_every_n: int, default 1000
      Logging messages reported every `log_every_n` samples.

    Returns
    -------
    features: np.ndarray
      A numpy array containing a featurized representation of
      `crystals`. A datapoint that fails to featurize contributes an
      empty array; if the per-crystal features then differ in shape,
      a 1D object array with one entry per crystal is returned.

    """

    # Special case handling of single crystal. A composition string or a
    # structure dictionary is itself iterable, so it has to be checked
    # explicitly; otherwise "MoS2" would be split into characters and a
    # structure dictionary into its keys.
    if isinstance(crystals, (str, dict)) or not isinstance(crystals, Iterable):
      crystals = [crystals]
    else:
      # Convert iterables to list
      crystals = list(crystals)

    features = []
    for idx, crystal in enumerate(crystals):
      if idx % log_every_n == 0:
        logger.info("Featurizing datapoint %i", idx)
      try:
        features.append(self._featurize(crystal))
      except Exception:
        # Best-effort featurization: keep going, but record the cause.
        # `Exception` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        logger.warning(
            "Failed to featurize datapoint %i. Appending empty array",
            idx,
            exc_info=True)
        features.append(np.array([]))

    try:
      return np.asarray(features)
    except ValueError:
      # Recent NumPy versions refuse to build an array from ragged input
      # (e.g. when a failed datapoint contributed an empty array). Fall
      # back to an explicit object array holding one entry per crystal.
      ragged = np.empty(len(features), dtype=object)
      for idx, feat in enumerate(features):
        ragged[idx] = feat
      return ragged

  def _featurize(self, crystal):
    """Calculate features for a single crystal.

    Parameters
    ----------
    crystal: crystal representation
        Crystal.

    Raises
    ------
    NotImplementedError
        Always, in this base class; child classes must override it.

    """

    raise NotImplementedError('Featurizer is not defined.')

  def __call__(self, crystals: Iterable):
    """Calculate features for crystals.

    Parameters
    ----------
    crystals: Iterable
        An iterable of crystal representations.

    Returns
    -------
    features: np.ndarray
        The result of calling `featurize` on `crystals`.

    """

    return self.featurize(crystals)


class UserDefinedFeaturizer(Featurizer):
  """Directs usage of user-computed featurizations."""

+18 −9
Original line number Diff line number Diff line
@@ -4,11 +4,11 @@ Featurizers for inorganic crystals.

import numpy as np

from deepchem.feat import Featurizer
from deepchem.feat import CrystalFeaturizer
from deepchem.utils import pad_array


class ElementPropertyFingerprint(Featurizer):
class ElementPropertyFingerprint(CrystalFeaturizer):
  """
  Fingerprint of elemental properties from composition.

@@ -67,8 +67,11 @@ class ElementPropertyFingerprint(Featurizer):

    """

    try:
      from pymatgen import Composition
      from matminer.featurizers.composition import ElementProperty
    except ModuleNotFoundError:
      raise ValueError("This class requires pymatgen and matminer to be installed.")

    # Get pymatgen Composition object
    c = Composition(comp)
@@ -83,7 +86,7 @@ class ElementPropertyFingerprint(Featurizer):
    return np.array(feats)


class SineCoulombMatrix(Featurizer):
class SineCoulombMatrix(CrystalFeaturizer):
  """
  Calculate sine Coulomb matrix for crystals.

@@ -144,8 +147,11 @@ class SineCoulombMatrix(Featurizer):

    """

    try:
      from pymatgen import Structure
      from matminer.featurizers.structure import SineCoulombMatrix as SCM
    except ModuleNotFoundError:
      raise ValueError("This class requires pymatgen and matminer to be installed.")

    s = Structure.from_dict(struct)

@@ -166,7 +172,7 @@ class SineCoulombMatrix(Featurizer):
    return features


class StructureGraphFeaturizer(Featurizer):
class StructureGraphFeaturizer(CrystalFeaturizer):
  """
  Calculate structure graph features for crystals.

@@ -224,7 +230,10 @@ class StructureGraphFeaturizer(Featurizer):

    """

    try:
      from pymatgen import Structure
    except ModuleNotFoundError:
      raise ValueError("This class requires pymatgen to be installed.")

    # Get pymatgen structure object
    s = Structure.from_dict(struct)
+12 −8
Original line number Diff line number Diff line
@@ -161,14 +161,18 @@ AtomConvFeaturizer
.. autoclass:: deepchem.feat.NeighborListComplexAtomicCoordinates
  :members:

MaterialsFeaturizers
--------------------

Materials Featurizers are those that work with datasets of inorganic crystals.
These featurizers operate on chemical compositions (e.g. "MoS2"), or on a
lattice and 3D coordinates that specify a periodic crystal structure. They
should be applied on systems that have periodic boundary conditions. Materials
featurizers are not designed to work with molecules. 
CrystalFeaturizer
-----------------

Crystal featurizers work with datasets of crystals that have periodic
boundary conditions. For inorganic crystal structures, these featurizers
operate either on chemical compositions (e.g. "MoS2") or on a lattice
together with the 3D coordinates that specify a periodic crystal structure.
They should only be applied to systems that have periodic boundary
conditions; crystal featurizers are not designed to work with molecules.

.. autoclass:: deepchem.feat.CrystalFeaturizer
  :members:

ElementPropertyFingerprint
^^^^^^^^^^^^^^^^^^^^^^^^^^