Unverified Commit 0b8b1347 authored by Bharath Ramsundar's avatar Bharath Ramsundar Committed by GitHub
Browse files

Merge pull request #2020 from ncfrey/material_featurizer_renames

Descriptive material featurizer names
parents 53366e7d 64c53237
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -3,8 +3,8 @@ Making it easy to import in classes.
"""
from deepchem.feat.base_classes import Featurizer
from deepchem.feat.base_classes import MolecularFeaturizer
from deepchem.feat.base_classes import StructureFeaturizer
from deepchem.feat.base_classes import CompositionFeaturizer
from deepchem.feat.base_classes import MaterialStructureFeaturizer
from deepchem.feat.base_classes import MaterialCompositionFeaturizer
from deepchem.feat.base_classes import ComplexFeaturizer
from deepchem.feat.base_classes import UserDefinedFeaturizer
from deepchem.feat.graph_features import ConvMolFeaturizer
+43 −18
Original line number Diff line number Diff line
@@ -9,8 +9,6 @@ from typing import Iterable, Union, Dict, Any

logger = logging.getLogger(__name__)

JSON = Dict[str, Any]


class Featurizer(object):
  """Abstract class for calculating a set of features for a datapoint.
@@ -23,15 +21,18 @@ class Featurizer(object):
  new datatype.
  """

  def featurize(self, datapoints, log_every_n=1000):
  def featurize(self, datapoints: Iterable[Any],
                log_every_n: int = 1000) -> np.ndarray:
    """Calculate features for datapoints.

    Parameters
    ----------
    datapoints: iterable 
    datapoints: Iterable[Any]
       A sequence of objects that you'd like to featurize. Subclasses of
       `Featurizer` should implement the `_featurize` method that featurizes
       objects in the sequence.
    log_every_n: int, default 1000
      Logs featurization progress every `log_every_n` steps.

    Returns
    -------
@@ -68,7 +69,8 @@ class Featurizer(object):
    Parameters
    ----------
    datapoint: object 
      a single datapoint in a sequence of objects
      Any blob of data you like. Subclasses should implement
      this method.
    """
    raise NotImplementedError('Featurizer is not defined.')

@@ -220,12 +222,12 @@ class MolecularFeaturizer(Featurizer):
    return features


class StructureFeaturizer(Featurizer):
class MaterialStructureFeaturizer(Featurizer):
  """
  Abstract class for calculating a set of features for an
  inorganic crystal structure.

  The defining feature of a `StructureFeaturizer` is that it
  The defining feature of a `MaterialStructureFeaturizer` is that it
  operates on 3D crystal structures with periodic boundary conditions. 
  Inorganic crystal structures are represented by Pymatgen structure
  objects. Featurizers for inorganic crystal structures that are subclasses of
@@ -244,15 +246,16 @@ class StructureFeaturizer(Featurizer):

  """

  def featurize(self, structures: Iterable[JSON],
  def featurize(self,
                structures: Iterable[Dict[str, Any]],
                log_every_n: int = 1000) -> np.ndarray:
    """Calculate features for crystal structures.

    Parameters
    ----------
    structures: Iterable[JSON]
    structures: Iterable[Dict[str, Any]]
      Iterable sequence of pymatgen structure dictionaries.
      Json-serializable dictionary representation of pymatgen.core.structure
      Dictionary representations of pymatgen.Structure
      https://pymatgen.org/pymatgen.core.structure.html
    log_every_n: int, default 1000
      Logging messages reported every `log_every_n` samples.
@@ -265,7 +268,6 @@ class StructureFeaturizer(Featurizer):

    """

    # Convert iterables to list
    structures = list(structures)

    try:
@@ -288,13 +290,25 @@ class StructureFeaturizer(Featurizer):
    features = np.asarray(features)
    return features

  def __call__(self, structures: Iterable[Dict[str, Any]]):
    """Featurize crystal structures by calling the featurizer directly.

    Convenience wrapper that forwards `structures` to `self.featurize`,
    leaving every other argument of `featurize` at its default.

    Parameters
    ----------
    structures: Iterable[Dict[str, Any]]
      An iterable of pymatgen.Structure dictionaries.

    Returns
    -------
    The value returned by `self.featurize(structures)`.

    """

    features = self.featurize(structures)
    return features

class CompositionFeaturizer(Featurizer):

class MaterialCompositionFeaturizer(Featurizer):
  """
  Abstract class for calculating a set of features for an
  inorganic crystal composition.

  The defining feature of a `CompositionFeaturizer` is that it
  The defining feature of a `MaterialCompositionFeaturizer` is that it
  operates on 3D crystal chemical compositions. 
  Inorganic crystal compositions are represented by Pymatgen composition
  objects. Featurizers for inorganic crystal compositions that are 
@@ -332,7 +346,6 @@ class CompositionFeaturizer(Featurizer):

    """

    # Convert iterables to list
    compositions = list(compositions)

    try:
@@ -355,6 +368,18 @@ class CompositionFeaturizer(Featurizer):
    features = np.asarray(features)
    return features

  def __call__(self, compositions: Iterable[str]):
    """Featurize crystal compositions by calling the featurizer directly.

    Convenience wrapper that forwards `compositions` to `self.featurize`,
    leaving every other argument of `featurize` at its default.

    Parameters
    ----------
    compositions: Iterable[str]
      An iterable of crystal compositions.

    Returns
    -------
    The value returned by `self.featurize(compositions)`.

    """

    features = self.featurize(compositions)
    return features


class UserDefinedFeaturizer(Featurizer):
  """Directs usage of user-computed featurizations."""
+6 −6
Original line number Diff line number Diff line
@@ -4,11 +4,11 @@ Featurizers for inorganic crystals.

import numpy as np

from deepchem.feat import StructureFeaturizer, CompositionFeaturizer
from deepchem.feat import MaterialStructureFeaturizer, MaterialCompositionFeaturizer
from deepchem.utils import pad_array


class ElementPropertyFingerprint(CompositionFeaturizer):
class ElementPropertyFingerprint(MaterialCompositionFeaturizer):
  """
  Fingerprint of elemental properties from composition.

@@ -50,7 +50,7 @@ class ElementPropertyFingerprint(CompositionFeaturizer):

    self.data_source = data_source

  def _featurize(self, composition: "pymatgen.Composition"):
  def _featurize(self, composition):
    """
    Calculate chemical fingerprint from crystal composition.

@@ -81,7 +81,7 @@ class ElementPropertyFingerprint(CompositionFeaturizer):
    return np.array(feats)


class SineCoulombMatrix(StructureFeaturizer):
class SineCoulombMatrix(MaterialStructureFeaturizer):
  """
  Calculate sine Coulomb matrix for crystals.

@@ -124,7 +124,7 @@ class SineCoulombMatrix(StructureFeaturizer):
    self.max_atoms = int(max_atoms)
    self.flatten = flatten

  def _featurize(self, struct: "pymatgen.Structure"):
  def _featurize(self, struct):
    """
    Calculate sine Coulomb matrix from pymatgen structure.

@@ -164,7 +164,7 @@ class SineCoulombMatrix(StructureFeaturizer):
    return features


class StructureGraphFeaturizer(StructureFeaturizer):
class StructureGraphFeaturizer(MaterialStructureFeaturizer):
  """
  Calculate structure graph features for crystals.

+8 −8
Original line number Diff line number Diff line
@@ -161,17 +161,17 @@ AtomConvFeaturizer
.. autoclass:: deepchem.feat.NeighborListComplexAtomicCoordinates
  :members:

StructureFeaturizer
-------------------
MaterialStructureFeaturizer
---------------------------

Structure Featurizers are those that work with datasets of crystals with
Material Structure Featurizers are those that work with datasets of crystals with
periodic boundary conditions. For inorganic crystal structures, these
featurizers operate on pymatgen.Structure objects, which include a
lattice and 3D coordinates that specify a periodic crystal structure. 
They should be applied on systems that have periodic boundary conditions.
Structure featurizers are not designed to work with molecules. 

.. autoclass:: deepchem.feat.StructureFeaturizer
.. autoclass:: deepchem.feat.MaterialStructureFeaturizer
  :members:

SineCoulombMatrix
@@ -186,17 +186,17 @@ StructureGraphFeaturizer
.. autoclass:: deepchem.feat.StructureGraphFeaturizer
  :members:

CompositionFeaturizer
---------------------
MaterialCompositionFeaturizer
-----------------------------

Composition Featurizers are those that work with datasets of crystal
Material Composition Featurizers are those that work with datasets of crystal
compositions with periodic boundary conditions. 
For inorganic crystal structures, these featurizers operate on chemical
compositions (e.g. "MoS2"). They should be applied on systems that have
periodic boundary conditions. Composition featurizers are not designed 
to work with molecules. 

.. autoclass:: deepchem.feat.CompositionFeaturizer
.. autoclass:: deepchem.feat.MaterialCompositionFeaturizer
  :members:

ElementPropertyFingerprint