Commit 5f271014 authored by Nathan Frey's avatar Nathan Frey
Browse files

Descriptive material featurizer names

parent 5e025e46
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -3,8 +3,8 @@ Making it easy to import in classes.
"""
from deepchem.feat.base_classes import Featurizer
from deepchem.feat.base_classes import MolecularFeaturizer
from deepchem.feat.base_classes import StructureFeaturizer
from deepchem.feat.base_classes import CompositionFeaturizer
from deepchem.feat.base_classes import MaterialStructureFeaturizer
from deepchem.feat.base_classes import MaterialCompositionFeaturizer
from deepchem.feat.base_classes import ComplexFeaturizer
from deepchem.feat.base_classes import UserDefinedFeaturizer
from deepchem.feat.graph_features import ConvMolFeaturizer
+13 −14
Original line number Diff line number Diff line
@@ -9,8 +9,6 @@ from typing import Iterable, Union, Dict, Any

logger = logging.getLogger(__name__)

JSON = Dict[str, Any]


def _featurize_complex(featurizer, mol_pdb_file, protein_pdb_file, log_message):
  logging.info(log_message)
@@ -210,12 +208,12 @@ class MolecularFeaturizer(Featurizer):
    return self.featurize(molecules)


class StructureFeaturizer(Featurizer):
class MaterialStructureFeaturizer(Featurizer):
  """
  Abstract class for calculating a set of features for an
  inorganic crystal structure.

  The defining feature of a `StructureFeaturizer` is that it
  The defining feature of a `MaterialStructureFeaturizer` is that it
  operates on 3D crystal structures with periodic boundary conditions. 
  Inorganic crystal structures are represented by Pymatgen structure
  objects. Featurizers for inorganic crystal structures that are subclasses of
@@ -234,15 +232,16 @@ class StructureFeaturizer(Featurizer):

  """

  def featurize(self, structures: Iterable[JSON],
  def featurize(self,
                structures: Iterable[Dict[str, Any]],
                log_every_n: int = 1000) -> np.ndarray:
    """Calculate features for crystal structures.

    Parameters
    ----------
    structures: Iterable[JSON]
    structures: Iterable[Dict[str, Any]]
      Iterable sequence of pymatgen structure dictionaries.
      Json-serializable dictionary representation of pymatgen.core.structure
      Dictionary representations of pymatgen.Structure
      https://pymatgen.org/pymatgen.core.structure.html
    log_every_n: int, default 1000
      Logging messages reported every `log_every_n` samples.
@@ -282,7 +281,7 @@ class StructureFeaturizer(Featurizer):
    features = np.asarray(features)
    return features

  def _featurize(self, structure: "pymatgen.Structure"):
  def _featurize(self, structure):
    """Calculate features for a single crystal structure.

    Parameters
@@ -294,25 +293,25 @@ class StructureFeaturizer(Featurizer):

    raise NotImplementedError('Featurizer is not defined.')

  def __call__(self, structures: Iterable[dict]):
  def __call__(self, structures: Iterable[Dict[str, Any]]):
    """Calculate features for crystal structures.

    Parameters
    ----------
    structures: Iterable[dict]
      An iterable of crystal structure dictionaries.
    structures: Iterable[Dict[str, Any]]
      An iterable of pymatgen.Structure dictionaries.

    """

    return self.featurize(structures)


class CompositionFeaturizer(Featurizer):
class MaterialCompositionFeaturizer(Featurizer):
  """
  Abstract class for calculating a set of features for an
  inorganic crystal composition.

  The defining feature of a `CompositionFeaturizer` is that it
  The defining feature of a `MaterialCompositionFeaturizer` is that it
  operates on 3D crystal chemical compositions. 
  Inorganic crystal compositions are represented by Pymatgen composition
  objects. Featurizers for inorganic crystal compositions that are 
@@ -377,7 +376,7 @@ class CompositionFeaturizer(Featurizer):
    features = np.asarray(features)
    return features

  def _featurize(self, composition: "pymatgen.Composition"):
  def _featurize(self, composition):
    """Calculate features for a single crystal composition.

    Parameters
+6 −6
Original line number Diff line number Diff line
@@ -4,11 +4,11 @@ Featurizers for inorganic crystals.

import numpy as np

from deepchem.feat import StructureFeaturizer, CompositionFeaturizer
from deepchem.feat import MaterialStructureFeaturizer, MaterialCompositionFeaturizer
from deepchem.utils import pad_array


class ElementPropertyFingerprint(CompositionFeaturizer):
class ElementPropertyFingerprint(MaterialCompositionFeaturizer):
  """
  Fingerprint of elemental properties from composition.

@@ -50,7 +50,7 @@ class ElementPropertyFingerprint(CompositionFeaturizer):

    self.data_source = data_source

  def _featurize(self, composition: "pymatgen.Composition"):
  def _featurize(self, composition):
    """
    Calculate chemical fingerprint from crystal composition.

@@ -81,7 +81,7 @@ class ElementPropertyFingerprint(CompositionFeaturizer):
    return np.array(feats)


class SineCoulombMatrix(StructureFeaturizer):
class SineCoulombMatrix(MaterialStructureFeaturizer):
  """
  Calculate sine Coulomb matrix for crystals.

@@ -124,7 +124,7 @@ class SineCoulombMatrix(StructureFeaturizer):
    self.max_atoms = int(max_atoms)
    self.flatten = flatten

  def _featurize(self, struct: "pymatgen.Structure"):
  def _featurize(self, struct):
    """
    Calculate sine Coulomb matrix from pymatgen structure.

@@ -164,7 +164,7 @@ class SineCoulombMatrix(StructureFeaturizer):
    return features


class StructureGraphFeaturizer(StructureFeaturizer):
class StructureGraphFeaturizer(MaterialStructureFeaturizer):
  """
  Calculate structure graph features for crystals.

+8 −8
Original line number Diff line number Diff line
@@ -161,17 +161,17 @@ AtomConvFeaturizer
.. autoclass:: deepchem.feat.NeighborListComplexAtomicCoordinates
  :members:

StructureFeaturizer
-------------------
MaterialStructureFeaturizer
---------------------------

Structure Featurizers are those that work with datasets of crystals with
Material Structure Featurizers are those that work with datasets of crystals with
periodic boundary conditions. For inorganic crystal structures, these
featurizers operate on pymatgen.Structure objects, which include a
lattice and 3D coordinates that specify a periodic crystal structure. 
They should be applied on systems that have periodic boundary conditions.
Material structure featurizers are not designed to work with molecules.

.. autoclass:: deepchem.feat.StructureFeaturizer
.. autoclass:: deepchem.feat.MaterialStructureFeaturizer
  :members:

SineCoulombMatrix
@@ -186,17 +186,17 @@ StructureGraphFeaturizer
.. autoclass:: deepchem.feat.StructureGraphFeaturizer
  :members:

CompositionFeaturizer
---------------------
MaterialCompositionFeaturizer
-----------------------------

Composition Featurizers are those that work with datasets of crystal
Material Composition Featurizers are those that work with datasets of crystal
compositions with periodic boundary conditions. 
For inorganic crystal structures, these featurizers operate on chemical
compositions (e.g. "MoS2"). They should be applied on systems that have
periodic boundary conditions. Material composition featurizers are not
designed to work with molecules.

.. autoclass:: deepchem.feat.CompositionFeaturizer
.. autoclass:: deepchem.feat.MaterialCompositionFeaturizer
  :members:

ElementPropertyFingerprint