Commit 58dadbfc authored by Nathan Frey's avatar Nathan Frey
Browse files

Added matminer SCM and get all neighbors

parent 14cdfb43
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -23,4 +23,4 @@ from deepchem.feat.atomic_coordinates import AtomicCoordinates
from deepchem.feat.atomic_coordinates import NeighborListComplexAtomicCoordinates
from deepchem.feat.adjacency_fingerprints import AdjacencyFingerprint
from deepchem.feat.smiles_featurizers import SmilesToSeq, SmilesToImage
from deepchem.feat.materials_featurizers import ChemicalFingerprint, SineCoulombMatrix, StructureGraphFeaturizer
from deepchem.feat.materials_featurizers import ElementPropertyFingerprint, SineCoulombMatrix, StructureGraphFeaturizer
+108 −93
Original line number Diff line number Diff line
@@ -8,9 +8,9 @@ from deepchem.feat import Featurizer
from deepchem.utils import pad_array


class ChemicalFingerprint(Featurizer):
class ElementPropertyFingerprint(Featurizer):
  """
  Chemical fingerprint of elemental properties from composition.
  Fingerprint of elemental properties from composition.

  Based on the data source chosen, returns properties and statistics
  (min, max, range, mean, standard deviation, mode) for a compound
@@ -23,7 +23,8 @@ class ChemicalFingerprint(Featurizer):
  matminer. It may be useful when only crystal compositions are available
  (and not 3D coordinates).

  References are given for each data source:
  References
  ----------
  MagPie data: Ward, L. et al. npj Comput Mater 2, 16028 (2016).
    https://doi.org/10.1038/npjcompumats.2016.28

@@ -84,8 +85,7 @@ class SineCoulombMatrix(Featurizer):
  """
  Calculate sine Coulomb matrix for crystals.

  A variant of Coulomb matrix for periodic crystals, based on 
  Faber et al. Inter. J. Quantum Chem. 115, 16, (2015).
  A variant of Coulomb matrix for periodic crystals.

  The sine Coulomb matrix is identical to the Coulomb matrix, except
  that the inverse distance function is replaced by the inverse of
@@ -97,25 +97,30 @@ class SineCoulombMatrix(Featurizer):
  length, the maximum number of atoms (eigenvalues) in the input
  dataset must be specified.

  This featurizer requires the optional dependency pymatgen. It may be
  useful when crystal structures with 3D coordinates are available.
  This featurizer requires the optional dependencies pymatgen and
  matminer. It may be useful when crystal structures with 3D coordinates 
  are available.

  References
  ----------
  Faber et al. Inter. J. Quantum Chem. 115, 16, 2015.

  """

  def __init__(self, max_atoms, eig=True):
  def __init__(self, max_atoms, flatten=True):
    """
    Parameters
    ----------
    max_atoms : int
      Maximum number of atoms for any crystal in the dataset. Used to
      pad the Coulomb matrix.
    eig : bool (default True)
    flatten : bool (default True)
      Return flattened vector of matrix eigenvalues.

    """

    self.max_atoms = int(max_atoms)
    self.eig = eig
    self.flatten = flatten

  def _featurize(self, struct):
    """
@@ -135,105 +140,68 @@ class SineCoulombMatrix(Featurizer):
    """

    from pymatgen import Structure
    from matminer.featurizers.structure import SineCoulombMatrix as SCM

    s = Structure.from_dict(struct)
    features = self.sine_coulomb_matrix(s)
    features = np.asarray(features)

    return features
    # Get full N x N SCM
    scm = SCM(flatten=False)
    sine_mat = scm.featurize(s)

  def sine_coulomb_matrix(self, s):
    """
    Generate sine Coulomb matrices for each crystal.

    Parameters
    ----------
    s : pymatgen.core.structure
      A periodic crystal composed of a lattice and a sequence of atomic
      sites with 3D coordinates and elements.

    Returns
    -------
    eigs: np.ndarray
      1D matrix eigenvalues. 
    sine_mat: np.ndarray
      2D sine Coulomb matrix.

    """

    sites = s.sites
    atomic_numbers = np.array([site.specie.Z for site in sites])
    sine_mat = np.zeros((len(sites), len(sites)))
    coords = np.array([site.frac_coords for site in sites])
    lattice = s.lattice.matrix

    # Conversion factor
    ang_to_bohr = 1.8897543760313331

    for i in range(len(sine_mat)):
      for j in range(len(sine_mat)):
        if i == j:
          sine_mat[i][i] = 0.5 * atomic_numbers[i]**2.4
        elif i < j:
          vec = coords[i] - coords[j]
          coord_vec = np.sin(np.pi * vec)**2
          trig_dist = np.linalg.norm(
              (np.matrix(coord_vec) * lattice).A1) * ang_to_bohr
          sine_mat[i][j] = atomic_numbers[i] * atomic_numbers[j] / \
                          trig_dist
        else:
          sine_mat[i][j] = sine_mat[j][i]

    if self.eig:  # flatten array to eigenvalues
    if self.flatten:
      eigs, _ = np.linalg.eig(sine_mat)
      zeros = np.zeros((self.max_atoms,))
      zeros[:len(eigs)] = eigs
      eigs = zeros
      return eigs
      features = zeros
    else:
      sine_mat = pad_array(sine_mat, self.max_atoms)
      return sine_mat
      features = pad_array(sine_mat, self.max_atoms)

    features = np.asarray(features)

    return features


class StructureGraphFeaturizer(Featurizer):
  """
  Calculate structure graph for crystals.
  Calculate structure graph features for crystals.

  Create a graph representation of a crystal structure where atoms
  are nodes and connections between atoms (bonds) are edges. Bonds
  are determined by choosing a strategy for finding nearest neighbors
  from pymatgen.analysis.local_env. For periodic
  graphs, each edge belongs to a lattice image.

  The NetworkX package is used for graph representations.
  Hagberg, A. et al. SciPy2008, 11-15 (2008).
  Based on the implementation in Crystal Graph Convolutional
  Neural Networks (CGCNN). The method constructs a crystal graph
  representation including atom features (atomic numbers) and bond
  features (neighbor distances). Neighbors are determined by searching
  in a sphere around atoms in the unit cell. A Gaussian filter is
  applied to neighbor distances. All units are in angstrom.  

  This featurizer requires the optional dependency pymatgen. It may
  be useful when using graph network models and crystal graph
  convolutional networks.
  be useful when 3D coordinates are available and when using graph 
  network models and crystal graph convolutional networks.

  #TODO (@ncfrey) process graph features for models
  References
  ----------
  T. Xie and J. C. Grossman, Phys. Rev. Lett. 120, 2018.

  """

  def __init__(self, strategy=None):
  def __init__(self, radius=8.0, max_neighbors=12, step=0.2):
    """
    Parameters
    ----------
    strategy : pymatgen.analysis.local_env.NearNeighbors
      An instance of NearNeighbors that determines how graph is constructed.
    radius : float (default 8.0)
      Radius of sphere for finding neighbors of atoms in unit cell.
    max_neighbors : int (default 12)
      Maximum number of neighbors to consider when constructing graph.
    step : float (default 0.2)
      Step size for Gaussian filter.

    """

    if not strategy:
      from pymatgen.analysis.local_env import MinimumDistanceNN
      strategy = MinimumDistanceNN()

    self.strategy = strategy
    self.radius = radius
    self.max_neighbors = int(max_neighbors)
    self.step = step

  def _featurize(self, struct):
    """
    Calculate structure graph from pymatgen structure.
    Calculate crystal graph features from pymatgen structure.

    Parameters
    ----------
@@ -243,8 +211,9 @@ class StructureGraphFeaturizer(Featurizer):

    Returns
    -------
    feats: tuple
      atomic numbers, nodes, and edges in networkx.classes.multidigraph.MultiDiGraph format.
    feats: np.array
      Atomic and bond features. Atomic features are atomic numbers 
      and bond features are Gaussian filtered interatomic distances.

    """

@@ -254,6 +223,7 @@ class StructureGraphFeaturizer(Featurizer):
    s = Structure.from_dict(struct)

    features = self._get_structure_graph_features(s)
    features = np.array(features)

    return features

@@ -269,8 +239,8 @@ class StructureGraphFeaturizer(Featurizer):

    Returns
    -------
    feats: tuple
      atomic numbers, nodes, and edges in networkx.classes.multidigraph.MultiDiGraph format.
    feats: tuple[np.array]
      atomic numbers, filtered interatomic distance tensor, and neighbor ids
    
    """

@@ -278,8 +248,53 @@ class StructureGraphFeaturizer(Featurizer):

    atom_features = np.array([site.specie.Z for site in struct], dtype='int32')

    sg = StructureGraph.with_local_env_strategy(struct, self.strategy)
    nodes = np.array(list(sg.graph.nodes))
    edges = np.array(list(sg.graph.edges))
    neighbors = struct.get_all_neighbors(self.radius, include_index=True)
    neighbors = [sorted(n, key=lambda x: x[1]) for n in neighbors]

    # Get list of lists of neighbor distances
    neighbor_features, neighbor_idx = [], []
    for neighbor in neighbors:
      if len(neighbor) < self.max_neighbors:
        neighbor_idx.append(
            list(map(lambda x: x[2], neighbor)) +
            [0] * (self.max_neighbors - len(neighbor)))
        neighbor_features.append(
            list(map(lambda x: x[1], neighbor)) +
            [self.radius + 1.] * (self.max_neighbors - len(neighbor)))
      else:
        neighbor_idx.append(
            list(map(lambda x: x[2], neighbor[:self.max_neighbors])))
        neighbor_features.append(
            list(map(lambda x: x[1], neighbor[:self.max_neighbors])))

    neighbor_features = np.array(neighbor_features)
    neighbor_idx = np.array(neighbor_idx)
    neighbor_features = self._gaussian_filter(neighbor_features)
    neighbor_features = np.vstack(neighbor_features)

    return (atom_features, neighbor_features, neighbor_idx)

  def _gaussian_filter(self, distances):
    """
    Apply Gaussian filter to an array of interatomic distances.

    Parameters
    ----------
    distances : np.array
      Matrix of distances of dimension (num atoms) x (max neighbors). 

    Returns
    -------
    expanded_distances: np.array 
      Expanded distance tensor after Gaussian filtering. Dimensionality
      is (num atoms) x (max neighbors) x (len(filt))
    
    """

    filt = np.arange(0, self.radius + self.step, self.step)

    # Increase dimension of distance tensor and apply filter
    expanded_distances = np.exp(
        -(distances[..., np.newaxis] - filt)**2 / self.step**2)

    return (atom_features, nodes, edges)
    return expanded_distances
+6 −6
Original line number Diff line number Diff line
@@ -4,7 +4,7 @@ Test featurizers for inorganic crystals.
import numpy as np
import unittest

from deepchem.feat.materials_featurizers import ChemicalFingerprint, SineCoulombMatrix, StructureGraphFeaturizer
from deepchem.feat.materials_featurizers import ElementPropertyFingerprint, SineCoulombMatrix, StructureGraphFeaturizer


class TestMaterialFeaturizers(unittest.TestCase):
@@ -46,12 +46,12 @@ class TestMaterialFeaturizers(unittest.TestCase):
        }]
    }

  def testCF(self):
  def testEPF(self):
    """
    Test CF featurizer.
    Test Element Property featurizer.
    """

    featurizer = ChemicalFingerprint(data_source='matminer')
    featurizer = ElementPropertyFingerprint(data_source='matminer')
    features = featurizer.featurize([self.formula])

    assert len(features[0]) == 65
@@ -74,9 +74,9 @@ class TestMaterialFeaturizers(unittest.TestCase):
    Test StructureGraphFeaturizer.
    """

    featurizer = StructureGraphFeaturizer()
    featurizer = StructureGraphFeaturizer(radius=3.0, max_neighbors=6)
    features = featurizer.featurize([self.struct_dict])

    assert len(features[0]) == 3
    assert features[0][0] == 26
    assert len(features[0][2]) == 6
    assert features[0][1].shape == (6,16)
 No newline at end of file
+2 −2
Original line number Diff line number Diff line
@@ -125,10 +125,10 @@ lattice and 3D coordinates that specify a periodic crystal structure. They
should be applied on systems that have periodic boundary conditions. Materials
featurizers are not designed to work with molecules. 

ChemicalFingerprint
ElementPropertyFingerprint
^^^^^^^^^^^^^^^^^^^^^^^^^^

.. autoclass:: deepchem.feat.ChemicalFingerprint
.. autoclass:: deepchem.feat.ElementPropertyFingerprint
  :members:

SineCoulombMatrix