Commit 9bc05717 authored by VIGNESHinZONE's avatar VIGNESHinZONE
Browse files

Delete unwanted files

parent 1bacf516
Loading
Loading
Loading
Loading
+0 −84
Original line number Diff line number Diff line
import numpy as np
from deepchem.feat import MaterialStructureFeaturizer
from deepchem.utils.typing import PymatgenStructure
from typing import Dict, Callable


class MegnetFeaturizer(MaterialStructureFeaturizer):
  """
    Calculate structure graph features for crystals

    Based on the implementation of "Graph Networks as a Universal
    Machine Learning Framework for Molecules and Crystals" (MEGNET).
    The method constructs a crystal graph representation including
    atom features and bond features (neighbor distances). Neighbors
    are determined by searching in a sphere around atoms in the unit
    cell. A Gaussian filter is applied to neighbor distances.
    All units are in angstrom.

    1. Node feature -  The atomic number of element (1-94)
    2. Edge feature -  Expanded distance with Gaussian basis exp(−(r − r0)^2/σ2)
        centered at 100 points linearly placed between 0 and 5 and σ = 0.5

    References
    ----------
    .. [1] Chi Chen et al, Chem. Mater. 2019, 31, 9, 3564–3572

    Examples
    --------
    >>> import pymatgen as mg
    >>> lattice = mg.Lattice.cubic(4.2)
    >>> structure = mg.Structure(lattice, ["Cs", "Cl"], [[0, 0, 0], [0.5, 0.5, 0.5]])
    >>> featurizer = MegnetFeaturizer(cutoff=5)
    >>> features = featurizer.featurize([structure])
    >>> feature = features[0]
    >>> print(feature.keys())
    dict_keys(['atom', 'bond', 'state', 'index1', 'index2'])

    Notes
    -----
    This class requires megnet and pymatgen to be installed.
    """

  def __init__(self,
               atom_converter: Callable = None,
               center: int = 100,
               width: float = 0.5,
               bond_converter: Callable = None,
               cutoff: float = 5.0):
    """
        Parameters
        ----------
        atom_converter: Callable
            Optional converter applied to the "atom" entry of the graph.
            It is invoked as ``atom_converter.convert(...)``, so it must
            expose a ``convert`` method. When None, atom features are
            left as produced by the graph converter.
        center: int
            Number of Gaussian basis centers; they are placed linearly
            between 0 and 5. Only used when ``bond_converter`` is None.
        width: float
            Width of the Gaussian basis. Only used when
            ``bond_converter`` is None.
        bond_converter: Callable
            Optional converter applied to the "bond" entry of the graph
            through its ``convert`` method. When None, a megnet
            ``GaussianDistance(np.linspace(0, 5, center), width)`` is used.
        cutoff: float
            Cutoff radius passed to the megnet ``CrystalGraph``.

        Raises
        ------
        ImportError
            If the megnet modules cannot be imported.
        """
    # Imported lazily so that the module can be loaded without megnet.
    try:
      from megnet.data.crystal import CrystalGraph
      from megnet.data.graph import GaussianDistance
    except ImportError as e:
      # Only import failures are translated; the original cause is chained
      # so the missing module stays visible in the traceback.
      raise ImportError(
          "This class requires MEGNET and Pymatgen to be installed.") from e

    if bond_converter is None:
      # The basis range 0..5 is fixed here and does not follow `cutoff`.
      bond_converter = GaussianDistance(np.linspace(0, 5, center), width)

    self.atom_converter = atom_converter
    self.bond_converter = bond_converter
    self.cnv = CrystalGraph(
        atom_converter=self.atom_converter,
        bond_converter=self.bond_converter,
        cutoff=cutoff)

  def _featurize(self, struct: PymatgenStructure) -> Dict[str, list]:
    """
        Convert one structure into a graph dictionary.

        Parameters
        ----------
        struct: PymatgenStructure
            Structure handed to ``self.cnv.convert``.

        Returns
        -------
        Dict[str, list]
            The dictionary returned by ``self.cnv.convert`` with its
            "atom" entry passed through ``atom_converter.convert`` (when
            an atom converter was given) and its "bond" entry passed
            through ``bond_converter.convert``.
        """
    output = self.cnv.convert(struct)
    if self.atom_converter is not None:
      output["atom"] = self.atom_converter.convert(output["atom"])
    # After __init__ a bond converter is always set (a Gaussian expansion by
    # default); the guard only matters if the attribute is reset to None.
    if self.bond_converter is not None:
      output["bond"] = self.bond_converter.convert(output["bond"])
    return output

deepchem/models/megnet.py

deleted100644 → 0
+0 −172
Original line number Diff line number Diff line
from deepchem.models import KerasModel
from deepchem.models.losses import Loss, L2Loss
from deepchem.models.losses import BinaryCrossEntropy
import tensorflow as tf
import numpy as np


class megnet_model(KerasModel):
  """
    KerasModel wrapper around the MEGNET network.

    The underlying Keras model is built by ``make_megnet_model`` from the
    original MEGNET repository:

    https://github.com/materialsvirtuallab/megnet/blob/master/megnet/models/megnet.py

    """

  def __init__(self,
               batch_size: int = 128,
               mode: str = 'regression',
               nfeat_edge: int = 100,
               nfeat_global: int = 2,
               nfeat_node: int = None,
               nblocks: int = 3,
               lr: float = 1e-3,
               n1: int = 64,
               n2: int = 32,
               n3: int = 16,
               nvocal: int = 95,
               embedding_dim: int = 16,
               nbvocal: int = None,
               bond_embedding_dim: int = None,
               ngvocal: int = None,
               global_embedding_dim: int = None,
               npass: int = 3,
               ntarget: int = 1,
               metrics=None,
               l2_coef: float = None,
               dropout: float = None,
               dropout_on_predict: bool = False,
               sample_weight_mode: str = None,
               **kwargs):
    """
        Build the MEGNET Keras model and wrap it as a KerasModel.

        Parameters
        ----------
        batch_size: int
            Batch size forwarded to ``KerasModel`` and used by
            ``default_generator``.
        mode: str
            ``"regression"`` selects ``L2Loss``; any other value selects
            ``BinaryCrossEntropy`` and builds a classification network.
        lr: float
            Learning rate forwarded to ``KerasModel`` as ``learning_rate``.
        nfeat_edge, nfeat_global, nfeat_node, nblocks, n1, n2, n3, nvocal,
        embedding_dim, nbvocal, bond_embedding_dim, ngvocal,
        global_embedding_dim, npass, ntarget, l2_coef, dropout,
        dropout_on_predict
            Forwarded unchanged to megnet's ``make_megnet_model``.
        metrics, sample_weight_mode
            Accepted for interface compatibility; not used by this
            constructor.
        **kwargs
            Forwarded to both ``make_megnet_model`` and
            ``KerasModel.__init__``.

        Raises
        ------
        ImportError
            If megnet cannot be imported.
        """
    # Imported lazily so that the module can be loaded without megnet.
    try:
      from megnet.models.megnet import make_megnet_model
    except ImportError as e:
      raise ImportError("This class requires MEGNET to be installed.") from e

    is_classification = mode != "regression"
    # The loss follows the mode. Previously it was unconditionally replaced
    # by L2Loss right before the super() call, so classification never
    # trained with BinaryCrossEntropy.
    loss: Loss = BinaryCrossEntropy() if is_classification else L2Loss()
    output_types = ['prediction']

    # NOTE(review): the same **kwargs go to make_megnet_model and to
    # KerasModel.__init__ below; a keyword only one of them understands may
    # be rejected by the other - confirm this is intended.
    model = make_megnet_model(
        nfeat_edge=nfeat_edge,
        nfeat_global=nfeat_global,
        nfeat_node=nfeat_node,
        nblocks=nblocks,
        n1=n1,
        n2=n2,
        n3=n3,
        nvocal=nvocal,
        embedding_dim=embedding_dim,
        nbvocal=nbvocal,
        bond_embedding_dim=bond_embedding_dim,
        ngvocal=ngvocal,
        global_embedding_dim=global_embedding_dim,
        npass=npass,
        ntarget=ntarget,
        is_classification=is_classification,
        l2_coef=l2_coef,
        dropout=dropout,
        dropout_on_predict=dropout_on_predict,
        **kwargs,
    )

    super(megnet_model, self).__init__(
        model,
        loss,
        output_types,
        batch_size=batch_size,
        learning_rate=lr,
        **kwargs)

  def _compute_model(self, inputs):
    """Run the model in inference mode and return element 0 of its output."""
    return self.model(inputs, training=False)[0]

  def _create_gradient_fn(self, variables):
    """Create a function that computes gradients and applies them to the model.
      Because of the way TensorFlow function tracing works, we need to create a
      separate function for each new set of variables.

      Parameters
      ----------
      variables
          Variables to optimise; when None, all trainable variables of the
          model are used.
      """

    @tf.function(experimental_relax_shapes=True)
    def apply_gradient_for_batch(inputs, labels, weights, loss):
      with tf.GradientTape() as tape:
        # Element 0 of the model output is taken, as in _compute_model.
        outputs = self.model(inputs, training=True)[0]
        if tf.is_tensor(outputs):
          outputs = [outputs]
        if self._loss_outputs is not None:
          outputs = [outputs[i] for i in self._loss_outputs]
        batch_loss = loss(outputs, labels, weights)
      if variables is None:
        train_vars = self.model.trainable_variables
      else:
        train_vars = variables
      grads = tape.gradient(batch_loss, train_vars)
      self._tf_optimizer.apply_gradients(zip(grads, train_vars))
      self._global_step.assign_add(1)
      return batch_loss

    return apply_gradient_for_batch

  def default_generator(self,
                        dataset,
                        epochs=1,
                        mode='fit',
                        deterministic=True,
                        pad_batches=False):
    """
        Batch the graphs produced by the MEGNET featurizer.

        Every graph of a batch is merged into one large disjoint graph:
        atom, bond and state arrays are concatenated, bond endpoint indices
        are shifted by the number of atoms of the preceding graphs, and two
        extra arrays record which graph each atom and each bond belongs to.

        Parameters
        ----------
        dataset
            Object providing ``iterbatches(batch_size, deterministic,
            pad_batches)``; each element of a batch's X must be a mapping
            with keys "atom", "bond", "state", "index1" and "index2".
        epochs: int
            Number of passes over the dataset.
        mode: str
            Not used by this generator.
        deterministic: bool
            Forwarded to ``dataset.iterbatches``.
        pad_batches: bool
            Forwarded to ``dataset.iterbatches``.

        Yields
        ------
        tuple
            ``(inputs, [y_b], [w_b])`` where ``inputs`` is the 7-tuple
            (atoms, bonds, states, index1, index2, atom graph ids,
            bond graph ids), each with a leading axis of size 1.
        """
    feature_keys = ("atom", "bond", "state", "index1", "index2")
    for epoch in range(epochs):
      for (X_b, y_b, w_b, ids_b) in dataset.iterbatches(
          batch_size=self.batch_size,
          deterministic=deterministic,
          pad_batches=pad_batches):
        atoms, bonds, states, index1_parts, index2_parts = [
            [np.array(x[key]) for x in X_b] for key in feature_keys
        ]

        # Graph id of every atom and of every bond in the merged graph.
        n_atoms = [len(a) for a in atoms]
        gnode = []
        for graph_id, count in enumerate(n_atoms):
          gnode += [graph_id] * count
        gbond = []
        for graph_id, graph_bonds in enumerate(bonds):
          gbond += [graph_id] * len(graph_bonds)

        # Shift bond endpoints by the number of atoms that precede each
        # graph. The atom count is used rather than max(index1) + 1 so that
        # a graph without bonds, or one whose last atom never appears in
        # index1, cannot misalign (or crash) the following graphs.
        index1 = []
        index2 = []
        offset = 0
        for count, ind1, ind2 in zip(n_atoms, index1_parts, index2_parts):
          index1 += [i + offset for i in ind1]
          index2 += [i + offset for i in ind2]
          offset += count

        inputs = (np.expand_dims(np.concatenate(atoms, axis=0), axis=0),
                  np.expand_dims(np.concatenate(bonds, axis=0), axis=0),
                  np.expand_dims(np.concatenate(states, axis=0), axis=0),
                  np.expand_dims(np.array(index1, dtype=np.int32), axis=0),
                  np.expand_dims(np.array(index2, dtype=np.int32), axis=0),
                  np.expand_dims(np.array(gnode, dtype=np.int32), axis=0),
                  np.expand_dims(np.array(gbond, dtype=np.int32), axis=0))

        # Labels are given a trailing axis when they arrive as a flat vector.
        if y_b.ndim == 1:
          y_b = np.expand_dims(y_b, axis=-1)

        yield (inputs, [y_b], [w_b])