Commit 3ea6281b authored by VIGNESHinZONE

Adding Examples for LCNNBlock, LCNN, LCNNModel and test functions

parent dd70ef0c
+75 −0
import unittest
import tempfile
from os import path
import numpy as np
from deepchem.utils import load_dataset_from_disk, download_url, untargz_file
from deepchem.metrics import Metric, mae_score
from deepchem.models import LCNNModel

try:
  import dgl
  import torch
  has_pytorch_and_dgl = True
except ModuleNotFoundError:
  has_pytorch_and_dgl = False

URL = "https://deepchemdata.s3-us-west-1.amazonaws.com/featurized_datasets/lcnn_data_feature.tar.gz"


@unittest.skipIf(not has_pytorch_and_dgl, 'PyTorch and DGL are not installed')
def test_lcnn_regression():

  current_dir = tempfile.mkdtemp()
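  # Download the pre-featurized LCNN dataset, extract it and load the
  # train/valid/test splits from disk.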
  download_url(url=URL, dest_dir=current_dir)
  untargz_file(path.join(current_dir, 'lcnn_data_feature.tar.gz'), current_dir)
  tasks, datasets, transformers = load_dataset_from_disk(
      path.join(current_dir, 'lcnn_data'))
  train, valid, test = datasets
  model = LCNNModel(mode='regression', batch_size=8, learning_rate=0.001)
  model.fit(train, nb_epoch=10)

  # check predict shape
  valid_preds = model.predict_on_batch(valid.X)
  assert valid_preds.shape == (65, 1)
  test_preds = model.predict(test)
  assert test_preds.shape == (65, 1)
  # check overfit
  regression_metric = Metric(mae_score)
  scores = model.evaluate(test, [regression_metric], transformers)
  assert scores[regression_metric.name] < 0.6


@unittest.skipIf(not has_pytorch_and_dgl, 'PyTorch and DGL are not installed')
def test_lcnn_reload():

  # needs change
  current_dir = tempfile.mkdtemp()
  download_url(url=URL, dest_dir=current_dir)
  untargz_file(path.join(current_dir, 'lcnn_data_feature.tar.gz'), current_dir)
  tasks, datasets, transformers = load_dataset_from_disk(
      path.join(current_dir, 'lcnn_data'))
  train, valid, test = datasets
  model_dir = tempfile.mkdtemp()
  model = LCNNModel(
      mode='regression', batch_size=8, learning_rate=0.001, model_dir=model_dir)
  model.fit(train, nb_epoch=10)

  # check predict shape
  valid_preds = model.predict_on_batch(valid.X)
  assert valid_preds.shape == (65, 1)
  test_preds = model.predict(test)
  assert test_preds.shape == (65, 1)
  # check overfit
  regression_metric = Metric(mae_score)
  scores = model.evaluate(test, [regression_metric], transformers)
  assert scores[regression_metric.name] < 0.6

  # reload
  reloaded_model = LCNNModel(
      mode='regression', batch_size=8, learning_rate=0.001, model_dir=model_dir)
  reloaded_model.restore()

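  # The restored model should reproduce the original model's predictions
  # to within floating point tolerance.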
  original_pred = model.predict(test)
  reload_pred = reloaded_model.predict(test)

  assert np.all(np.abs(original_pred - reload_pred) < 1e-7)
+175 −19
@@ -22,11 +22,30 @@ class LCNNBlock(nn.Module):

  [2] After the linear layer is applied to each permutation, the results are added up
  for each node and each node is transformed into a vector.

  Examples
  --------
  >>> import deepchem as dc
  >>> from deepchem.models.torch_models.lcnn import LCNNBlock
  >>> from deepchem.feat.graph_data import GraphData
  >>> import numpy as np
  >>> nodes = np.array([0, 1, 2])
  >>> x = np.zeros((nodes.size, nodes.max()+1))
  >>> x[np.arange(nodes.size),nodes] = 1
  >>> v = np.array([ 0,0, 0,0, 1,1, 1,1, 2,2, 2,2 ])
  >>> u = np.array([ 1,2, 2,1, 2,0, 0,2, 1,0, 0,1 ])
  >>> graph = GraphData(node_features=x, edge_index=np.array([u, v]))
  >>> model = LCNNBlock(3*2, 3, 2)
  >>> G = graph.to_dgl_graph()
  >>> x = G.ndata.pop('x')
  >>> print(model(G, x).shape)
  torch.Size([3, 3])

  """

  def __init__(self,
               input_feature: int,
               output_feature: int = 5,
               output_feature: int = 19,
               n_permutation_list: int = 6,
               dropout: float = 0.2,
               UseBN: bool = True):
@@ -37,9 +56,9 @@ class LCNNBlock(nn.Module):
    ----------
    input_feature: int
        Dimension of the concatenated input vector (node_feature_size * number of neighbors)
    output_feature: int
    output_feature: int, default 19
        Dimension of the output features of the convolution
    n_permutation_list: int
    n_permutation_list: int, default 6
        Different permutations taken along different directions
    dropout: float
        Probability value for dropout, between 0.0 and 1.0
@@ -81,6 +100,7 @@ class LCNNBlock(nn.Module):
      import dgl.function as fn
    except:
      raise ImportError("This class requires DGL to be installed.")
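    # Work on a local copy of the graph so the caller's graph is not mutated.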
    G = G.local_var()
    G.ndata['x'] = node_feats
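    # Copy each node's features onto its edges as messages; reduce_func then
    # concatenates the neighbour features for every permutation into 'X_site'.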
    G.update_all(fn.copy_u('x', 'm'), self.reduce_func)
    X = self.conv_weights(G.ndata['X_site'])
@@ -152,7 +172,7 @@ class Atom_Wise_Convolution(nn.Module):
        Size of input feature size
    output_feature: int
        Size of output feature size
    dropout: float
    dropout: float, default 0.2
        Probability value for dropout, between 0.0 and 1.0
    UseBN: bool
        Setting it to True will perform Batch Normalisation
@@ -265,6 +285,68 @@ class LCNN(nn.Module):
  [3] Transformation to scalar value for each node.
  [4] Average of properties per each element in a configuration

  Examples
  --------
  >>> import deepchem as dc
  >>> from pymatgen import Structure
  >>> import numpy as np
  >>> PRIMITIVE_CELL = {
  ...   "lattice": [[2.818528, 0.0, 0.0],
  ...               [-1.409264, 2.440917, 0.0],
  ...               [0.0, 0.0, 25.508255]],
  ...   "coords": [[0.66667, 0.33333, 0.090221],
  ...              [0.33333, 0.66667, 0.18043936],
  ...              [0.0, 0.0, 0.27065772],
  ...              [0.66667, 0.33333, 0.36087608],
  ...              [0.33333, 0.66667, 0.45109444],
  ...              [0.0, 0.0, 0.49656991]],
  ...   "species": ['H', 'H', 'H', 'H', 'H', 'He'],
  ...   "site_properties": {'SiteTypes': ['S1', 'S1', 'S1', 'S1', 'S1', 'A1']}
  ... }
  >>> PRIMITIVE_CELL_INF0 = {
  ...    "cutoff": np.around(6.00),
  ...    "structure": Structure(**PRIMITIVE_CELL),
  ...    "aos": ['1', '0', '2'],
  ...    "pbc": [True, True, False],
  ...    "ns": 1,
  ...    "na": 1
  ... }
  >>> DATA_POINT = {
  ...   "lattice": [[1.409264, -2.440917, 0.0],
  ...               [4.227792, 2.440917, 0.0],
  ...               [0.0, 0.0, 23.17559]],
  ...   "coords": [[0.0, 0.0, 0.099299],
  ...              [0.0, 0.33333, 0.198598],
  ...              [0.5, 0.16667, 0.297897],
  ...              [0.0, 0.0, 0.397196],
  ...              [0.0, 0.33333, 0.496495],
  ...              [0.5, 0.5, 0.099299],
  ...              [0.5, 0.83333, 0.198598],
  ...              [0.0, 0.66667, 0.297897],
  ...              [0.5, 0.5, 0.397196],
  ...              [0.5, 0.83333, 0.496495],
  ...              [0.0, 0.66667, 0.54654766],
  ...              [0.5, 0.16667, 0.54654766]],
  ...   "species": ['H', 'H', 'H', 'H', 'H', 'H',
  ...               'H', 'H', 'H', 'H', 'He', 'He'],
  ...   "site_properties": {
  ...     "SiteTypes": ['S1', 'S1', 'S1', 'S1', 'S1',
  ...                   'S1', 'S1', 'S1', 'S1', 'S1',
  ...                   'A1', 'A1'],
  ...     "oss": ['-1', '-1', '-1', '-1', '-1', '-1',
  ...             '-1', '-1', '-1', '-1', '0', '2']
  ...                   }
  ... }
  >>> featuriser = dc.feat.LCNNFeaturizer(**PRIMITIVE_CELL_INF0)
  >>> lcnn_feat = featuriser._featurize(Structure(**DATA_POINT)).to_dgl_graph()
  >>> print(type(lcnn_feat))
  <class 'dgl.heterograph.DGLHeteroGraph'>
  >>> model = LCNN()
  >>> out = model(lcnn_feat)
  >>> print(type(out))
  <class 'torch.Tensor'>


  References
  ----------

@@ -283,27 +365,30 @@ class LCNN(nn.Module):
               n_occupancy: int = 3,
               n_neighbor_sites: int = 19,
               n_permutation: int = 6,
               n_task: int = 1,
               dropout_rate: float = 0.2,
               n_conv: int = 2,
               n_features: int = 19,
               sitewise_n_feature: int = 25):
    """
    parameters
    Parameters
    ----------

    n_occupancy: int
    n_occupancy: int, default 3
        Number of possible occupancies
    n_neighbor_sites_list: int
    n_neighbor_sites: int, default 19
        Number of neighbors of each site.
    n_permutation: int
    n_permutation: int, default 6
        Different permutations taken along different directions.
    dropout_rate: float
    n_task: int, default 1
        Number of tasks
    dropout_rate: float, default 0.2
        Probability value for dropout, between 0.0 and 1.0
    nconv: int
    n_conv: int, default 2
        Number of convolutions performed
    n_feature: int
    n_features: int, default 19
        Number of features for each site
    sitewise_n_feature: int
    sitewise_n_feature: int, default 25
        Number of features per atom for the site-wise activation

    """
@@ -317,6 +402,7 @@ class LCNN(nn.Module):
    self.LCNN_blocks = nn.Sequential(*modules)
    self.Atom_wise_Conv = Atom_Wise_Convolution(n_features, sitewise_n_feature)
    self.Atom_wise_Lin = AtomWiseLinear(sitewise_n_feature, sitewise_n_feature)
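    # Final linear layer mapping the pooled graph representation to the task outputs.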
    self.fc = nn.Linear(sitewise_n_feature, n_task)

  def forward(self, G):
    """
@@ -335,15 +421,17 @@ class LCNN(nn.Module):
      import dgl
    except:
      raise ImportError("This class requires DGL to be installed.")
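    # Work on a local copy so the input graph is left unmodified.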
    G = G.local_var()
    node_feats = G.ndata.pop('x')

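    # Apply the stacked LCNN blocks, then the atom-wise convolution and
    # linear layers.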
    for conv in self.LCNN_blocks:
      node_feats = conv(G, node_feats)
    node_feats = self.Atom_wise_Conv(node_feats)
    node_feats = self.Atom_wise_Lin(node_feats).sum(axis=1)
    node_feats = self.Atom_wise_Lin(node_feats)
    G.ndata['new'] = node_feats

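    # Graph-level readout: average the node features over each graph and map
    # the pooled vector to the task outputs.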
    y = dgl.mean_nodes(G, 'new')
    y = self.fc(y)
    return y


@@ -353,11 +441,11 @@ class LCNNModel(TorchModel):
  Here is a simple example of code that uses the LCNNModel with the
  Platinum 2D Adsorption dataset.

  - import deepchem as dc
  - tasks, datasets, transformers = dc.molnet.load_Platinum_Adsorption()
  - train, valid, test = datasets
  - model = dc.models.LCNNModel(mode='regression', batch_size=8, learning_rate=0.001)
  - model.fit(train, nb_epoch=10)
  >> import deepchem as dc
  >> tasks, datasets, transformers = dc.molnet.load_Platinum_Adsorption()
  >> train, valid, test = datasets
  >> model = dc.models.LCNNModel(mode='regression', batch_size=8, learning_rate=0.001)
  >> model.fit(train, nb_epoch=10)

  This model takes arbitrary configurations of Molecules on an adsorbate and predicts
  their formation energy. These formation energies are found using DFT calculations and
@@ -367,6 +455,47 @@ class LCNNModel(TorchModel):
  For each node and each permutation, the features of the neighbouring nodes are concatenated and transformed further.
  This model uses only a node representation. Please refer to [1]_ for the details of the algorithm.

  Examples
  --------
  >>>
  >> import deepchem as dc
  >> from pymatgen import Structure
  >> import numpy as np
  >> from deepchem.feat import LCNNFeaturizer
  >> from deepchem.molnet import load_Platinum_Adsorption
  >> PRIMITIVE_CELL = {
  ..   "lattice": [[2.818528, 0.0, 0.0],
  ..               [-1.409264, 2.440917, 0.0],
  ..               [0.0, 0.0, 25.508255]],
  ..   "coords": [[0.66667, 0.33333, 0.090221],
  ..              [0.33333, 0.66667, 0.18043936],
  ..              [0.0, 0.0, 0.27065772],
  ..              [0.66667, 0.33333, 0.36087608],
  ..              [0.33333, 0.66667, 0.45109444],
  ..              [0.0, 0.0, 0.49656991]],
  ..   "species": ['H', 'H', 'H', 'H', 'H', 'He'],
  ..   "site_properties": {'SiteTypes': ['S1', 'S1', 'S1', 'S1', 'S1', 'A1']}
  .. }
  >> PRIMITIVE_CELL_INF0 = {
  ..    "cutoff": np.around(6.00),
  ..    "structure": Structure(**PRIMITIVE_CELL),
  ..    "aos": ['1', '0', '2'],
  ..    "pbc": [True, True, False],
  ..    "ns": 1,
  ..    "na": 1
  .. }
  >> tasks, datasets, transformers = load_Platinum_Adsorption(
  ..    featurizer= LCNNFeaturizer( **PRIMITIVE_CELL_INF0)
  .. )
  >> train, val, test = datasets
  >> model = LCNNModel(mode='regression',
  ..                   batch_size=8,
  ..                   learning_rate=0.001)
  >> model.fit(train, nb_epoch=10)


  References
  ----------
  .. [1] Jonathan Lym and Geun Ho Gu, J. Phys. Chem. C 2019, 123, 18951−18959.
@@ -380,17 +509,43 @@ class LCNNModel(TorchModel):
               n_occupancy: int = 3,
               n_neighbor_sites_list: int = 19,
               n_permutation_list: int = 6,
               n_task: int = 1,
               dropout_rate: float = 0.4,
               n_conv: int = 2,
               n_features: int = 44,
               sitewise_n_feature: int = 25,
               **kwargs):
    """
    This class accepts all the keyword arguments from TorchModel.

    Parameters
    ----------

    n_occupancy: int, default 3
        Number of possible occupancies.
    n_neighbor_sites_list: int, default 19
        Number of neighbors of each site.
    n_permutation_list: int, default 6
        Different permutations taken along different directions.
    n_task: int, default 1
        Number of tasks.
    dropout_rate: float, default 0.4
        Probability value for dropout, between 0.0 and 1.0
    n_conv: int, default 2
        Number of convolutions performed.
    n_features: int, default 44
        Number of features for each site.
    sitewise_n_feature: int, default 25
        Number of features per atom for the site-wise activation.
    kwargs: Dict
        This class accepts all the keyword arguments from TorchModel.
    """

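    # Xavier-initialise every linear layer before wrapping the network in a
    # TorchModel.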
    def init_weights(m):
      if type(m) == nn.Linear:
        torch.nn.init.xavier_uniform_(m.weight)

    model = LCNN(n_occupancy, n_neighbor_sites_list, n_permutation_list,
    model = LCNN(n_occupancy, n_neighbor_sites_list, n_permutation_list, n_task,
                 dropout_rate, n_conv, n_features, sitewise_n_feature)
    model.apply(init_weights)
    loss = L2Loss()
@@ -401,6 +556,7 @@ class LCNNModel(TorchModel):
  def _prepare_batch(self, batch):
    """
    Create batch data for LCNN.

    Parameters
    ----------
    batch: Tuple