Commit 3ea6281b authored by VIGNESHinZONE

Adding Examples for LCNNBlock, LCNN, LCNNModel and test functions

parent dd70ef0c
+75 −0
import unittest
import tempfile
from os import path
import numpy as np
from deepchem.utils import load_dataset_from_disk, download_url, untargz_file
from deepchem.metrics import Metric, mae_score
from deepchem.models import LCNNModel

try:
  import dgl
  import torch
  has_pytorch_and_dgl = True
except ModuleNotFoundError:
  has_pytorch_and_dgl = False

URL = "https://deepchemdata.s3-us-west-1.amazonaws.com/featurized_datasets/lcnn_data_feature.tar.gz"


@unittest.skipIf(not has_pytorch_and_dgl, 'PyTorch and DGL are not installed')
def test_lcnn_regression():

  current_dir = tempfile.mkdtemp()
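  # Download the pre-featurized LCNN dataset, extract it and load the
  # train/valid/test splits from disk.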
  download_url(url=URL, dest_dir=current_dir)
  untargz_file(path.join(current_dir, 'lcnn_data_feature.tar.gz'), current_dir)
  tasks, datasets, transformers = load_dataset_from_disk(
      path.join(current_dir, 'lcnn_data'))
  train, valid, test = datasets
  model = LCNNModel(mode='regression', batch_size=8, learning_rate=0.001)
  model.fit(train, nb_epoch=10)

  # check predict shape
  valid_preds = model.predict_on_batch(valid.X)
  assert valid_preds.shape == (65, 1)
  test_preds = model.predict(test)
  assert test_preds.shape == (65, 1)
  # check overfit
  regression_metric = Metric(mae_score)
  scores = model.evaluate(test, [regression_metric], transformers)
  assert scores[regression_metric.name] < 0.6


@unittest.skipIf(not has_pytorch_and_dgl, 'PyTorch and DGL are not installed')
def test_lcnn_reload():

  # needs change
  current_dir = tempfile.mkdtemp()
  download_url(url=URL, dest_dir=current_dir)
  untargz_file(path.join(current_dir, 'lcnn_data_feature.tar.gz'), current_dir)
  tasks, datasets, transformers = load_dataset_from_disk(
      path.join(current_dir, 'lcnn_data'))
  train, valid, test = datasets
  model_dir = tempfile.mkdtemp()
  model = LCNNModel(
      mode='regression', batch_size=8, learning_rate=0.001, model_dir=model_dir)
  model.fit(train, nb_epoch=10)

  # check predict shape
  valid_preds = model.predict_on_batch(valid.X)
  assert valid_preds.shape == (65, 1)
  test_preds = model.predict(test)
  assert test_preds.shape == (65, 1)
  # check overfit
  regression_metric = Metric(mae_score)
  scores = model.evaluate(test, [regression_metric], transformers)
  assert scores[regression_metric.name] < 0.6

  # reload
  reloaded_model = LCNNModel(
      mode='regression', batch_size=8, learning_rate=0.001, model_dir=model_dir)
  reloaded_model.restore()

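  # The restored model should reproduce the original model's predictions
  # to within floating point tolerance.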
  original_pred = model.predict(test)
  reload_pred = reloaded_model.predict(test)

  assert np.all(np.abs(original_pred - reload_pred) < 1e-7)
+175 −19
@@ -22,11 +22,30 @@ class LCNNBlock(nn.Module):

  [2] After the linear layer is applied to each permutation, the results are added up
  for each node and each node is transformed into a vector.

  Examples
  --------
  >>> import deepchem as dc
  >>> from deepchem.models.torch_models.lcnn import LCNNBlock
  >>> from deepchem.feat.graph_data import GraphData
  >>> import numpy as np
  >>> nodes = np.array([0, 1, 2])
  >>> x = np.zeros((nodes.size, nodes.max()+1))
  >>> x[np.arange(nodes.size),nodes] = 1
  >>> v = np.array([ 0,0, 0,0, 1,1, 1,1, 2,2, 2,2 ])
  >>> u = np.array([ 1,2, 2,1, 2,0, 0,2, 1,0, 0,1 ])
  >>> graph = GraphData(node_features=x, edge_index=np.array([u, v]))
  >>> model = LCNNBlock(3*2, 3, 2)
  >>> G = graph.to_dgl_graph()
  >>> x = G.ndata.pop('x')
  >>> print(model(G, x).shape)
  torch.Size([3, 3])

  """

  def __init__(self,
               input_feature: int,
               output_feature: int = 5,
               output_feature: int = 19,
               n_permutation_list: int = 6,
               dropout: float = 0.2,
               UseBN: bool = True):
@@ -37,9 +56,9 @@ class LCNNBlock(nn.Module):
    ----------
    input_feature: int
        Dimension of the concatenated input vector (node_feature_size * number of neighbors)
    output_feature: int
    output_feature: int, default 19
        Dimension of the output features of the convolution
    n_permutation_list: int
    n_permutation_list: int, default 6
        Different permutations taken along different directions
    dropout: float
        Probability value for dropout, between 0.0 and 1.0
@@ -81,6 +100,7 @@ class LCNNBlock(nn.Module):
      import dgl.function as fn
    except:
      raise ImportError("This class requires DGL to be installed.")
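    # Work on a local copy of the graph so the caller's graph is not mutated.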
    G = G.local_var()
    G.ndata['x'] = node_feats
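    # Copy each node's features onto its edges as messages; reduce_func then
    # concatenates the neighbour features for every permutation into 'X_site'.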
    G.update_all(fn.copy_u('x', 'm'), self.reduce_func)
    X = self.conv_weights(G.ndata['X_site'])
@@ -152,7 +172,7 @@ class Atom_Wise_Convolution(nn.Module):
        Size of input feature size
    output_feature: int
        Size of output feature size
    dropout: float
    dropout: float, default 0.2
        Probability value for dropout, between 0.0 and 1.0
    UseBN: bool
        Setting it to True will perform Batch Normalisation
@@ -265,6 +285,68 @@ class LCNN(nn.Module):
  [3] Transformation to scalar value for each node.
  [4] Average of properties per each element in a configuration

  Examples
  --------
  >>> import deepchem as dc
  >>> from pymatgen import Structure
  >>> import numpy as np
  >>> PRIMITIVE_CELL = {
  ...   "lattice": [[2.818528, 0.0, 0.0],
  ...               [-1.409264, 2.440917, 0.0],
  ...               [0.0, 0.0, 25.508255]],
  ...   "coords": [[0.66667, 0.33333, 0.090221],
  ...              [0.33333, 0.66667, 0.18043936],
  ...              [0.0, 0.0, 0.27065772],
  ...              [0.66667, 0.33333, 0.36087608],
  ...              [0.33333, 0.66667, 0.45109444],
  ...              [0.0, 0.0, 0.49656991]],
  ...   "species": ['H', 'H', 'H', 'H', 'H', 'He'],
  ...   "site_properties": {'SiteTypes': ['S1', 'S1', 'S1', 'S1', 'S1', 'A1']}
  ... }
  >>> PRIMITIVE_CELL_INF0 = {
  ...    "cutoff": np.around(6.00),
  ...    "structure": Structure(**PRIMITIVE_CELL),
  ...    "aos": ['1', '0', '2'],
  ...    "pbc": [True, True, False],
  ...    "ns": 1,
  ...    "na": 1
  ... }
  >>> DATA_POINT = {
  ...   "lattice": [[1.409264, -2.440917, 0.0],
  ...               [4.227792, 2.440917, 0.0],
  ...               [0.0, 0.0, 23.17559]],
  ...   "coords": [[0.0, 0.0, 0.099299],
  ...              [0.0, 0.33333, 0.198598],
  ...              [0.5, 0.16667, 0.297897],
  ...              [0.0, 0.0, 0.397196],
  ...              [0.0, 0.33333, 0.496495],
  ...              [0.5, 0.5, 0.099299],
  ...              [0.5, 0.83333, 0.198598],
  ...              [0.0, 0.66667, 0.297897],
  ...              [0.5, 0.5, 0.397196],
  ...              [0.5, 0.83333, 0.496495],
  ...              [0.0, 0.66667, 0.54654766],
  ...              [0.5, 0.16667, 0.54654766]],
  ...   "species": ['H', 'H', 'H', 'H', 'H', 'H',
  ...               'H', 'H', 'H', 'H', 'He', 'He'],
  ...   "site_properties": {
  ...     "SiteTypes": ['S1', 'S1', 'S1', 'S1', 'S1',
  ...                   'S1', 'S1', 'S1', 'S1', 'S1',
  ...                   'A1', 'A1'],
  ...     "oss": ['-1', '-1', '-1', '-1', '-1', '-1',
  ...             '-1', '-1', '-1', '-1', '0', '2']
  ...                   }
  ... }
  >>> featuriser = dc.feat.LCNNFeaturizer(**PRIMITIVE_CELL_INF0)
  >>> lcnn_feat = featuriser._featurize(Structure(**DATA_POINT)).to_dgl_graph()
  >>> print(type(lcnn_feat))
  <class 'dgl.heterograph.DGLHeteroGraph'>
  >>> model = LCNN()
  >>> out = model(lcnn_feat)
  >>> print(type(out))
  <class 'torch.Tensor'>


  References
  ----------

@@ -283,27 +365,30 @@ class LCNN(nn.Module):
               n_occupancy: int = 3,
               n_neighbor_sites: int = 19,
               n_permutation: int = 6,
               n_task: int = 1,
               dropout_rate: float = 0.2,
               n_conv: int = 2,
               n_features: int = 19,
               sitewise_n_feature: int = 25):
    """
    parameters
    Parameters
    ----------

    n_occupancy: int
    n_occupancy: int, default 3
        Number of possible occupancies
    n_neighbor_sites_list: int
    n_neighbor_sites: int, default 19
        Number of neighbors of each site.
    n_permutation: int
    n_permutation: int, default 6
        Different permutations taken along different directions.
    dropout_rate: float
    n_task: int, default 1
        Number of tasks
    dropout_rate: float, default 0.2
        Probability value for dropout, between 0.0 and 1.0
    nconv: int
    n_conv: int, default 2
        Number of convolutions performed
    n_feature: int
    n_features: int, default 19
        Number of features for each site
    sitewise_n_feature: int
    sitewise_n_feature: int, default 25
        Number of features per atom for the site-wise activation

    """
@@ -317,6 +402,7 @@ class LCNN(nn.Module):
    self.LCNN_blocks = nn.Sequential(*modules)
    self.Atom_wise_Conv = Atom_Wise_Convolution(n_features, sitewise_n_feature)
    self.Atom_wise_Lin = AtomWiseLinear(sitewise_n_feature, sitewise_n_feature)
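    # Final linear layer mapping the pooled graph representation to the task outputs.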
    self.fc = nn.Linear(sitewise_n_feature, n_task)

  def forward(self, G):
    """
@@ -335,15 +421,17 @@ class LCNN(nn.Module):
      import dgl
    except:
      raise ImportError("This class requires DGL to be installed.")
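    # Work on a local copy so the input graph is left unmodified.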
    G = G.local_var()
    node_feats = G.ndata.pop('x')

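    # Apply the stacked LCNN blocks, then the atom-wise convolution and
    # linear layers.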
    for conv in self.LCNN_blocks:
      node_feats = conv(G, node_feats)
    node_feats = self.Atom_wise_Conv(node_feats)
    node_feats = self.Atom_wise_Lin(node_feats).sum(axis=1)
    node_feats = self.Atom_wise_Lin(node_feats)
    G.ndata['new'] = node_feats

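    # Graph-level readout: average the node features over each graph and map
    # the pooled vector to the task outputs.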
    y = dgl.mean_nodes(G, 'new')
    y = self.fc(y)
    return y


@@ -353,11 +441,11 @@ class LCNNModel(TorchModel):
  Here is a simple example of code that uses the LCNNModel with the
  Platinum 2D Adsorption dataset.

  - import deepchem as dc
  - tasks, datasets, transformers = dc.molnet.load_Platinum_Adsorption()
  - train, valid, test = datasets
  - model = dc.models.LCNNModel(mode='regression', batch_size=8, learning_rate=0.001)
  - model.fit(train, nb_epoch=10)
  >> import deepchem as dc
  >> tasks, datasets, transformers = dc.molnet.load_Platinum_Adsorption()
  >> train, valid, test = datasets
  >> model = dc.models.LCNNModel(mode='regression', batch_size=8, learning_rate=0.001)
  >> model.fit(train, nb_epoch=10)

  This model takes arbitrary configurations of Molecules on an adsorbate and predicts
  their formation energy. These formation energies are found using DFT calculations and
@@ -367,6 +455,47 @@ class LCNNModel(TorchModel):
  For each node and each permutation, the features of the neighbouring nodes are concatenated and transformed further.
  This model uses only a node representation. Please refer to [1]_ for the details of the algorithm.

  Examples
  --------
  >>>
  >> import deepchem as dc
  >> from pymatgen import Structure
  >> import numpy as np
  >> from deepchem.feat import LCNNFeaturizer
  >> from deepchem.molnet import load_Platinum_Adsorption
  >> PRIMITIVE_CELL = {
  ..   "lattice": [[2.818528, 0.0, 0.0],
  ..               [-1.409264, 2.440917, 0.0],
  ..               [0.0, 0.0, 25.508255]],
  ..   "coords": [[0.66667, 0.33333, 0.090221],
  ..              [0.33333, 0.66667, 0.18043936],
  ..              [0.0, 0.0, 0.27065772],
  ..              [0.66667, 0.33333, 0.36087608],
  ..              [0.33333, 0.66667, 0.45109444],
  ..              [0.0, 0.0, 0.49656991]],
  ..   "species": ['H', 'H', 'H', 'H', 'H', 'He'],
  ..   "site_properties": {'SiteTypes': ['S1', 'S1', 'S1', 'S1', 'S1', 'A1']}
  .. }
  >> PRIMITIVE_CELL_INF0 = {
  ..    "cutoff": np.around(6.00),
  ..    "structure": Structure(**PRIMITIVE_CELL),
  ..    "aos": ['1', '0', '2'],
  ..    "pbc": [True, True, False],
  ..    "ns": 1,
  ..    "na": 1
  .. }
  >> tasks, datasets, transformers = load_Platinum_Adsorption(
  ..    featurizer= LCNNFeaturizer( **PRIMITIVE_CELL_INF0)
  .. )
  >> train, val, test = datasets
  >> model = LCNNModel(mode='regression',
  ..                   batch_size=8,
  ..                   learning_rate=0.001)
  >> model.fit(train, nb_epoch=10)


  References
  ----------
  .. [1] Jonathan Lym and Geun Ho Gu, J. Phys. Chem. C 2019, 123, 18951−18959.
@@ -380,17 +509,43 @@ class LCNNModel(TorchModel):
               n_occupancy: int = 3,
               n_neighbor_sites_list: int = 19,
               n_permutation_list: int = 6,
               n_task: int = 1,
               dropout_rate: float = 0.4,
               n_conv: int = 2,
               n_features: int = 44,
               sitewise_n_feature: int = 25,
               **kwargs):
    """
    This class accepts all the keyword arguments from TorchModel.

    Parameters
    ----------

    n_occupancy: int, default 3
        Number of possible occupancies.
    n_neighbor_sites_list: int, default 19
        Number of neighbors of each site.
    n_permutation_list: int, default 6
        Different permutations taken along different directions.
    n_task: int, default 1
        Number of tasks.
    dropout_rate: float, default 0.4
        Probability value for dropout, between 0.0 and 1.0
    n_conv: int, default 2
        Number of convolutions performed.
    n_features: int, default 44
        Number of features for each site.
    sitewise_n_feature: int, default 25
        Number of features per atom for the site-wise activation.
    kwargs: Dict
        This class accepts all the keyword arguments from TorchModel.
    """

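    # Xavier-initialise every linear layer before wrapping the network in a
    # TorchModel.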
    def init_weights(m):
      if type(m) == nn.Linear:
        torch.nn.init.xavier_uniform_(m.weight)

    model = LCNN(n_occupancy, n_neighbor_sites_list, n_permutation_list,
    model = LCNN(n_occupancy, n_neighbor_sites_list, n_permutation_list, n_task,
                 dropout_rate, n_conv, n_features, sitewise_n_feature)
    model.apply(init_weights)
    loss = L2Loss()
@@ -401,6 +556,7 @@ class LCNNModel(TorchModel):
  def _prepare_batch(self, batch):
    """
    Create batch data for LCNN.

    Parameters
    ----------
    batch: Tuple