:sparkles: add doctsrings (c02779bd) · Commits · 钟慕尧 / deepchem

deepchem/feat/molecule_featurizers/mol_graph_conv_featurizer.py

+6 −2

Original line number	Diff line number	Diff line
		@@ -78,7 +78,7 @@ class MolGraphConvFeaturizer(MolecularFeaturizer):
		`constrcut_atom_feature` or `constrcut_bond_feature`.

		The default node representation is constructed by concatenating the following values,
		and the feature length is 25.
		and the feature length is 38.

		- Atom type: A one-hot vector of this atom, "C", "N", "O", "F", "P", "S", "Br", "I", "other atoms".
		- Chirality: A one-hot vector of the chirality, "R" or "S".
		@@ -92,7 +92,7 @@ class MolGraphConvFeaturizer(MolecularFeaturizer):
		- Number of Hydrogens: A one-hot vector of the number of hydrogens (0-4) that this atom is connected to.

		The default edge representation is constructed by concatenating the following values,
		and the feature length is 6.
		and the feature length is 11.

		- Bond type: A one-hot vector of the bond type, "single", "double", "triple", or "aromatic".
		- Same ring: A one-hot vector of whether the atoms in the pair are in the same ring.
		@@ -109,6 +109,10 @@ class MolGraphConvFeaturizer(MolecularFeaturizer):
		>>> out = featurizer.featurize(smiles)
		>>> type(out[0])
		<class 'deepchem.feat.graph_data.GraphData'>
		>>> out[0].num_node_features
		38
		>>> out[0].num_edge_features
		11

		References
		----------

deepchem/models/tests/test_gat.py

+4 −2

Original line number	Diff line number	Diff line
		@@ -12,11 +12,13 @@ except:
		has_pytorch_and_pyg = False


		@unittest.skipIf(not has_pytorch_and_pyg, 'PyTorch and PyTorch Geometric are not installed')
		@unittest.skipIf(not has_pytorch_and_pyg,
		'PyTorch and PyTorch Geometric are not installed')
		def test_gat_classification():
		# load datasets
		featurizer = MolGraphConvFeaturizer()
		tasks, dataset, transformers, metric = get_dataset('regression', featurizer=featurizer)
		tasks, dataset, transformers, metric = get_dataset(
		'regression', featurizer=featurizer)
		n_tasks = len(tasks)

		# initialize models

deepchem/models/torch_models/cgcnn.py

+0 −4

Original line number	Diff line number	Diff line
		@@ -294,10 +294,6 @@ class CGCNNModel(TorchModel):
		The labels converted to torch.Tensor
		weights: List[torch.Tensor] or None
		The weights for each sample or sample/task pair converted to torch.Tensor

		Notes
		-----
		This class requires DGL and PyTorch to be installed.
		"""
		try:
		import dgl

deepchem/models/torch_models/gat.py

+64 −17

Original line number	Diff line number	Diff line
		@@ -9,11 +9,18 @@ from deepchem.models.torch_models.torch_model import TorchModel
		class GAT(nn.Module):
		"""Graph Attention Networks.

		TODO: add more docstring
		This model takes arbitrary graphs as input and predicts graph properties. This model is
		one of the variants of Graph Convolutional Networks. The main difference from basic GCN models
		is how node representations are updated. The GAT updates node representations with multi-head
		attention mechanisms, which became popular in NLP through models like the Transformer. The most important advantage
		of this approach is interpretability: the attention weights indicate how the model predicts the value
		and which parts of the graph structure are important. Please refer to [1]_ for
		the details of the algorithm.

		Examples
		--------
		>>> import deepchem as dc
		>>> from torch_geometric.data import Batch
		>>> smiles = ["C1CCC1", "C1=CC=CN=C1"]
		>>> featurizer = dc.feat.MolGraphConvFeaturizer()
		>>> graphs = featurizer.featurize(smiles)
		@@ -21,11 +28,12 @@ class GAT(nn.Module):
		<class 'deepchem.feat.graph_data.GraphData'>
		>>> pyg_graphs = [graph.to_pyg_graph() for graph in graphs]
		>>> print(type(pyg_graphs[0]))
		>>> model = dc.models.GAT(n_out=1)
		>>> out = model(pyg_graphs)
		<class 'torch_geometric.data.data.Data'>
		>>> model = dc.models.GAT(n_tasks=2)
		>>> out = model(Batch.from_data_list(pyg_graphs))
		>>> print(type(out))
		<class 'torch.Tensor'>
		>>> out.shape == (1, 1)
		>>> out.shape == (2, 2)
		True

		References
		@@ -40,7 +48,7 @@ class GAT(nn.Module):

		def __init__(
		self,
		in_node_dim: int = 25,
		in_node_dim: int = 38,
		hidden_node_dim: int = 64,
		heads: int = 4,
		dropout_rate: float = 0.0,
		@@ -49,7 +57,23 @@ class GAT(nn.Module):
		n_tasks: int = 1,
		):
		"""
		TODO: add docstring
		Parameters
		----------
		in_node_dim: int, default 38
		The length of the initial node feature vectors. The default of 38
		matches the node feature length produced by `MolGraphConvFeaturizer`.
		hidden_node_dim: int, default 64
		The length of the hidden node feature vectors.
		heads: int, default 4
		The number of attention heads used in the multi-head attention.
		dropout_rate: float, default 0.0
		The dropout probability for each convolutional layer.
		num_conv: int, default 3
		The number of convolutional layers.
		predicator_hidden_feats: int, default 32
		The size for hidden representations in the output MLP predictor, default to 32.
		n_tasks: int, default 1
		The number of tasks, i.e. the size of the output for each graph, default to 1.
		"""
		try:
		from torch_geometric.nn import GATConv, global_mean_pool
		@@ -97,20 +121,27 @@ class GAT(nn.Module):


		class GATModel(TorchModel):
		"""Graph Attention Networks.

		TODO: add more docstring
		"""Graph Attention Networks (GAT).

		Here is a simple code example that uses the GATModel with
		a molecular dataset.

		>> import deepchem as dc
		>> dataset_config = {"reload": False, "featurizer": dc.feat.MolGraphConvFeaturizer, "transformers": []}
		>> featurizer = dc.feat.MolGraphConvFeaturizer()
		>> dataset_config = {"reload": False, "featurizer": featurizer, "transformers": []}
		>> tasks, datasets, transformers = dc.molnet.load_tox21(**dataset_config)
		>> train, valid, test = datasets
		>> model = dc.models.GATModel(loss=dc.models.losses.(), batch_size=32, learning_rate=0.001)
		>> model = dc.models.GATModel(loss=dc.models.losses.SoftmaxCrossEntropy(), batch_size=32, learning_rate=0.001)
		>> model.fit(train, nb_epoch=50)

		This model takes arbitrary graphs as input and predicts graph properties. This model is
		one of the variants of Graph Convolutional Networks. The main difference from basic GCN models
		is how node representations are updated. The GAT updates node representations with multi-head
		attention mechanisms, which became popular in NLP through models like the Transformer. The most important advantage
		of this approach is interpretability: the attention weights indicate how the model predicts the value
		and which parts of the graph structure are important. Please refer to [1]_ for
		the details of the algorithm.

		References
		----------
		.. [1] Veličković, Petar, et al. "Graph attention networks." arXiv preprint
		@@ -122,7 +153,7 @@ class GATModel(TorchModel):
		"""

		def __init__(self,
		in_node_dim: int = 25,
		in_node_dim: int = 38,
		hidden_node_dim: int = 64,
		heads: int = 4,
		dropout_rate: float = 0.0,
		@@ -131,7 +162,27 @@ class GATModel(TorchModel):
		n_tasks: int = 1,
		**kwargs):
		"""
		TODO: add docstring
		This class accepts all the keyword arguments from TorchModel.

		Parameters
		----------
		in_node_dim: int, default 38
		The length of the initial node feature vectors. The default of 38
		matches the node feature length produced by `MolGraphConvFeaturizer`.
		hidden_node_dim: int, default 64
		The length of the hidden node feature vectors.
		heads: int, default 4
		The number of attention heads used in the multi-head attention.
		dropout_rate: float, default 0.0
		The dropout probability for each convolutional layer.
		num_conv: int, default 3
		The number of convolutional layers.
		predicator_hidden_feats: int, default 32
		The size for hidden representations in the output MLP predictor, default to 32.
		n_tasks: int, default 1
		The number of tasks, i.e. the size of the output for each graph, default to 1.
		kwargs: Dict
		This class accepts all the keyword arguments from TorchModel.
		"""
		model = GAT(
		in_node_dim,
		@@ -160,10 +211,6 @@ class GATModel(TorchModel):
		The labels converted to torch.Tensor.
		weights: List[torch.Tensor] or None
		The weights for each sample or sample/task pair converted to torch.Tensor.

		Notes
		-----
		This class requires PyTorch Geometric to be installed.
		"""
		try:
		from torch_geometric.data import Batch

deepchem/models/torch_models/torch_model.py

+3 −2

Original line number	Diff line number	Diff line
		@@ -9,8 +9,6 @@ try:
		except:
		from collections import Sequence as SequenceCollection

		logger = logging.getLogger(__name__)

		from deepchem.data import Dataset, NumpyDataset
		from deepchem.metrics import Metric
		from deepchem.models.losses import Loss
		@@ -40,6 +38,9 @@ def is_wandb_available():
		return _has_wandb


		logger = logging.getLogger(__name__)


		class TorchModel(Model):
		"""This is a DeepChem model implemented by a PyTorch model.

Admin message