:ok_hand: update for review comments (1ef32ec1) · Commits · 钟慕尧 / deepchem

deepchem/feat/molecule_featurizers/mol_graph_conv_featurizer.py

+10 −10

Original line number	Diff line number	Diff line
		@@ -4,7 +4,7 @@ import numpy as np
		from deepchem.utils.typing import RDKitAtom, RDKitBond, RDKitMol
		from deepchem.feat.graph_data import GraphData
		from deepchem.feat.base_classes import MolecularFeaturizer
		from deepchem.utils.graph_conv_utils import get_atom_type_one_hot, \
		from deepchem.utils.molecule_feature_utils import get_atom_type_one_hot, \
		construct_hydrogen_bonding_info, get_atom_hydrogen_bonding_one_hot, \
		get_atom_is_in_aromatic_one_hot, get_atom_hybridization_one_hot, \
		get_atom_total_num_Hs_one_hot, get_atom_chirality_one_hot, get_atom_formal_charge, \
		@@ -85,7 +85,7 @@ class MolGraphConvFeaturizer(MolecularFeaturizer):
		- Chirality: A one-hot vector of the chirality, "R" or "S".
		- Formal charge: Integer electronic charge.
		- Partial charge: Calculated partial charge.
		- Ring sizes: A one-hot vector of the number of rings (3-8) that include this atom.
		- Ring sizes: A one-hot vector of the size (3-8) of rings that include this atom.
		- Hybridization: A one-hot vector of "sp", "sp2", "sp3".
		- Hydrogen bonding: A one-hot vector of whether this atom is a hydrogen bond donor or acceptor.
		- Aromatic: A one-hot vector of whether the atom belongs to an aromatic ring.
		@@ -101,7 +101,7 @@ class MolGraphConvFeaturizer(MolecularFeaturizer):
		- Stereo: A one-hot vector of the stereo configuration of a bond.

		If you want to know more details about features, please check the paper [1]_ and
		utilities in deepchem.utils.graph_conv_utils.py.
		utilities in deepchem.utils.molecule_feature_utils.py.

		Examples
		--------
		@@ -125,15 +125,15 @@ class MolGraphConvFeaturizer(MolecularFeaturizer):
		This class requires RDKit to be installed.
		"""

		def __init__(self, add_self_loop: bool = False):
		def __init__(self, add_self_edges: bool = False):
		"""
		Parameters
		----------
		add_self_loop: bool, default False
		add_self_edges: bool, default False
		Whether to add self-connected edges or not. If you want to use DGL,
		you sometimes need to add explicit self-connected edges.
		"""
		self.add_self_loop = add_self_loop
		self.add_self_edges = add_self_edges

		def _featurize(self, mol: RDKitMol) -> GraphData:
		"""Calculate molecule graph features from RDKit mol object.
		@@ -174,23 +174,23 @@ class MolGraphConvFeaturizer(MolecularFeaturizer):
		)

		# construct edge (bond) information
		src, dist, bond_features = [], [], []
		src, dest, bond_features = [], [], []
		for bond in mol.GetBonds():
		# add edge list considering a directed graph
		start, end = bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()
		src += [start, end]
		dist += [end, start]
		dest += [end, start]
		bond_features += 2 * [_construct_bond_feature(bond)]

		if self.add_self_loop:
		num_atoms = mol.GetNumAtoms()
		src += [i for i in range(num_atoms)]
		dist += [i for i in range(num_atoms)]
		dest += [i for i in range(num_atoms)]
		# add dummy edge features
		bond_fea_length = len(bond_features[0])
		bond_features += num_atoms * [[0 for _ in range(bond_fea_length)]]

		return GraphData(
		node_features=atom_features,
		edge_index=np.array([src, dist], dtype=np.int),
		edge_index=np.array([src, dest], dtype=np.int),
		edge_features=np.array(bond_features, dtype=np.float))

deepchem/feat/tests/test_mol_graph_conv_featurizer.py

+1 −1

Original line number	Diff line number	Diff line
		@@ -25,7 +25,7 @@ class TestMolGraphConvFeaturizer(unittest.TestCase):

		def test_featurizer_with_self_loop(self):
		smiles = ["C1=CC=CN=C1", "O=C(NCc1cc(OC)c(O)cc1)CCCC/C=C/C(C)C"]
		featurizer = MolGraphConvFeaturizer(add_self_loop=True)
		featurizer = MolGraphConvFeaturizer(add_self_edges=True)
		graph_feat = featurizer.featurize(smiles)
		assert len(graph_feat) == 2

deepchem/models/torch_models/cgcnn.py

+8 −8

Original line number	Diff line number	Diff line
		@@ -144,7 +144,7 @@ class CGCNN(nn.Module):
		hidden_node_dim: int = 64,
		in_edge_dim: int = 41,
		num_conv: int = 3,
		predicator_hidden_feats: int = 128,
		predictor_hidden_feats: int = 128,
		n_tasks: int = 1,
		mode: str = 'regression',
		n_classes: int = 2,
		@@ -162,7 +162,7 @@ class CGCNN(nn.Module):
		based on default setting of CGCNNFeaturizer.
		num_conv: int, default 3
		The number of convolutional layers.
		predicator_hidden_feats: int, default 128
		predictor_hidden_feats: int, default 128
		The size for hidden representations in the output MLP predictor.
		n_tasks: int, default 1
		The number of the output size.
		@@ -190,11 +190,11 @@ class CGCNN(nn.Module):
		batch_norm=True) for _ in range(num_conv)
		])
		self.pooling = dgl.mean_nodes
		self.fc = nn.Linear(hidden_node_dim, predicator_hidden_feats)
		self.fc = nn.Linear(hidden_node_dim, predictor_hidden_feats)
		if self.mode == 'regression':
		self.out = nn.Linear(predicator_hidden_feats, n_tasks)
		self.out = nn.Linear(predictor_hidden_feats, n_tasks)
		else:
		self.out = nn.Linear(predicator_hidden_feats, n_tasks * n_classes)
		self.out = nn.Linear(predictor_hidden_feats, n_tasks * n_classes)

		def forward(self, dgl_graph):
		"""Predict labels
		@@ -276,7 +276,7 @@ class CGCNNModel(TorchModel):
		hidden_node_dim: int = 64,
		in_edge_dim: int = 41,
		num_conv: int = 3,
		predicator_hidden_feats: int = 128,
		predictor_hidden_feats: int = 128,
		n_tasks: int = 1,
		mode: str = 'regression',
		n_classes: int = 2,
		@@ -296,7 +296,7 @@ class CGCNNModel(TorchModel):
		based on default setting of CGCNNFeaturizer.
		num_conv: int, default 3
		The number of convolutional layers.
		predicator_hidden_feats: int, default 128
		predictor_hidden_feats: int, default 128
		The size for hidden representations in the output MLP predictor.
		n_tasks: int, default 1
		The number of the output size.
		@@ -308,7 +308,7 @@ class CGCNNModel(TorchModel):
		This class accepts all the keyword arguments from TorchModel.
		"""
		model = CGCNN(in_node_dim, hidden_node_dim, in_edge_dim, num_conv,
		predicator_hidden_feats, n_tasks, mode, n_classes)
		predictor_hidden_feats, n_tasks, mode, n_classes)
		if mode == "regression":
		loss: Loss = L2Loss()
		output_types = ['prediction']

deepchem/models/torch_models/gat.py

+9 −10

Original line number	Diff line number	Diff line
		@@ -51,9 +51,9 @@ class GAT(nn.Module):
		in_node_dim: int = 38,
		hidden_node_dim: int = 64,
		heads: int = 4,
		dropout_rate: float = 0.0,
		dropout: float = 0.0,
		num_conv: int = 3,
		predicator_hidden_feats: int = 32,
		predictor_hidden_feats: int = 32,
		n_tasks: int = 1,
		):
		"""
		@@ -66,11 +66,11 @@ class GAT(nn.Module):
		The length of the hidden node feature vectors.
		heads: int, default 4
		The number of multi-head-attentions.
		dropout_rate: float, default 0.0
		dropout: float, default 0.0
		The dropout probability for each convolutional layer.
		num_conv: int, default 3
		The number of convolutional layers.
		predicator_hidden_feats: int, default 32
		predictor_hidden_feats: int, default 32
		The size for hidden representations in the output MLP predictor, default to 32.
		n_tasks: int, default 1
		The number of the output size, default to 1.
		@@ -87,7 +87,7 @@ class GAT(nn.Module):
		out_channels=hidden_node_dim,
		heads=heads,
		concat=False,
		dropout=dropout_rate) for _ in range(num_conv)
		dropout=dropout) for _ in range(num_conv)
		])
		self.pooling = global_mean_pool
		self.fc = nn.Linear(hidden_node_dim, predicator_hidden_feats)
		@@ -128,8 +128,7 @@ class GATModel(TorchModel):

		>> import deepchem as dc
		>> featurizer = dc.feat.MolGraphConvFeaturizer()
		>> dataset_config = {"reload": False, "featurizer": featurizer, "transformers": []}
		>> tasks, datasets, transformers = dc.molnet.load_tox21(**dataset_config)
		>> tasks, datasets, transformers = dc.molnet.load_tox21(reload=False, featurizer=featurizer, transformers=[])
		>> train, valid, test = datasets
		>> model = dc.models.GATModel(loss=dc.models.losses.SoftmaxCrossEntropy(), batch_size=32, learning_rate=0.001)
		>> model.fit(train, nb_epoch=50)
		@@ -156,7 +155,7 @@ class GATModel(TorchModel):
		in_node_dim: int = 38,
		hidden_node_dim: int = 64,
		heads: int = 4,
		dropout_rate: float = 0.0,
		dropout: float = 0.0,
		num_conv: int = 3,
		predicator_hidden_feats: int = 32,
		n_tasks: int = 1,
		@@ -173,7 +172,7 @@ class GATModel(TorchModel):
		The length of the hidden node feature vectors.
		heads: int, default 4
		The number of multi-head-attentions.
		dropout_rate: float, default 0.0
		dropout: float, default 0.0
		The dropout probability for each convolutional layer.
		num_conv: int, default 3
		The number of convolutional layers.
		@@ -188,7 +187,7 @@ class GATModel(TorchModel):
		in_node_dim,
		hidden_node_dim,
		heads,
		dropout_rate,
		dropout,
		num_conv,
		predicator_hidden_feats,
		n_tasks,

deepchem/utils/graph_conv_utils.py→deepchem/utils/molecule_feature_utils.py

+0 −0

File moved.

View file

Admin message