:construction: wip commit (767e05c6) · Commits · 钟慕尧 / deepchem

deepchem/feat/material_featurizers/cgcnn_featurizer.py

+2 −2

Original line number	Diff line number	Diff line
		@@ -50,14 +50,14 @@ class CGCNNFeaturizer(MaterialStructureFeaturizer):

		def __init__(self,
		radius: float = 8.0,
		max_neighbors: float = 8,
		max_neighbors: float = 12,
		step: float = 0.2):
		"""
		Parameters
		----------
		radius: float (default 8.0)
		Radius of sphere for finding neighbors of atoms in unit cell.
		max_neighbors: int (default 8)
		max_neighbors: int (default 12)
		Maximum number of neighbors to consider when constructing graph.
		step: float (default 0.2)
		Step size for Gaussian filter. This value is used when building edge features.

deepchem/models/tests/test_gat.py

+22 −3

Original line number	Diff line number	Diff line
		import unittest

		from deepchem.feat import MolGraphConvFeaturizer
		from deepchem.models import GATModel, losses
		from deepchem.models import GATModel
		from deepchem.models.tests.test_graph_models import get_dataset

		try:
		@@ -14,7 +14,7 @@ except:

		@unittest.skipIf(not has_pytorch_and_pyg,
		'PyTorch and PyTorch Geometric are not installed')
		def test_gat_classification():
		def test_gat_regression():
		# load datasets
		featurizer = MolGraphConvFeaturizer()
		tasks, dataset, transformers, metric = get_dataset(
		@@ -23,10 +23,29 @@ def test_gat_classification():
		# initialize models
		n_tasks = len(tasks)
		model = GATModel(
		n_tasks=n_tasks, loss=losses.L2Loss(), batch_size=4, learning_rate=0.001)
		mode='regression', n_tasks=n_tasks, batch_size=4, learning_rate=0.001)

		# overfit test
		model.fit(dataset, nb_epoch=100)
		scores = model.evaluate(dataset, [metric], transformers)
		# TODO: check whether this assertion is correct
		assert scores['mean_absolute_error'] < 1.0


		@unittest.skipIf(not has_pytorch_and_pyg,
		'PyTorch and PyTorch Geometric are not installed')
		def test_gat_classification():
		# load datasets
		featurizer = MolGraphConvFeaturizer()
		tasks, dataset, transformers, metric = get_dataset(
		'classification', featurizer=featurizer)

		# initialize models
		n_tasks = len(tasks)
		model = GATModel(
		mode='classification', n_tasks=n_tasks, batch_size=10, learning_rate=0.001)

		# overfit test
		model.fit(dataset, nb_epoch=10)
		scores = model.evaluate(dataset, [metric], transformers)
		assert scores['mean-roc_auc_score'] >= 0.9

deepchem/models/torch_models/cgcnn.py

+16 −10

Original line number	Diff line number	Diff line
		@@ -69,11 +69,11 @@ class CGCNNLayer(nn.Module):
		return {'gated_z': gated_z, 'message_z': message_z}

		def reduce_func(self, nodes):
		new_h = nodes.data['x'] + torch.sum(
		nodes.mailbox['gated_z'] * nodes.mailbox['message_z'], dim=1)
		return {'x': new_h}
		nbr_sumed = torch.sum(nodes.mailbox['gated_z'] * nodes.mailbox['message_z'], dim=1)
		new_x = F.softplus(nodes.data['x'] + nbr_sumed)
		return {'new_x': new_x}

		def forward(self, dgl_graph):
		def forward(self, dgl_graph, node_feats, edge_feats):
		"""Update node representations.

		Parameters
		@@ -87,10 +87,13 @@ class CGCNNLayer(nn.Module):
		dgl_graph: DGLGraph
		DGLGraph for a batch of updated graphs.
		"""
		dgl_graph.ndata['x'] = node_feats
		dgl_graph.edata['edge_attr'] = edge_feats
		dgl_graph.update_all(self.message_func, self.reduce_func)
		node_feats = dgl_graph.ndata.pop('new_x')
		if self.batch_norm is not None:
		dgl_graph.ndata['x'] = self.batch_norm(dgl_graph.ndata['x'])
		return dgl_graph
		node_feats = self.batch_norm(node_feats)
		return node_feats, edge_feats


		class CGCNN(nn.Module):
		@@ -215,15 +218,18 @@ class CGCNN(nn.Module):
		"""
		graph = dgl_graph
		# embedding node features
		graph.ndata['x'] = self.embedding(graph.ndata['x'])
		node_feats = graph.ndata.pop('x')
		edge_feats = graph.edata.pop('edge_attr')
		node_feats = self.embedding(node_feats)

		# convolutional layer
		for conv in self.conv_layers:
		graph = conv(graph)
		node_feats, edge_feats = conv(graph, node_feats, edge_feats)

		# pooling
		graph_feat = self.pooling(graph, 'x')
		graph_feat = self.fc(graph_feat)
		graph.ndata['updated_x'] = node_feats
		graph_feat = F.softplus(self.pooling(graph, 'updated_x'))
		graph_feat = F.softplus(self.fc(graph_feat))
		out = self.out(graph_feat)

		if self.mode == 'regression':

deepchem/models/torch_models/gat.py

+45 −15

Original line number	Diff line number	Diff line
		"""
		This is a sample implementation for working PyTorch Geometric with DeepChem!
		"""
		import torch
		import torch.nn as nn
		import torch.nn.functional as F

		from deepchem.models.torch_models.torch_model import TorchModel
		from deepchem.models.losses import Loss, L2Loss, SparseSoftmaxCrossEntropy


		class GAT(nn.Module):
		@@ -55,6 +58,8 @@ class GAT(nn.Module):
		num_conv: int = 3,
		predictor_hidden_feats: int = 32,
		n_tasks: int = 1,
		mode: str = 'classification',
		n_classes: int = 2,
		):
		"""
		Parameters
		@@ -74,12 +79,20 @@ class GAT(nn.Module):
		The size for hidden representations in the output MLP predictor, default to 32.
		n_tasks: int, default 1
		The number of the output size, default to 1.
		mode: str, default 'classification'
		The model type, 'classification' or 'regression'.
		n_classes: int, default 2
		The number of classes to predict (only used in classification mode).
		"""
		super(GAT, self).__init__()
		try:
		from torch_geometric.nn import GATConv, global_mean_pool
		except:
		raise ValueError("This class requires PyTorch Geometric to be installed.")
		super(GAT, self).__init__()

		self.n_tasks = n_tasks
		self.mode = mode
		self.n_classes = n_classes
		self.embedding = nn.Linear(in_node_dim, hidden_node_dim)
		self.conv_layers = nn.ModuleList([
		GATConv(
		@@ -91,7 +104,10 @@ class GAT(nn.Module):
		])
		self.pooling = global_mean_pool
		self.fc = nn.Linear(hidden_node_dim, predictor_hidden_feats)
		if self.mode == 'regression':
		self.out = nn.Linear(predictor_hidden_feats, n_tasks)
		else:
		self.out = nn.Linear(predictor_hidden_feats, n_tasks * n_classes)

		def forward(self, data):
		"""Predict labels
		@@ -115,9 +131,17 @@ class GAT(nn.Module):

		# pooling
		graph_feat = self.pooling(node_feat, data.batch)
		graph_feat = self.fc(graph_feat)
		graph_feat = F.relu(self.fc(graph_feat))
		out = self.out(graph_feat)

		if self.mode == 'regression':
		return out
		else:
		logits = out.view(-1, self.n_tasks, self.n_classes)
		# for n_tasks == 1 case
		logits = torch.squeeze(logits)
		proba = F.softmax(logits)
		return proba, logits


		class GATModel(TorchModel):
		@@ -130,7 +154,7 @@ class GATModel(TorchModel):
		>> featurizer = dc.feat.MolGraphConvFeaturizer()
		>> tasks, datasets, transformers = dc.molnet.load_tox21(reload=False, featurizer=featurizer, transformers=[])
		>> train, valid, test = datasets
		>> model = dc.models.GATModel(loss=dc.models.losses.SoftmaxCrossEntropy(), batch_size=32, learning_rate=0.001)
		>> model = dc.models.GATModel(mode='classification', n_tasks=len(tasks), batch_size=32, learning_rate=0.001)
		>> model.fit(train, nb_epoch=50)

		This model takes arbitrary graphs as input and predicts graph properties. This model is
		@@ -159,6 +183,8 @@ class GATModel(TorchModel):
		num_conv: int = 3,
		predictor_hidden_feats: int = 32,
		n_tasks: int = 1,
		mode: str = 'regression',
		n_classes: int = 2,
		**kwargs):
		"""
		This class accepts all the keyword arguments from TorchModel.
		@@ -180,19 +206,23 @@ class GATModel(TorchModel):
		The size for hidden representations in the output MLP predictor, default to 32.
		n_tasks: int, default 1
		The number of the output size, default to 1.
		mode: str, default 'regression'
		The model type, 'classification' or 'regression'.
		n_classes: int, default 2
		The number of classes to predict (only used in classification mode).
		kwargs: Dict
		This class accepts all the keyword arguments from TorchModel.
		"""
		model = GAT(
		in_node_dim,
		hidden_node_dim,
		heads,
		dropout,
		num_conv,
		predictor_hidden_feats,
		n_tasks,
		)
		super(GATModel, self).__init__(model, **kwargs)
		model = GAT(in_node_dim, hidden_node_dim, heads, dropout, num_conv,
		predictor_hidden_feats, n_tasks, mode, n_classes)
		if mode == "regression":
		loss: Loss = L2Loss()
		output_types = ['prediction']
		else:
		loss = SparseSoftmaxCrossEntropy()
		output_types = ['prediction', 'loss']
		super(GATModel, self).__init__(
		model, loss=loss, output_types=output_types, **kwargs)

		def _prepare_batch(self, batch):
		"""Create batch data for GAT.

deepchem/models/torch_models/torch_model.py

+7 −6

Original line number	Diff line number	Diff line
		@@ -159,7 +159,6 @@ class TorchModel(Model):
		"""
		super(TorchModel, self).__init__(
		model_instance=model, model_dir=model_dir, **kwargs)
		self.model = model
		if isinstance(loss, Loss):
		self._loss_fn: LossFn = _StandardLoss(model, loss)
		else:
		@@ -179,7 +178,7 @@ class TorchModel(Model):
		else:
		device = torch.device('cpu')
		self.device = device
		self.model.to(device)
		self.model = model.to(device)

		# W&B logging
		if wandb and not _has_wandb:
		@@ -844,12 +843,14 @@ class TorchModel(Model):
		labels = [
		x.astype(np.float32) if x.dtype == np.float64 else x for x in labels
		]
		labels = [torch.as_tensor(x, device=self.device).float() for x in labels]
		labels = [torch.as_tensor(x, device=self.device) for x in labels]
		if weights is not None:
		weights = [
		x.astype(np.float32) if x.dtype == np.float64 else x for x in weights
		]
		weights = [torch.as_tensor(x, device=self.device).float() for x in weights]
		weights = [
		torch.as_tensor(x, device=self.device).float() for x in weights
		]

		return (inputs, labels, weights)

		@@ -1110,8 +1111,8 @@ class _StandardLoss(object):
		"""Implements the loss function for models that use a dc.models.losses.Loss."""

		def __init__(self, model: torch.nn.Module, loss: Loss) -> None:
		self.model = model
		self.loss = loss
		self.model = model # not used
		self.loss = loss # not used
		self.criterion = loss._create_pytorch_loss()

		def __call__(self, outputs: List, labels: List, weights: List) -> float:

Admin message