Commit 767e05c6 authored by nd-02110114's avatar nd-02110114
Browse files

🚧 wip commit

parent 3e03ad9a
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -50,14 +50,14 @@ class CGCNNFeaturizer(MaterialStructureFeaturizer):

  def __init__(self,
               radius: float = 8.0,
               max_neighbors: float = 8,
               max_neighbors: float = 12,
               step: float = 0.2):
    """
    Parameters
    ----------
    radius: float (default 8.0)
      Radius of sphere for finding neighbors of atoms in unit cell.
    max_neighbors: int (default 8)
    max_neighbors: int (default 12)
      Maximum number of neighbors to consider when constructing graph.
    step: float (default 0.2)
      Step size for Gaussian filter. This value is used when building edge features.
+22 −3
Original line number Diff line number Diff line
import unittest

from deepchem.feat import MolGraphConvFeaturizer
from deepchem.models import GATModel, losses
from deepchem.models import GATModel
from deepchem.models.tests.test_graph_models import get_dataset

try:
@@ -14,7 +14,7 @@ except:

@unittest.skipIf(not has_pytorch_and_pyg,
                 'PyTorch and PyTorch Geometric are not installed')
def test_gat_classification():
def test_gat_regression():
  # load datasets
  featurizer = MolGraphConvFeaturizer()
  tasks, dataset, transformers, metric = get_dataset(
@@ -23,10 +23,29 @@ def test_gat_classification():
  # initialize models
  n_tasks = len(tasks)
  model = GATModel(
      n_tasks=n_tasks, loss=losses.L2Loss(), batch_size=4, learning_rate=0.001)
      mode='regression', n_tasks=n_tasks, batch_size=4, learning_rate=0.001)

  # overfit test
  model.fit(dataset, nb_epoch=100)
  scores = model.evaluate(dataset, [metric], transformers)
  # TODO: check whether this assertion is correct
  assert scores['mean_absolute_error'] < 1.0


@unittest.skipIf(not has_pytorch_and_pyg,
                 'PyTorch and PyTorch Geometric are not installed')
def test_gat_classification():
  """Overfit check: GATModel in classification mode on the sample dataset."""
  # fetch the classification dataset, featurized with MolGraphConvFeaturizer
  tasks, dataset, transformers, metric = get_dataset(
      'classification', featurizer=MolGraphConvFeaturizer())

  # build the classifier; n_tasks is taken from the dataset's task list
  model = GATModel(
      mode='classification',
      n_tasks=len(tasks),
      batch_size=10,
      learning_rate=0.001)

  # train, then evaluate on the same data the model was fitted on
  model.fit(dataset, nb_epoch=10)
  scores = model.evaluate(dataset, [metric], transformers)
  assert scores['mean-roc_auc_score'] >= 0.9
+16 −10
Original line number Diff line number Diff line
@@ -69,11 +69,11 @@ class CGCNNLayer(nn.Module):
    return {'gated_z': gated_z, 'message_z': message_z}

  def reduce_func(self, nodes):
    new_h = nodes.data['x'] + torch.sum(
        nodes.mailbox['gated_z'] * nodes.mailbox['message_z'], dim=1)
    return {'x': new_h}
    nbr_sumed = torch.sum(nodes.mailbox['gated_z'] * nodes.mailbox['message_z'], dim=1)
    new_x = F.softplus(nodes.data['x'] + nbr_sumed)
    return {'new_x': new_x}

  def forward(self, dgl_graph):
  def forward(self, dgl_graph, node_feats, edge_feats):
    """Update node representations.

    Parameters
@@ -87,10 +87,13 @@ class CGCNNLayer(nn.Module):
    dgl_graph: DGLGraph
      DGLGraph for a batch of updated graphs.
    """
    dgl_graph.ndata['x'] = node_feats
    dgl_graph.edata['edge_attr'] = edge_feats
    dgl_graph.update_all(self.message_func, self.reduce_func)
    node_feats = dgl_graph.ndata.pop('new_x')
    if self.batch_norm is not None:
      dgl_graph.ndata['x'] = self.batch_norm(dgl_graph.ndata['x'])
    return dgl_graph
      node_feats = self.batch_norm(node_feats)
    return node_feats, edge_feats


class CGCNN(nn.Module):
@@ -215,15 +218,18 @@ class CGCNN(nn.Module):
    """
    graph = dgl_graph
    # embedding node features
    graph.ndata['x'] = self.embedding(graph.ndata['x'])
    node_feats = graph.ndata.pop('x')
    edge_feats = graph.edata.pop('edge_attr')
    node_feats = self.embedding(node_feats)

    # convolutional layer
    for conv in self.conv_layers:
      graph = conv(graph)
      node_feats, edge_feats = conv(graph, node_feats, edge_feats)

    # pooling
    graph_feat = self.pooling(graph, 'x')
    graph_feat = self.fc(graph_feat)
    graph.ndata['updated_x'] = node_feats
    graph_feat = F.softplus(self.pooling(graph, 'updated_x'))
    graph_feat = F.softplus(self.fc(graph_feat))
    out = self.out(graph_feat)

    if self.mode == 'regression':
+45 −15
Original line number Diff line number Diff line
"""
This is a sample implementation showing how to use PyTorch Geometric with DeepChem!
"""
import torch
import torch.nn as nn
import torch.nn.functional as F

from deepchem.models.torch_models.torch_model import TorchModel
from deepchem.models.losses import Loss, L2Loss, SparseSoftmaxCrossEntropy


class GAT(nn.Module):
@@ -55,6 +58,8 @@ class GAT(nn.Module):
      num_conv: int = 3,
      predictor_hidden_feats: int = 32,
      n_tasks: int = 1,
      mode: str = 'classification',
      n_classes: int = 2,
  ):
    """
    Parameters
@@ -74,12 +79,20 @@ class GAT(nn.Module):
      The size for hidden representations in the output MLP predictor, default to 32.
    n_tasks: int, default 1
      The number of prediction tasks, default to 1.
    mode: str, default 'classification'
      The model type, 'classification' or 'regression'.
    n_classes: int, default 2
      The number of classes to predict (only used in classification mode).
    """
    super(GAT, self).__init__()
    try:
      from torch_geometric.nn import GATConv, global_mean_pool
    except:
      raise ValueError("This class requires PyTorch Geometric to be installed.")
    super(GAT, self).__init__()

    self.n_tasks = n_tasks
    self.mode = mode
    self.n_classes = n_classes
    self.embedding = nn.Linear(in_node_dim, hidden_node_dim)
    self.conv_layers = nn.ModuleList([
        GATConv(
@@ -91,7 +104,10 @@ class GAT(nn.Module):
    ])
    self.pooling = global_mean_pool
    self.fc = nn.Linear(hidden_node_dim, predictor_hidden_feats)
    if self.mode == 'regression':
      self.out = nn.Linear(predictor_hidden_feats, n_tasks)
    else:
      self.out = nn.Linear(predictor_hidden_feats, n_tasks * n_classes)

  def forward(self, data):
    """Predict labels
@@ -115,9 +131,17 @@ class GAT(nn.Module):

    # pooling
    graph_feat = self.pooling(node_feat, data.batch)
    graph_feat = self.fc(graph_feat)
    graph_feat = F.relu(self.fc(graph_feat))
    out = self.out(graph_feat)

    if self.mode == 'regression':
      return out
    else:
      logits = out.view(-1, self.n_tasks, self.n_classes)
      # for n_tasks == 1 case
      logits = torch.squeeze(logits)
      proba = F.softmax(logits)
      return proba, logits


class GATModel(TorchModel):
@@ -130,7 +154,7 @@ class GATModel(TorchModel):
  >> featurizer = dc.feat.MolGraphConvFeaturizer()
  >> tasks, datasets, transformers = dc.molnet.load_tox21(reload=False, featurizer=featurizer, transformers=[])
  >> train, valid, test = datasets
  >> model = dc.models.GATModel(loss=dc.models.losses.SoftmaxCrossEntropy(), batch_size=32, learning_rate=0.001)
  >> model = dc.models.GATModel(mode='classification', n_tasks=len(tasks), batch_size=32, learning_rate=0.001)
  >> model.fit(train, nb_epoch=50)

  This model takes arbitrary graphs as input and predicts graph properties. This model is
@@ -159,6 +183,8 @@ class GATModel(TorchModel):
               num_conv: int = 3,
               predictor_hidden_feats: int = 32,
               n_tasks: int = 1,
               mode: str = 'regression',
               n_classes: int = 2,
               **kwargs):
    """
    This class accepts all the keyword arguments from TorchModel.
@@ -180,19 +206,23 @@ class GATModel(TorchModel):
      The size for hidden representations in the output MLP predictor, default to 32.
    n_tasks: int, default 1
      The number of prediction tasks, default to 1.
    mode: str, default 'regression'
      The model type, 'classification' or 'regression'.
    n_classes: int, default 2
      The number of classes to predict (only used in classification mode).
    kwargs: Dict
      This class accepts all the keyword arguments from TorchModel.
    """
    model = GAT(
        in_node_dim,
        hidden_node_dim,
        heads,
        dropout,
        num_conv,
        predictor_hidden_feats,
        n_tasks,
    )
    super(GATModel, self).__init__(model, **kwargs)
    model = GAT(in_node_dim, hidden_node_dim, heads, dropout, num_conv,
                predictor_hidden_feats, n_tasks, mode, n_classes)
    if mode == "regression":
      loss: Loss = L2Loss()
      output_types = ['prediction']
    else:
      loss = SparseSoftmaxCrossEntropy()
      output_types = ['prediction', 'loss']
    super(GATModel, self).__init__(
        model, loss=loss, output_types=output_types, **kwargs)

  def _prepare_batch(self, batch):
    """Create batch data for GAT.
+7 −6
Original line number Diff line number Diff line
@@ -159,7 +159,6 @@ class TorchModel(Model):
    """
    super(TorchModel, self).__init__(
        model_instance=model, model_dir=model_dir, **kwargs)
    self.model = model
    if isinstance(loss, Loss):
      self._loss_fn: LossFn = _StandardLoss(model, loss)
    else:
@@ -179,7 +178,7 @@ class TorchModel(Model):
      else:
        device = torch.device('cpu')
    self.device = device
    self.model.to(device)
    self.model = model.to(device)

    # W&B logging
    if wandb and not _has_wandb:
@@ -844,12 +843,14 @@ class TorchModel(Model):
      labels = [
          x.astype(np.float32) if x.dtype == np.float64 else x for x in labels
      ]
      labels = [torch.as_tensor(x, device=self.device).float() for x in labels]
      labels = [torch.as_tensor(x, device=self.device) for x in labels]
    if weights is not None:
      weights = [
          x.astype(np.float32) if x.dtype == np.float64 else x for x in weights
      ]
      weights = [torch.as_tensor(x, device=self.device).float() for x in weights]
      weights = [
          torch.as_tensor(x, device=self.device).float() for x in weights
      ]

    return (inputs, labels, weights)

@@ -1110,8 +1111,8 @@ class _StandardLoss(object):
  """This implements the loss function for models that use a dc.models.losses.Loss."""

  def __init__(self, model: torch.nn.Module, loss: Loss) -> None:
    self.model = model
    self.loss = loss
    self.model = model  # not used
    self.loss = loss  # not used
    self.criterion = loss._create_pytorch_loss()

  def __call__(self, outputs: List, labels: List, weights: List) -> float: