Commit 1ef32ec1 authored by nd-02110114's avatar nd-02110114
Browse files

👌 update for review comments

parent 47835b40
Loading
Loading
Loading
Loading
+10 −10
Original line number Diff line number Diff line
@@ -4,7 +4,7 @@ import numpy as np
from deepchem.utils.typing import RDKitAtom, RDKitBond, RDKitMol
from deepchem.feat.graph_data import GraphData
from deepchem.feat.base_classes import MolecularFeaturizer
from deepchem.utils.graph_conv_utils import get_atom_type_one_hot, \
from deepchem.utils.molecule_feature_utils import get_atom_type_one_hot, \
  construct_hydrogen_bonding_info, get_atom_hydrogen_bonding_one_hot, \
  get_atom_is_in_aromatic_one_hot, get_atom_hybridization_one_hot, \
  get_atom_total_num_Hs_one_hot, get_atom_chirality_one_hot, get_atom_formal_charge, \
@@ -85,7 +85,7 @@ class MolGraphConvFeaturizer(MolecularFeaturizer):
  - Chirality: A one-hot vector of the chirality, "R" or "S".
  - Formal charge: Integer electronic charge.
  - Partial charge: Calculated partial charge.
  - Ring sizes: A one-hot vector of the number of rings (3-8) that include this atom.
  - Ring sizes: A one-hot vector of the size (3-8) of rings that include this atom.
  - Hybridization: A one-hot vector of "sp", "sp2", "sp3".
  - Hydrogen bonding: A one-hot vector of whether this atom is a hydrogen bond donor or acceptor.
  - Aromatic: A one-hot vector of whether the atom belongs to an aromatic ring.
@@ -101,7 +101,7 @@ class MolGraphConvFeaturizer(MolecularFeaturizer):
  - Stereo: A one-hot vector of the stereo configuration of a bond.

  If you want to know more details about features, please check the paper [1]_ and
  utilities in deepchem.utils.graph_conv_utils.py.
  utilities in deepchem.utils.molecule_feature_utils.py.

  Examples
  --------
@@ -125,15 +125,15 @@ class MolGraphConvFeaturizer(MolecularFeaturizer):
  This class requires RDKit to be installed.
  """

  def __init__(self, add_self_loop: bool = False):
  def __init__(self, add_self_edges: bool = False):
    """
    Parameters
    ----------
    add_self_loop: bool, default False
    add_self_edges: bool, default False
      Whether to add self-connected edges or not. If you want to use DGL,
      you sometimes need to add explicit self-connected edges.
    """
    self.add_self_loop = add_self_loop
    self.add_self_edges = add_self_edges

  def _featurize(self, mol: RDKitMol) -> GraphData:
    """Calculate molecule graph features from RDKit mol object.
@@ -174,23 +174,23 @@ class MolGraphConvFeaturizer(MolecularFeaturizer):
    )

    # construct edge (bond) information
    src, dist, bond_features = [], [], []
    src, dest, bond_features = [], [], []
    for bond in mol.GetBonds():
      # add edge list considering a directed graph
      start, end = bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()
      src += [start, end]
      dist += [end, start]
      dest += [end, start]
      bond_features += 2 * [_construct_bond_feature(bond)]

    if self.add_self_loop:
      num_atoms = mol.GetNumAtoms()
      src += [i for i in range(num_atoms)]
      dist += [i for i in range(num_atoms)]
      dest += [i for i in range(num_atoms)]
      # add dummy edge features
      bond_fea_length = len(bond_features[0])
      bond_features += num_atoms * [[0 for _ in range(bond_fea_length)]]

    return GraphData(
        node_features=atom_features,
        edge_index=np.array([src, dist], dtype=np.int),
        edge_index=np.array([src, dest], dtype=np.int),
        edge_features=np.array(bond_features, dtype=np.float))
+1 −1
Original line number Diff line number Diff line
@@ -25,7 +25,7 @@ class TestMolGraphConvFeaturizer(unittest.TestCase):

  def test_featurizer_with_self_loop(self):
    smiles = ["C1=CC=CN=C1", "O=C(NCc1cc(OC)c(O)cc1)CCCC/C=C/C(C)C"]
    featurizer = MolGraphConvFeaturizer(add_self_loop=True)
    featurizer = MolGraphConvFeaturizer(add_self_edges=True)
    graph_feat = featurizer.featurize(smiles)
    assert len(graph_feat) == 2

+8 −8
Original line number Diff line number Diff line
@@ -144,7 +144,7 @@ class CGCNN(nn.Module):
      hidden_node_dim: int = 64,
      in_edge_dim: int = 41,
      num_conv: int = 3,
      predicator_hidden_feats: int = 128,
      predictor_hidden_feats: int = 128,
      n_tasks: int = 1,
      mode: str = 'regression',
      n_classes: int = 2,
@@ -162,7 +162,7 @@ class CGCNN(nn.Module):
      based on default setting of CGCNNFeaturizer.
    num_conv: int, default 3
      The number of convolutional layers.
    predicator_hidden_feats: int, default 128
    predictor_hidden_feats: int, default 128
      The size for hidden representations in the output MLP predictor.
    n_tasks: int, default 1
      The number of the output size.
@@ -190,11 +190,11 @@ class CGCNN(nn.Module):
            batch_norm=True) for _ in range(num_conv)
    ])
    self.pooling = dgl.mean_nodes
    self.fc = nn.Linear(hidden_node_dim, predicator_hidden_feats)
    self.fc = nn.Linear(hidden_node_dim, predictor_hidden_feats)
    if self.mode == 'regression':
      self.out = nn.Linear(predicator_hidden_feats, n_tasks)
      self.out = nn.Linear(predictor_hidden_feats, n_tasks)
    else:
      self.out = nn.Linear(predicator_hidden_feats, n_tasks * n_classes)
      self.out = nn.Linear(predictor_hidden_feats, n_tasks * n_classes)

  def forward(self, dgl_graph):
    """Predict labels
@@ -276,7 +276,7 @@ class CGCNNModel(TorchModel):
               hidden_node_dim: int = 64,
               in_edge_dim: int = 41,
               num_conv: int = 3,
               predicator_hidden_feats: int = 128,
               predictor_hidden_feats: int = 128,
               n_tasks: int = 1,
               mode: str = 'regression',
               n_classes: int = 2,
@@ -296,7 +296,7 @@ class CGCNNModel(TorchModel):
      based on default setting of CGCNNFeaturizer.
    num_conv: int, default 3
      The number of convolutional layers.
    predicator_hidden_feats: int, default 128
    predictor_hidden_feats: int, default 128
      The size for hidden representations in the output MLP predictor.
    n_tasks: int, default 1
      The number of the output size.
@@ -308,7 +308,7 @@ class CGCNNModel(TorchModel):
      This class accepts all the keyword arguments from TorchModel.
    """
    model = CGCNN(in_node_dim, hidden_node_dim, in_edge_dim, num_conv,
                  predicator_hidden_feats, n_tasks, mode, n_classes)
                  predictor_hidden_feats, n_tasks, mode, n_classes)
    if mode == "regression":
      loss: Loss = L2Loss()
      output_types = ['prediction']
+9 −10
Original line number Diff line number Diff line
@@ -51,9 +51,9 @@ class GAT(nn.Module):
      in_node_dim: int = 38,
      hidden_node_dim: int = 64,
      heads: int = 4,
      dropout_rate: float = 0.0,
      dropout: float = 0.0,
      num_conv: int = 3,
      predicator_hidden_feats: int = 32,
      predictor_hidden_feats: int = 32,
      n_tasks: int = 1,
  ):
    """
@@ -66,11 +66,11 @@ class GAT(nn.Module):
      The length of the hidden node feature vectors.
    heads: int, default 4
      The number of multi-head-attentions.
    dropout_rate: float, default 0.0
    dropout: float, default 0.0
      The dropout probability for each convolutional layer.
    num_conv: int, default 3
      The number of convolutional layers.
    predicator_hidden_feats: int, default 32
    predictor_hidden_feats: int, default 32
      The size for hidden representations in the output MLP predictor, default to 32.
    n_tasks: int, default 1
      The number of the output size, default to 1.
@@ -87,7 +87,7 @@ class GAT(nn.Module):
            out_channels=hidden_node_dim,
            heads=heads,
            concat=False,
            dropout=dropout_rate) for _ in range(num_conv)
            dropout=dropout) for _ in range(num_conv)
    ])
    self.pooling = global_mean_pool
    self.fc = nn.Linear(hidden_node_dim, predicator_hidden_feats)
@@ -128,8 +128,7 @@ class GATModel(TorchModel):

  >> import deepchem as dc
  >> featurizer = dc.feat.MolGraphConvFeaturizer()
  >> dataset_config = {"reload": False, "featurizer": featurizer, "transformers": []}
  >> tasks, datasets, transformers = dc.molnet.load_tox21(**dataset_config)
  >> tasks, datasets, transformers = dc.molnet.load_tox21(reload=False, featurizer=featurizer, transformers=[])
  >> train, valid, test = datasets
  >> model = dc.models.GATModel(loss=dc.models.losses.SoftmaxCrossEntropy(), batch_size=32, learning_rate=0.001)
  >> model.fit(train, nb_epoch=50)
@@ -156,7 +155,7 @@ class GATModel(TorchModel):
               in_node_dim: int = 38,
               hidden_node_dim: int = 64,
               heads: int = 4,
               dropout_rate: float = 0.0,
               dropout: float = 0.0,
               num_conv: int = 3,
               predicator_hidden_feats: int = 32,
               n_tasks: int = 1,
@@ -173,7 +172,7 @@ class GATModel(TorchModel):
      The length of the hidden node feature vectors.
    heads: int, default 4
      The number of multi-head-attentions.
    dropout_rate: float, default 0.0
    dropout: float, default 0.0
      The dropout probability for each convolutional layer.
    num_conv: int, default 3
      The number of convolutional layers.
@@ -188,7 +187,7 @@ class GATModel(TorchModel):
        in_node_dim,
        hidden_node_dim,
        heads,
        dropout_rate,
        dropout,
        num_conv,
        predicator_hidden_feats,
        n_tasks,
Loading