Commit 3ea6fae3 authored by Vignesh's avatar Vignesh
Browse files

Running example model using HAGCN

parent ebd100d6
Loading
Loading
Loading
Loading
+4 −49
Original line number Diff line number Diff line
@@ -23,7 +23,6 @@ class AdaptiveFilter(Layer):
               num_nodes,
               num_node_features,
               batch_size=64,
               k=1,
               init='glorot_uniform',
               combine_method='linear',
               **kwargs):
@@ -36,8 +35,6 @@ class AdaptiveFilter(Layer):
        Number of features per node in the graph
      batch_size: int, optional
        Batch size used for training
      k: int, optional
        k-hop neighborhood to look at
      init: str, optional
        Initialization method for the weights
      combine_method: str, optional
@@ -47,7 +44,6 @@ class AdaptiveFilter(Layer):

    if combine_method not in ['linear', 'prod']:
      raise ValueError('Combine method needs to be one of linear or product')
    self.k = k
    self.num_nodes = num_nodes
    self.num_node_features = num_node_features
    self.batch_size = batch_size
@@ -66,21 +62,6 @@ class AdaptiveFilter(Layer):

    self.trainable_weights = [self.Q]

  @staticmethod
  def pow_k(inputs, k=1):
    """Computes the kth power of inputs, used for adjacency matrix.

    Uses exponentiation by squaring, so only O(log k) matmuls are built
    into the graph.

    Parameters
    ----------
    inputs: tf.Tensor
      Square (possibly batched) adjacency matrix; matmul acts on the
      last two axes.
    k: int, optional
      Non-negative exponent.
    """
    if k == 1:
      return inputs
    if k == 0:
      # NOTE(review): mathematically A^0 is the identity, not all-ones;
      # preserved as-is because callers always use k >= 1 — confirm
      # before relying on this branch.
      return tf.ones(inputs.shape)

    # For odd k, k // 2 == (k - 1) // 2, so one recursive call serves
    # both parities.
    half = AdaptiveFilter.pow_k(inputs, int(k / 2))
    if k % 2 == 0:
      return tf.matmul(half, half)
    # Odd k: A^k = A @ (A^((k-1)/2))^2. The previous version multiplied
    # by the un-squared half, yielding A^((k+1)/2) (e.g. A^2 for k=3).
    return tf.matmul(inputs, tf.matmul(half, half))

  def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
    act_fn = activations.get('sigmoid')
    if in_layers is None:
@@ -88,11 +69,8 @@ class AdaptiveFilter(Layer):
    in_layers = convert_to_layers(in_layers)
    self._build()

    A = in_layers[0].out_tensor
    A_tilda_k = in_layers[0].out_tensor
    X = in_layers[1].out_tensor
    A_k = AdaptiveFilter.pow_k(A, k=self.k)
    I = tf.eye(num_rows=self.num_nodes, batch_shape=[self.batch_size])
    A_tilda_k = tf.minimum(A_k + I, 1)

    if self.combine_method == "linear":
      concatenated = tf.concat([A_tilda_k, X], axis=2)
@@ -126,7 +104,6 @@ class KOrderGraphConv(Layer):
               num_nodes,
               num_node_features,
               batch_size=64,
               k=1,
               init='glorot_uniform',
               **kwargs):
    """
@@ -138,8 +115,6 @@ class KOrderGraphConv(Layer):
        Number of features per node in the graph
      batch_size: int, optional
        Batch size used for training
      k: int, optional
        k-hop neighborhood to look at
      init: str, optional
        Initialization method for the weights
      combine_method: str, optional
@@ -149,7 +124,6 @@ class KOrderGraphConv(Layer):

    self.num_nodes = num_nodes
    self.num_node_features = num_node_features
    self.k = k
    self.batch_size = batch_size
    self.init = initializations.get(init)

@@ -163,40 +137,21 @@ class KOrderGraphConv(Layer):

    self.trainable_weights = [self.W, self.b]

  @staticmethod
  def pow_k(inputs, k=1):
    """Computes the kth power of inputs, used for adjacency matrix.

    Uses exponentiation by squaring, so only O(log k) matmuls are built
    into the graph.

    Parameters
    ----------
    inputs: tf.Tensor
      Square (possibly batched) adjacency matrix; matmul acts on the
      last two axes.
    k: int, optional
      Non-negative exponent.
    """
    if k == 1:
      return inputs
    if k == 0:
      # NOTE(review): mathematically A^0 is the identity, not all-ones;
      # preserved as-is because callers always use k >= 1 — confirm
      # before relying on this branch.
      return tf.ones(inputs.shape)

    # For odd k, k // 2 == (k - 1) // 2, so one recursive call serves
    # both parities.
    half = KOrderGraphConv.pow_k(inputs, int(k / 2))
    if k % 2 == 0:
      return tf.matmul(half, half)
    # Odd k: A^k = A @ (A^((k-1)/2))^2. The previous version multiplied
    # by the un-squared half, yielding A^((k+1)/2) (e.g. A^2 for k=3).
    return tf.matmul(inputs, tf.matmul(half, half))

  def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
    if in_layers is None:
      in_layers = self.in_layers
    in_layers = convert_to_layers(in_layers)
    self._build()

    A = in_layers[0].out_tensor
    A_tilda_k = in_layers[0].out_tensor
    X = in_layers[1].out_tensor
    adp_fn_val = in_layers[2].out_tensor

    A_k = KOrderGraphConv.pow_k(A, k=self.k)
    I = tf.eye(num_rows=self.num_nodes, batch_shape=[self.batch_size])
    A_tilda_k = tf.minimum(A_k + I, 1)

    attn_weights = tf.multiply(adp_fn_val, self.W)
    wt_adjacency = attn_weights * A_tilda_k
    out = tf.matmul(wt_adjacency, X) + self.b
    out_tensor = out
    out = tf.matmul(wt_adjacency, X) + tf.expand_dims(self.b, axis=1)

    out_tensor = out
    if set_tensors:
      self.variables = self.trainable_weights
      self.out_tensor = out_tensor
+227 −0
Original line number Diff line number Diff line
""" High-Order and Adaptive Graph Convolutional Network (HA-GCN) model, defined in https://arxiv.org/pdf/1706.09916"""

from __future__ import division
from __future__ import unicode_literals
from __future__ import print_function

__author__ = "Vignesh Ram Somnath"
__license__ = "MIT"

import numpy as np
import tensorflow as tf

from deepchem.models.tensorgraph.tensor_graph import TensorGraph
from deepchem.models.tensorgraph.layers import Feature, Label, Weights
from deepchem.models.tensorgraph.layers import Concat
from deepchem.models.tensorgraph.layers import ReduceSum, Dense, ReLU, Flatten, Reshape
from deepchem.models.tensorgraph.layers import L2Loss, WeightedError
from deepchem.feat.mol_graphs import ConvMol
from hagcn_layers import KOrderGraphConv, AdaptiveFilter


class HAGCN(TensorGraph):
  """High-Order and Adaptive Graph Convolutional Network (HA-GCN).

  Model from https://arxiv.org/pdf/1706.09916. For each k in
  [1, k_max] the graph is fed a precomputed clipped adjacency power
  A_tilda_k = min(A^k + I, 1) (built on the numpy side in
  `default_generator`) plus the zero-padded node-feature matrix, and runs
  one AdaptiveFilter / KOrderGraphConv pair per k. The per-k embeddings
  are concatenated, passed through a ReLU and three dense layers, and
  trained with a weighted L2 loss.
  """

  def __init__(self,
               max_nodes,
               num_node_features,
               n_tasks=1,
               k_max=1,
               task_mode='graph',
               combine_method='linear',
               **kwargs):
    """
      Parameters
      ----------
      max_nodes: int
        Maximum number of nodes (atoms) graphs in dataset can have
      num_node_features: int
        Number of features per node
      n_tasks: int, optional
        Number of prediction tasks
      k_max: int, optional
        Largest k-hop neighborhood per atom
      task_mode: str, optional
        Whether the model is used for node based tasks or edge based tasks or graph tasks
      combine_method: str, optional
        Combining the inputs for the AdaptiveFilterLayer
      kwargs: dict
        Forwarded to TensorGraph.__init__ (e.g. batch_size, which
        _build later reads as self.batch_size)

      Raises
      ------
      ValueError
        If task_mode is not one of 'graph', 'node', 'edge'.
    """

    if task_mode not in ['graph', 'node', 'edge']:
      raise ValueError('task_mode must be one of graph, node, edge')

    self.k_max = k_max
    self.n_tasks = n_tasks
    self.max_nodes = max_nodes
    self.num_node_features = num_node_features
    self.task_mode = task_mode
    self.combine_method = combine_method
    # TensorGraph consumes kwargs (batch_size among them) before the
    # graph below is assembled.
    super(HAGCN, self).__init__(**kwargs)

    self._build()

  def _build(self):
    """Assembles the TensorGraph: input features, conv stack, output, loss."""
    # One adjacency-power Feature per k; the actual min(A^k + I, 1)
    # values are computed in default_generator, not inside the graph.
    self.A_tilda_k = list()
    for k in range(1, self.k_max + 1):
      self.A_tilda_k.append(
          Feature(
              name="graph_adjacency_{}".format(k),
              dtype=tf.float32,
              shape=[self.batch_size, self.max_nodes, self.max_nodes]))
    self.X = Feature(
        name='atom_features',
        dtype=tf.float32,
        shape=[self.batch_size, self.max_nodes, self.num_node_features])

    graph_layers = list()
    adaptive_filters = list()

    for index, k in enumerate(range(1, self.k_max + 1)):

      in_layers = [self.A_tilda_k[index], self.X]

      adaptive_filters.append(
          AdaptiveFilter(
              batch_size=self.batch_size,
              in_layers=in_layers,
              num_nodes=self.max_nodes,
              num_node_features=self.num_node_features,
              combine_method=self.combine_method))

      # Each k-order graph conv also consumes its adaptive filter's output.
      graph_layers.append(
          KOrderGraphConv(
              batch_size=self.batch_size,
              in_layers=in_layers + [adaptive_filters[index]],
              num_nodes=self.max_nodes,
              num_node_features=self.num_node_features,
              init='glorot_uniform'))

    # Concatenate the per-k node embeddings along the feature axis.
    graph_features = Concat(in_layers=graph_layers, axis=2)
    graph_features = ReLU(in_layers=[graph_features])
    flattened = Flatten(in_layers=[graph_features])

    dense1 = Dense(
        in_layers=[flattened], out_channels=64, activation_fn=tf.nn.relu)
    dense2 = Dense(
        in_layers=[dense1], out_channels=16, activation_fn=tf.nn.relu)
    dense3 = Dense(
        in_layers=[dense2], out_channels=1 * self.n_tasks, activation_fn=None)
    output = Reshape(in_layers=[dense3], shape=(-1, self.n_tasks, 1))
    self.add_output(output)

    label = Label(shape=(None, self.n_tasks, 1))
    weights = Weights(shape=(None, self.n_tasks))
    loss = ReduceSum(L2Loss(in_layers=[label, output]))

    weighted_loss = WeightedError(in_layers=[loss, weights])
    self.set_loss(weighted_loss)

  @staticmethod
  def pow_k(inputs, k=1):
    """Computes the kth power of inputs, used for adjacency matrix.

    Uses exponentiation by squaring, so O(log k) matrix products.

    Parameters
    ----------
    inputs: np.ndarray
      Square adjacency matrix (as produced by compute_adjacency_matrix).
    k: int, optional
      Non-negative exponent.
    """
    if k == 1:
      return inputs
    if k == 0:
      # NOTE(review): mathematically A^0 is the identity, not all-ones;
      # preserved because every caller uses k >= 1 — confirm before
      # relying on this branch.
      return np.ones(inputs.shape)

    # k // 2 == (k - 1) // 2 for odd k, so one recursive call covers
    # both parities.
    half = HAGCN.pow_k(inputs, k=k // 2)
    if k % 2 == 0:
      return np.matmul(half, half)
    # Odd k: A^k = A @ (A^((k-1)/2))^2. The previous implementation
    # multiplied by the un-squared half, producing A^((k+1)/2) for odd
    # k >= 3 (e.g. A^2 instead of A^3).
    return np.matmul(inputs, np.matmul(half, half))

  def compute_adjacency_matrix(self, mol):
    """Computes the (max_nodes, max_nodes) 0/1 adjacency matrix for a mol.

    Rows/columns beyond the molecule's atom count remain zero (padding).
    """
    assert isinstance(mol, ConvMol)
    canon_adj_lists = mol.get_adjacency_list()
    adjacency = np.zeros((self.max_nodes, self.max_nodes))
    for atom_idx, connections in enumerate(canon_adj_lists):
      for neighbor_idx in connections:
        adjacency[atom_idx, neighbor_idx] = 1
    return adjacency

  @staticmethod
  def compute_a_tilda_k(inputs, k=1):
    """Returns A_tilda_k = min(A^k + I, 1), the clipped k-hop adjacency."""
    A_k = HAGCN.pow_k(inputs, k)
    A_k_I = A_k + np.eye(inputs.shape[-1])
    A_tilda_k = np.minimum(A_k_I, 1)
    return A_tilda_k

  def default_generator(self,
                        dataset,
                        epochs=1,
                        predict=False,
                        deterministic=True,
                        pad_batches=True):
    """Yields feed_dicts mapping the model's Features/Labels/Weights to
    numpy batches: padded atom features plus one A_tilda_k per k.
    """
    for epoch in range(epochs):
      for (X_b, y_b, w_b, ids_b) in dataset.iterbatches(
          batch_size=self.batch_size,
          deterministic=deterministic,
          pad_batches=pad_batches):
        feed_dict = {}
        if w_b is not None and not predict:
          feed_dict[self.task_weights[0]] = w_b
        if y_b is not None:
          feed_dict[self.labels[0]] = y_b

        atom_features = list()
        A_tilda_k = [[] for _ in range(1, self.k_max + 1)]

        for im, mol in enumerate(X_b):
          # Zero-pad every molecule's atom features up to max_nodes.
          num_atoms = mol.get_num_atoms()
          atom_feats = mol.get_atom_features()
          num_to_pad = self.max_nodes - num_atoms
          if num_to_pad > 0:
            to_pad = np.zeros((num_to_pad, self.num_node_features))
            atom_feats = np.concatenate([atom_feats, to_pad], axis=0)
          atom_features.append(atom_feats)

          # Clipped adjacency powers, one per k in [1, k_max].
          adjacency = self.compute_adjacency_matrix(mol)
          for i, k in enumerate(range(1, self.k_max + 1)):
            A_tilda_k[i].append(HAGCN.compute_a_tilda_k(adjacency, k=k))

        # Final feed_dict setup
        atom_features = np.asarray(atom_features)
        for i, k in enumerate(range(1, self.k_max + 1)):
          val = np.asarray(A_tilda_k[i])
          feed_dict[self.A_tilda_k[i]] = val
        feed_dict[self.X] = atom_features

        yield feed_dict

  def predict(self, dataset, transformers=[], outputs=None):
    """
    Uses self to make predictions on provided Dataset object.

    Parameters
    ----------
    dataset: dc.data.Dataset
      Dataset to make prediction on
    transformers: list
      List of dc.trans.Transformers.
    outputs: object
      If outputs is None, then will assume outputs=self.default_outputs. If outputs is
      a Layer/Tensor, then will evaluate and return as a single ndarray. If
      outputs is a list of Layers/Tensors, will return a list of ndarrays.

    Returns
    -------
    results: numpy ndarray or list of numpy ndarrays
    """
    generator = self.default_generator(dataset, predict=True, pad_batches=True)
    preds = self.predict_on_generator(generator, transformers, outputs)
    if len(dataset.y) % self.batch_size == 0:
      return preds
    else:
      # Batches are padded to a multiple of batch_size; drop the
      # num_added = batch_size - remainder trailing padded predictions.
      after_pad = (len(dataset.y) // self.batch_size + 1) * self.batch_size
      closest = (len(dataset.y) // self.batch_size) * self.batch_size
      remainder = len(dataset.y) % self.batch_size
      num_added = after_pad - remainder - closest
      preds = preds[:-num_added]
      return preds
+46 −0
Original line number Diff line number Diff line
from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals

import numpy as np
np.random.seed(123)
import tensorflow as tf
tf.set_random_seed(123)
import deepchem as dc
from hagcn_model import HAGCN

# Load the Delaney solubility benchmark with graph-conv featurization and
# an index-based split.
delaney_tasks, delaney_datasets, transformers = dc.molnet.load_delaney(
    featurizer='GraphConv', split='index')
train_dataset, valid_dataset, test_dataset = delaney_datasets

# Fit models
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, np.mean)

# HAGCN pads every graph to a fixed node count, so size the model to the
# largest molecule across all three splits.
max_atoms = max(
    mol.get_num_atoms()
    for split in (train_dataset, valid_dataset, test_dataset)
    for mol in split.X)

# Hyperparameters
n_atom_feat = 75
batch_size = 128
k_max = 4

model = HAGCN(
    max_nodes=max_atoms,
    n_tasks=len(delaney_tasks),
    num_node_features=n_atom_feat,
    batch_size=batch_size,
    k_max=k_max)

model.fit(dataset=train_dataset, nb_epoch=80)

print("Evaluating model")
train_scores = model.evaluate(train_dataset, [metric], transformers)
valid_scores = model.evaluate(valid_dataset, [metric], transformers)

print("Train scores")
print(train_scores)

print("Validation scores")
print(valid_scores)
 No newline at end of file