Commit 380b2146 authored by miaecle's avatar miaecle
Browse files

ANI regression

parent 94600fb9
Loading
Loading
Loading
Loading
+13 −41
Original line number Diff line number Diff line
@@ -118,9 +118,9 @@ class ANIRegression(TensorGraph):
  def __init__(self,
               n_tasks,
               max_atoms,
               n_feat=1120,
               n_hidden=40,
               n_embedding=10, 
               atom_number_cases=[1, 6, 7, 8],
               atom_number_cases=[1, 6, 7, 8, 16],
               **kwargs):
    """
    Parameters
@@ -135,7 +135,7 @@ class ANIRegression(TensorGraph):
    self.n_tasks = n_tasks
    self.max_atoms = max_atoms
    self.n_hidden = n_hidden
    self.n_embedding = n_embedding
    self.n_feat = n_feat
    self.atom_number_cases = atom_number_cases
    super(ANIRegression, self).__init__(**kwargs)
    self.build_graph()
@@ -143,43 +143,14 @@ class ANIRegression(TensorGraph):
  def build_graph(self):
    self.atom_numbers = Feature(shape=(None, self.max_atoms), dtype=tf.int32)
    self.atom_flags = Feature(shape=(None, self.max_atoms, self.max_atoms))
    self.atom_coordinates = Feature(shape=(None, self.max_atoms, 3))

    distance_matrix = DistanceMatrix(
        self.max_atoms, in_layers=[self.atom_coordinates, self.atom_flags])
    distance_cutoff_radial = DistanceCutoff(
        self.max_atoms,
        cutoff=4.6 / 0.52917721092,
        in_layers=[distance_matrix, self.atom_flags])
    distance_cutoff_angular = DistanceCutoff(
        self.max_atoms,
        cutoff=3.1 / 0.52917721092,
        in_layers=[distance_matrix, self.atom_flags])
    radial_symmetry = RadialSymmetry(
        self.max_atoms,
        atomic_number_differentiated=True,
        atom_numbers=self.atom_number_cases,
        in_layers=[distance_cutoff_radial, distance_matrix, self.atom_numbers])
    angular_symmetry = AngularSymmetryMod(
        self.max_atoms,
        atomic_number_differentiated=True,
        atom_numbers=self.atom_number_cases,
        in_layers=[distance_cutoff_angular, distance_matrix, self.atom_coordinates, self.atom_numbers])
    atom_embedding = DTNNEmbedding(
        n_embedding=0, in_layers=[self.atom_numbers])

    feature_merge = BPFeatureMerge(
        self.max_atoms,
        in_layers=[
            atom_embedding, radial_symmetry, angular_symmetry, self.atom_flags
        ])
    self.atom_feats = Feature(shape=(None, self.max_atoms, self.n_feat))

    Hidden = AtomicDifferentiatedDense(
        self.max_atoms,
        self.n_hidden,
        self.atom_number_cases,
        activation='tanh',
        in_layers=[feature_merge, self.atom_numbers])
        in_layers=[self.atom_feats, self.atom_numbers])

    Hidden2 = AtomicDifferentiatedDense(
        self.max_atoms,
@@ -212,6 +183,7 @@ class ANIRegression(TensorGraph):
                        predict=False,
                        pad_batches=True):
    for epoch in range(epochs):
      print('Starting epoch %i' % epoch)
      for (X_b, y_b, w_b, ids_b) in dataset.iterbatches(
          batch_size=self.batch_size,
          deterministic=True,
@@ -228,5 +200,5 @@ class ANIRegression(TensorGraph):
        feed_dict[self.atom_flags] = np.stack([flags]*self.max_atoms, axis=2)*\
            np.stack([flags]*self.max_atoms, axis=1)
        feed_dict[self.atom_numbers] = np.array(X_b[:, :, 0], dtype=int)
        feed_dict[self.atom_coordinates] = np.array(X_b[:, :, 1:], dtype=float)
        feed_dict[self.atom_feats] = np.array(X_b[:, :, 1:], dtype=float)
        yield feed_dict
 No newline at end of file
+2 −1
Original line number Diff line number Diff line
@@ -16,3 +16,4 @@ from deepchem.trans.transformers import PowerTransformer
from deepchem.trans.transformers import CoulombFitTransformer
from deepchem.trans.transformers import IRVTransformer
from deepchem.trans.transformers import DAGTransformer
from deepchem.trans.transformers import ANITransformer
+196 −19
Original line number Diff line number Diff line
@@ -925,3 +925,180 @@ class ImageTransformer(Transformer):
    images = [scipy.ndimage.imread(x, mode='RGB') for x in X]
    images = [scipy.misc.imresize(x, size=self.size) for x in images]
    return np.array(images), y, w

class ANITransformer(Transformer):
  """Performs transform from 3D coordinates to ANI symmetry functions
  """

  def __init__(self,
               max_atoms=23,
               radial_cutoff=4.6,
               angular_cutoff=3.1,
               radial_length=32,
               angular_length=8,
               atom_cases=[1, 6, 7, 8, 16],
               coordinates_in_bohr=True,
               transform_X=True,
               transform_y=False,
               transform_w=False):
    """
    Only X can be transformed
    """
    self.max_atoms = max_atoms
    self.radial_cutoff = radial_cutoff
    self.angular_cutoff = angular_cutoff
    self.radial_length = radial_length
    self.angular_length = angular_length
    self.atom_cases = atom_cases
    self.coordinates_in_bohr = coordinates_in_bohr
    self.transform_X = transform_X
    self.transform_y = transform_y
    self.transform_w = transform_w
    self.compute_graph = self.build()
    assert self.transform_X
    assert not self.transform_y
    assert not self.transform_w

  def transform_array(self, X, y, w):
    if self.transform_X:
      n_samples = X.shape[0]
      batches = np.linspace(0, n_samples, int(n_samples/100)).astype(int)
      X_out = []
      sess = tf.Session(graph=self.compute_graph)
      num_transformed = 0
      for i, start in enumerate(batches[:-1]):
        if start == batches[-1]:
          X_batch = X[start:]
        else:
          X_batch = X[start:batches[i+1]]
        output = sess.run([self.outputs], feed_dict={self.inputs: X_batch})[0]
        X_out.append(output)
        num_transformed = num_transformed + X_batch.shape[0]
        print('%i samples transformed' % num_transformed)
      X_new = np.concatenate(X_out, axis=0)
      assert X_new.shape[0] == X.shape[0]
    return (X_new, y, w)

  def untransform(self, z):
    raise NotImplementedError(
        "Cannot untransform datasets with ANITransformer.")

  def build(self):
    """ tensorflow computation graph for transform """
    graph = tf.Graph()
    with graph.as_default():
      self.inputs = tf.placeholder(tf.float32, shape=(None, self.max_atoms, 4))
      atom_numbers = tf.cast(self.inputs[:, :, 0], tf.int32)
      flags = tf.sign(atom_numbers)
      flags = tf.to_float(tf.expand_dims(flags, 1)*tf.expand_dims(flags, 2))
      coordinates = self.inputs[:, :, 1:]
      if self.coordinates_in_bohr:
        coordinates = coordinates * 0.52917721092
      d = self.distance_matrix(coordinates, flags)
      d_radial_cutoff = self.distance_cutoff(d, self.radial_cutoff, flags)
      d_angular_cutoff = self.distance_cutoff(d, self.angular_cutoff, flags)
      radial_sym = self.radial_symmetry(d_radial_cutoff, d, atom_numbers)
      angular_sym = self.angular_symmetry(d_angular_cutoff, d, atom_numbers, coordinates)
      self.outputs = tf.concat([tf.to_float(tf.expand_dims(atom_numbers, 2)),
                                radial_sym,
                                angular_sym], axis=2)
    return graph

  def distance_matrix(self, coordinates, flags):
    """ Generate distance matrix """
    max_atoms = self.max_atoms
    tensor1 = tf.stack([coordinates]*max_atoms, axis=1)
    tensor2 = tf.stack([coordinates]*max_atoms, axis=2)

    # Calculate pairwise distance
    d = tf.sqrt(tf.reduce_sum(tf.square(tensor1 - tensor2), axis=3))
    # Masking for valid atom index
    d = d * flags
    return d

  def distance_cutoff(self, d, cutoff, flags):
    """ Generate distance matrix with trainable cutoff """
    # Cutoff with threshold Rc
    d_flag = flags * tf.sign(cutoff - d)
    d_flag = tf.nn.relu(d_flag)
    d_flag = d_flag * tf.expand_dims((1 - tf.eye(self.max_atoms)), 0)
    d = 0.5 * (tf.cos(np.pi * d / cutoff) + 1)
    return d * d_flag

  def radial_symmetry(self, d_cutoff, d, atom_numbers):
    """ Radial Symmetry Function """
    embedding = tf.eye(np.max(self.atom_cases)+1)
    atom_numbers_embedded = tf.nn.embedding_lookup(embedding, atom_numbers)

    d_cutoff = tf.stack([d_cutoff] * self.radial_length, axis=3)
    d = tf.stack([d] * self.radial_length, axis=3)

    Rs = np.linspace(0., self.radial_cutoff, self.radial_length)
    ita = np.ones_like(Rs) * 1.25 / (Rs[1] - Rs[0])**2
    Rs = tf.to_float(np.reshape(Rs, (1, 1, 1, -1)))
    ita = tf.to_float(np.reshape(ita, (1, 1, 1, -1)))

    out = tf.exp(-ita * tf.square(d - Rs)) * d_cutoff
    out_tensors = []
    for atom_type in self.atom_cases:
      selected_atoms = tf.expand_dims(
          tf.expand_dims(atom_numbers_embedded[:, :, atom_type], axis=1),
          axis=3)
      out_tensors.append(tf.reduce_sum(out * selected_atoms, axis=2))
    return tf.concat(out_tensors, axis=2)


  def angular_symmetry(self, d_cutoff, d, atom_numbers, coordinates):
    """ Angular Symmetry Function """

    max_atoms = self.max_atoms
    embedding = tf.eye(np.max(self.atom_cases)+1)
    atom_numbers_embedded = tf.nn.embedding_lookup(embedding, atom_numbers)

    Rs = np.linspace(0., self.angular_cutoff, self.angular_length)
    ita = 1.25 / (Rs[1] - Rs[0])**2
    thetas = np.linspace(0., np.pi, self.angular_length)
    zeta = float(self.angular_length**2)

    ita, zeta, Rs, thetas = np.meshgrid(ita, zeta, Rs, thetas)
    zeta = tf.to_float(np.reshape(zeta, (1, 1, 1, 1, -1)))
    ita = tf.to_float(np.reshape(ita, (1, 1, 1, 1, -1)))
    Rs = tf.to_float(np.reshape(Rs, (1, 1, 1, 1, -1)))
    thetas = tf.to_float(np.reshape(thetas, (1, 1, 1, 1, -1)))
    length = zeta.get_shape().as_list()[-1]

    vector_distances = tf.stack([coordinates]*max_atoms, 1) - tf.stack([coordinates]*max_atoms, 2)
    R_ij = tf.stack([d]*max_atoms, axis=3)
    R_ik = tf.stack([d]*max_atoms, axis=2)
    f_R_ij = tf.stack([d_cutoff]*max_atoms, axis=3)
    f_R_ik = tf.stack([d_cutoff]*max_atoms, axis=2)

    # Define angle theta = arccos(R_ij(Vector) dot R_ik(Vector)/R_ij(distance)/R_ik(distance))
    vector_mul = tf.reduce_sum(tf.stack([vector_distances]*max_atoms, axis=3) * \
        tf.stack([vector_distances]*max_atoms, axis=2), axis=4)
    vector_mul = vector_mul * tf.sign(f_R_ij) * tf.sign(f_R_ik)
    theta = tf.acos(tf.div(vector_mul, R_ij * R_ik + 1e-5))

    R_ij = tf.stack([R_ij] * length, axis=4)
    R_ik = tf.stack([R_ik] * length, axis=4)
    f_R_ij = tf.stack([f_R_ij] * length, axis=4)
    f_R_ik = tf.stack([f_R_ik] * length, axis=4)
    theta = tf.stack([theta] * length, axis=4)

    out_tensor = tf.pow((1. + tf.cos(theta - thetas))/2., zeta) * \
        tf.exp(-ita * tf.square((R_ij + R_ik)/2. - Rs)) * f_R_ij * f_R_ik

    out_tensors = []
    for id_j, atom_type_j in enumerate(self.atom_cases):
      for atom_type_k in self.atom_cases[id_j:]:
        selected_atoms = tf.stack([atom_numbers_embedded[:, :, atom_type_j]] * max_atoms, axis=2) * \
                         tf.stack([atom_numbers_embedded[:, :, atom_type_k]] * max_atoms, axis=1)
        selected_atoms = tf.expand_dims(
              tf.expand_dims(selected_atoms, axis=1), axis=4)
        out_tensors.append(
              tf.reduce_sum(out_tensor * selected_atoms, axis=(2, 3)))
    return tf.concat(out_tensors, axis=2)

  def get_num_feats(self):
    n_feat = self.outputs.get_shape().as_list()[-1]
    return n_feat
 No newline at end of file
+18 −8
Original line number Diff line number Diff line
@@ -16,12 +16,6 @@ tasks, datasets, transformers = dc.molnet.load_qm7_from_mat(
    featurizer='BPSymmetryFunction')
train_dataset, valid_dataset, test_dataset = datasets

# Fit models
metric = [
    dc.metrics.Metric(dc.metrics.mean_absolute_error, mode="regression"),
    dc.metrics.Metric(dc.metrics.pearson_r2_score, mode="regression")
]

# Batch size of models
max_atoms = 23
n_hidden = 40
@@ -29,11 +23,27 @@ n_embedding = 0
batch_size = 16
atom_number_cases = [1, 6, 7, 8, 16]

ANItransformer = dc.trans.ANITransformer(max_atoms=max_atoms,
                                         atom_cases=atom_number_cases)
train_dataset = ANItransformer.transform(train_dataset)
valid_dataset = ANItransformer.transform(valid_dataset)
test_dataset = ANItransformer.transform(test_dataset)

# The first column is atom numbers
n_feat = ANItransformer.get_num_feats() - 1

# Fit models
metric = [
    dc.metrics.Metric(dc.metrics.mean_absolute_error, mode="regression"),
    dc.metrics.Metric(dc.metrics.pearson_r2_score, mode="regression")
]


model = dc.models.ANIRegression(
    len(tasks),
    max_atoms,
    n_feat=n_feat,
    n_hidden=n_hidden,
    n_embedding=n_embedding,
    atom_number_cases=atom_number_cases,
    batch_size=batch_size,
    learning_rate=0.001,
@@ -41,7 +51,7 @@ model = dc.models.ANIRegression(
    mode="regression")

# Fit trained model
model.fit(train_dataset, nb_epoch=20, checkpoint_interval=10)
model.fit(train_dataset, nb_epoch=100, checkpoint_interval=100)

print("Evaluating model")
train_scores = model.evaluate(train_dataset, metric, transformers)