Commit 6fbe8494 authored by Yutong Zhao's avatar Yutong Zhao
Browse files

Roitberg

parent 65d2b7c5
Loading
Loading
Loading
Loading
+187 −0
Original line number Diff line number Diff line
@@ -3071,3 +3071,190 @@ class BetaShare(Layer):

  def set_tensors(self, tensor):
    self.out_tensor, self.betas = tensor


class ANIFeat(Layer):
  """Performs transform from 3D coordinates to ANI symmetry functions
  """

  def __init__(self,
               in_layers,
               max_atoms=23,
               radial_cutoff=4.6,
               angular_cutoff=3.1,
               radial_length=32,
               angular_length=8,
               atom_cases=[1, 6, 7, 8, 16],
               atomic_number_differentiated=True,
               coordinates_in_bohr=True,
               **kwargs):
    """
    Only X can be transformed
    """
    self.max_atoms = max_atoms
    self.radial_cutoff = radial_cutoff
    self.angular_cutoff = angular_cutoff
    self.radial_length = radial_length
    self.angular_length = angular_length
    self.atom_cases = atom_cases
    self.atomic_number_differentiated = atomic_number_differentiated
    self.coordinates_in_bohr = coordinates_in_bohr
    super(ANIFeat, self).__init__(in_layers, **kwargs)

  def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
    """
    In layers should be of shape dtype tf.float32, (None, self.max_atoms, 4)

    """
    inputs = self._get_input_tensors(in_layers)[0]
    atom_numbers = tf.cast(inputs[:, :, 0], tf.int32)
    flags = tf.sign(atom_numbers)
    flags = tf.to_float(tf.expand_dims(flags, 1) * tf.expand_dims(flags, 2))
    coordinates = inputs[:, :, 1:]
    if self.coordinates_in_bohr:
      coordinates = coordinates * 0.52917721092

    d = self.distance_matrix(coordinates, flags)
    
    d_radial_cutoff = self.distance_cutoff(d, self.radial_cutoff, flags)
    d_angular_cutoff = self.distance_cutoff(d, self.angular_cutoff, flags)

    radial_sym = self.radial_symmetry(d_radial_cutoff, d, atom_numbers)
    angular_sym = self.angular_symmetry(d_angular_cutoff, d, atom_numbers,
                                        coordinates)

    out_tensor = tf.concat(
        [
            tf.to_float(tf.expand_dims(atom_numbers, 2)),
            radial_sym,
            angular_sym
        ],
        axis=2)

    if set_tensors:
      self.out_tensor = out_tensor

    return out_tensor

  def distance_matrix(self, coordinates, flags):
    """ Generate distance matrix """
    # (TODO YTZ:) faster, less memory intensive way
    # r = tf.reduce_sum(tf.square(coordinates), 2)
    # r = tf.expand_dims(r, -1)
    # inner = 2*tf.matmul(coordinates, tf.transpose(coordinates, perm=[0,2,1]))
    # # inner = 2*tf.matmul(coordinates, coordinates, transpose_b=True)

    # d = r - inner + tf.transpose(r, perm=[0,2,1])
    # d = tf.nn.relu(d) # fix numerical instabilities about diagonal
    # d = tf.sqrt(d) # does this have negative elements? may be unstable for diagonals

    # d = d * flags

    max_atoms = self.max_atoms
    tensor1 = tf.stack([coordinates] * max_atoms, axis=1)
    tensor2 = tf.stack([coordinates] * max_atoms, axis=2)

    # Calculate pairwise distance
    d = tf.sqrt(tf.nn.relu(tf.reduce_sum(tf.squared_difference(tensor1,tensor2), axis=3)))
    # Masking for valid atom index
    d = d * flags
    return d

  def distance_cutoff(self, d, cutoff, flags):
    """ Generate distance matrix with trainable cutoff """
    # Cutoff with threshold Rc
    d_flag = flags * tf.sign(cutoff - d)
    d_flag = tf.nn.relu(d_flag)
    d_flag = d_flag * tf.expand_dims((1 - tf.eye(self.max_atoms)), 0)
    d = 0.5 * (tf.cos(np.pi * d / cutoff) + 1)
    return d * d_flag
    # return d

  def radial_symmetry(self, d_cutoff, d, atom_numbers):
    """ Radial Symmetry Function """
    embedding = tf.eye(np.max(self.atom_cases) + 1)
    atom_numbers_embedded = tf.nn.embedding_lookup(embedding, atom_numbers)

    Rs = np.linspace(0., self.radial_cutoff, self.radial_length)
    ita = np.ones_like(Rs) * 3 / (Rs[1] - Rs[0])**2
    Rs = tf.to_float(np.reshape(Rs, (1, 1, 1, -1)))
    ita = tf.to_float(np.reshape(ita, (1, 1, 1, -1)))
    length = ita.get_shape().as_list()[-1]

    d_cutoff = tf.stack([d_cutoff] * length, axis=3)
    d = tf.stack([d] * length, axis=3)

    out = tf.exp(-ita * tf.square(d - Rs)) * d_cutoff
    if self.atomic_number_differentiated:
      out_tensors = []
      for atom_type in self.atom_cases:
        selected_atoms = tf.expand_dims(
            tf.expand_dims(atom_numbers_embedded[:, :, atom_type], axis=1),
            axis=3)
        out_tensors.append(tf.reduce_sum(out * selected_atoms, axis=2))
      return tf.concat(out_tensors, axis=2)
    else:
      return tf.reduce_sum(out, axis=2)

  def angular_symmetry(self, d_cutoff, d, atom_numbers, coordinates):
    """ Angular Symmetry Function """

    max_atoms = self.max_atoms
    embedding = tf.eye(np.max(self.atom_cases) + 1)
    atom_numbers_embedded = tf.nn.embedding_lookup(embedding, atom_numbers)

    Rs = np.linspace(0., self.angular_cutoff, self.angular_length)
    ita = 3 / (Rs[1] - Rs[0])**2
    thetas = np.linspace(0., np.pi, self.angular_length)
    zeta = float(self.angular_length**2)

    ita, zeta, Rs, thetas = np.meshgrid(ita, zeta, Rs, thetas)
    zeta = tf.to_float(np.reshape(zeta, (1, 1, 1, 1, -1)))
    ita = tf.to_float(np.reshape(ita, (1, 1, 1, 1, -1)))
    Rs = tf.to_float(np.reshape(Rs, (1, 1, 1, 1, -1)))
    thetas = tf.to_float(np.reshape(thetas, (1, 1, 1, 1, -1)))
    length = zeta.get_shape().as_list()[-1]


    # tf.stack issues again...
    vector_distances = tf.stack([coordinates] * max_atoms, 1) - tf.stack(
        [coordinates] * max_atoms, 2)
    R_ij = tf.stack([d] * max_atoms, axis=3)
    R_ik = tf.stack([d] * max_atoms, axis=2)
    f_R_ij = tf.stack([d_cutoff] * max_atoms, axis=3)
    f_R_ik = tf.stack([d_cutoff] * max_atoms, axis=2)

    # Define angle theta = arccos(R_ij(Vector) dot R_ik(Vector)/R_ij(distance)/R_ik(distance))
    vector_mul = tf.reduce_sum(tf.stack([vector_distances]*max_atoms, axis=3) * \
        tf.stack([vector_distances]*max_atoms, axis=2), axis=4)
    vector_mul = vector_mul * tf.sign(f_R_ij) * tf.sign(f_R_ik)
    theta = tf.acos(tf.div(vector_mul, R_ij * R_ik + 1e-5))

    R_ij = tf.stack([R_ij] * length, axis=4)
    R_ik = tf.stack([R_ik] * length, axis=4)
    f_R_ij = tf.stack([f_R_ij] * length, axis=4)
    f_R_ik = tf.stack([f_R_ik] * length, axis=4)
    theta = tf.stack([theta] * length, axis=4)

    out_tensor = tf.pow((1. + tf.cos(theta - thetas))/2., zeta) * \
        tf.exp(-ita * tf.square((R_ij + R_ik)/2. - Rs)) * f_R_ij * f_R_ik * 2

    if self.atomic_number_differentiated:
      out_tensors = []
      for id_j, atom_type_j in enumerate(self.atom_cases):
        for atom_type_k in self.atom_cases[id_j:]:
          selected_atoms = tf.stack([atom_numbers_embedded[:, :, atom_type_j]] * max_atoms, axis=2) * \
                           tf.stack([atom_numbers_embedded[:, :, atom_type_k]] * max_atoms, axis=1)
          selected_atoms = tf.expand_dims(
              tf.expand_dims(selected_atoms, axis=1), axis=4)
          out_tensors.append(
              tf.reduce_sum(out_tensor * selected_atoms, axis=(2, 3)))
      return tf.concat(out_tensors, axis=2)
    else:
      return tf.reduce_sum(out_tensor, axis=(2, 3))

  def get_num_feats(self):
    n_feat = self.outputs.get_shape().as_list()[-1]
    return n_feat

+65 −7
Original line number Diff line number Diff line
@@ -6,9 +6,12 @@ Created on Thu Jul 6 20:31:47 2017
@author: zqwu
"""
import numpy as np
import scipy.optimize
import tensorflow as tf

from deepchem.models.tensorgraph.layers import Dense, Concat, WeightedError, Stack
import deepchem as dc

from deepchem.models.tensorgraph.layers import Dense, Concat, WeightedError, Stack, Layer, ANIFeat
from deepchem.models.tensorgraph.layers import L2Loss, Label, Weights, Feature
from deepchem.models.tensorgraph.tensor_graph import TensorGraph
from deepchem.models.tensorgraph.graph_layers import DTNNEmbedding
@@ -100,13 +103,11 @@ class BPSymmetryFunctionRegression(TensorGraph):
        feed_dict[self.atom_feats] = np.array(X_b[:, :, 1:], dtype=float)
        yield feed_dict


class ANIRegression(TensorGraph):

  def __init__(self,
               n_tasks,
               max_atoms,
               n_feat,
               layer_structures=[128, 64],
               atom_number_cases=[1, 6, 7, 8, 16],
               **kwargs):
@@ -122,17 +123,74 @@ class ANIRegression(TensorGraph):
    """
    self.n_tasks = n_tasks
    self.max_atoms = max_atoms
    self.n_feat = n_feat
    self.layer_structures = layer_structures
    self.atom_number_cases = atom_number_cases
    super(ANIRegression, self).__init__(**kwargs)
    self.build_graph()
    self.grad = None

  def compute_grad(self, dataset, batch_size=1):
    with self._get_tf("Graph").as_default():
      if not self.built:
        self.build()
      if not self.grad:
        self.grad = tf.gradients(self.outputs, self.atom_feats)
      feed_dict = dict()
      X = dataset.X
      flags = np.sign(np.array(X[:batch_size, :, 0]))
      feed_dict[self.atom_flags] = np.stack([flags]*self.max_atoms, axis=2)*\
          np.stack([flags]*self.max_atoms, axis=1)
      feed_dict[self.atom_numbers] = np.array(X[:batch_size, :, 0], dtype=int)
      feed_dict[self.atom_feats] = np.array(X[:batch_size, :, :], dtype=float)
      return self.session.run([self.grad], feed_dict=feed_dict)

  def pred_one(self, X, atomic_nums):
    num_atoms = atomic_nums.shape[0]
    X = X.reshape((num_atoms, 3))
    A = atomic_nums.reshape((atomic_nums.shape[0], 1))
    Z = np.zeros((23, 4))
    Z[:X.shape[0], 1:X.shape[1]+1] = X
    Z[:A.shape[0], :A.shape[1]] = A
    X = Z
    dd = dc.data.NumpyDataset(np.array(X).reshape((1, 23, 4)), np.array(0), np.array(1))
    return self.predict(dd)[0]

  def grad_one(self, X, atomic_nums):
    y = self.pred_one(X, atomic_nums)
    num_atoms = atomic_nums.shape[0]
    X = X.reshape((num_atoms, 3))
    A = atomic_nums.reshape((atomic_nums.shape[0], 1))
    Z = np.zeros((23, 4))
    Z[:X.shape[0], 1:X.shape[1]+1] = X
    Z[:A.shape[0], :A.shape[1]] = A
    X = Z
    inp = np.array(X).reshape((1, 23, 4))
    dd = dc.data.NumpyDataset(inp, np.array([y]), np.array([1]))
    res = self.compute_grad(dd)[0][0][0]
    res = res[:num_atoms, 1:]
    return res.reshape((num_atoms*3,))

  def minimize_structure(self, X, atomic_nums):
    num_atoms = atomic_nums.shape[0]
    res = scipy.optimize.minimize(
      self.pred_one,
      X,
      args=(atomic_nums,),
      jac=self.grad_one,
      method="BFGS",
      tol=1e-6,
      options={'disp': True})
    return res.x.reshape((num_atoms, 3))

  def build_graph(self):

    self.atom_numbers = Feature(shape=(None, self.max_atoms), dtype=tf.int32)
    self.atom_flags = Feature(shape=(None, self.max_atoms, self.max_atoms))
    self.atom_feats = Feature(shape=(None, self.max_atoms, self.n_feat))
    previous_layer = self.atom_feats
    self.atom_feats = Feature(shape=(None, self.max_atoms, 4))

    previous_layer = ANIFeat(self.atom_feats)

    self.featurized = previous_layer

    Hiddens = []
    for n_hidden in self.layer_structures:
@@ -188,5 +246,5 @@ class ANIRegression(TensorGraph):
        feed_dict[self.atom_flags] = np.stack([flags]*self.max_atoms, axis=2)*\
            np.stack([flags]*self.max_atoms, axis=1)
        feed_dict[self.atom_numbers] = np.array(X_b[:, :, 0], dtype=int)
        feed_dict[self.atom_feats] = np.array(X_b[:, :, 1:], dtype=float)
        feed_dict[self.atom_feats] = np.array(X_b[:, :, :], dtype=float)
        yield feed_dict
+2 −1
Original line number Diff line number Diff line
@@ -195,7 +195,7 @@ class Splitter(object):

class RandomGroupSplitter(Splitter):

  def __init__(self, groups):
  def __init__(self, groups, *args, **kwargs):
    """
    A splitter class that splits on groupings. An example use case is when there
    are multiple conformations of the same molecule that share the same topology.
@@ -221,6 +221,7 @@ class RandomGroupSplitter(Splitter):

    """
    self.groups = groups
    super(RandomGroupSplitter, self).__init__(*args, **kwargs)

  def split(self,
            dataset,
+46 −0
Original line number Diff line number Diff line
from flask import request, abort, Flask

import flask

webapp = Flask(__name__)

@webapp.route('/potential', methods=["POST"])
def potential():
  content = request.get_json(force=True)
  if not content or not 'X' in content:
    abort(400)
  X = np.array(content['X'])
  x0 = X[:, 1:]
  a0 = X[:, :1]
  result = webapp.model.pred_one(x0, a0)
  return flask.jsonify({'y': result.tolist()[0]}), 200

@webapp.route('/gradient', methods=["POST"])
def index():
  content = request.get_json(force=True)
  if not content or not 'X' in content:
    abort(400)
  X = np.array(content['X'])
  num_atoms = X.shape[0]
  x0 = X[:, 1:]
  a0 = X[:, :1]

  res = webapp.model.grad_one(x0, a0)
  res = res.reshape((num_atoms, 3))

  return flask.jsonify({'grad': res.tolist()}), 200

@webapp.route('/minimize', methods=["POST"])
def minimize():
  content = request.get_json(force=True)
  if not content or not 'X' in content:
    abort(400)
  X = np.array(content['X'])
  num_atoms = X.shape[0]
  x0 = X[:, 1:]
  a0 = X[:, :1]

  res = webapp.model.minimize_structure(x0, a0)
  res = res.reshape((num_atoms, 3))

  return flask.jsonify({'X': res.tolist()}), 200
 No newline at end of file
+126 −0
Original line number Diff line number Diff line
# Written by Roman Zubatyuk and Justin S. Smith
import h5py
import numpy as np
import platform
import os

PY_VERSION = int(platform.python_version().split('.')[0]) > 3


class datapacker(object):

  def __init__(self, store_file, mode='w-', complib='gzip', complevel=6):
    """Wrapper to store arrays within HFD5 file
        """
    # opening file
    self.store = h5py.File(store_file, mode=mode)
    self.clib = complib
    self.clev = complevel

  def store_data(self, store_loc, **kwargs):
    """Put arrays to store
        """
    #print(store_loc)
    g = self.store.create_group(store_loc)
    for k, v, in kwargs.items():
      #print(type(v[0]))

      #print(k)
      if type(v) == list:
        if len(v) != 0:
          if type(v[0]) is np.str_ or type(v[0]) is str:
            v = [a.encode('utf8') for a in v]

      g.create_dataset(
          k, data=v, compression=self.clib, compression_opts=self.clev)

  def cleanup(self):
    """Wrapper to close HDF5 file
        """
    self.store.close()


class anidataloader(object):
  ''' Contructor '''

  def __init__(self, store_file):
    if not os.path.exists(store_file):
      exit('Error: file not found - ' + store_file)
    self.store = h5py.File(store_file)

  ''' Group recursive iterator (iterate through all groups in all branches and return datasets in dicts) '''

  def h5py_dataset_iterator(self, g, prefix=''):
    for key in g.keys():
      item = g[key]
      path = '{}/{}'.format(prefix, key)
      keys = [i for i in item.keys()]
      if isinstance(item[keys[0]], h5py.Dataset):  # test for dataset
        data = {'path': path}
        for k in keys:
          if not isinstance(item[k], h5py.Group):
            dataset = np.array(item[k].value)

            if type(dataset) is np.ndarray:
              if dataset.size != 0:
                if type(dataset[0]) is np.bytes_:
                  dataset = [a.decode('ascii') for a in dataset]

            data.update({k: dataset})

        yield data
      else:  # test for group (go down)
        yield from self.h5py_dataset_iterator(item, path)

  ''' Default class iterator (iterate through all data) '''

  def __iter__(self):
    for data in self.h5py_dataset_iterator(self.store):
      yield data

  ''' Returns a list of all groups in the file '''

  def get_group_list(self):
    return [g for g in self.store.values()]

  ''' Allows interation through the data in a given group '''

  def iter_group(self, g):
    for data in self.h5py_dataset_iterator(g):
      yield data

  ''' Returns the requested dataset '''

  def get_data(self, path, prefix=''):
    item = self.store[path]
    path = '{}/{}'.format(prefix, path)
    keys = [i for i in item.keys()]
    data = {'path': path}
    # print(path)
    for k in keys:
      if not isinstance(item[k], h5py.Group):
        dataset = np.array(item[k].value)

        if type(dataset) is np.ndarray:
          if dataset.size != 0:
            if type(dataset[0]) is np.bytes_:
              dataset = [a.decode('ascii') for a in dataset]

        data.update({k: dataset})
    return data

  ''' Returns the number of groups '''

  def group_size(self):
    return len(self.get_group_list())

  def size(self):
    count = 0
    for g in self.store.values():
      count = count + len(g.items())
    return count

  ''' Close the HDF5 file '''

  def cleanup(self):
    self.store.close()
 No newline at end of file
Loading