Roitberg (6fbe8494) · Commits · 钟慕尧 / deepchem

deepchem/models/tensorgraph/layers.py

+187 −0

Original line number	Diff line number	Diff line
		@@ -3071,3 +3071,190 @@ class BetaShare(Layer):

		def set_tensors(self, tensor):
		self.out_tensor, self.betas = tensor


		class ANIFeat(Layer):
		"""Performs transform from 3D coordinates to ANI symmetry functions
		"""

		def __init__(self,
		in_layers,
		max_atoms=23,
		radial_cutoff=4.6,
		angular_cutoff=3.1,
		radial_length=32,
		angular_length=8,
		atom_cases=[1, 6, 7, 8, 16],
		atomic_number_differentiated=True,
		coordinates_in_bohr=True,
		**kwargs):
		"""
		Only X can be transformed
		"""
		self.max_atoms = max_atoms
		self.radial_cutoff = radial_cutoff
		self.angular_cutoff = angular_cutoff
		self.radial_length = radial_length
		self.angular_length = angular_length
		self.atom_cases = atom_cases
		self.atomic_number_differentiated = atomic_number_differentiated
		self.coordinates_in_bohr = coordinates_in_bohr
		super(ANIFeat, self).__init__(in_layers, **kwargs)

		def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
		"""
		In layers should be of shape dtype tf.float32, (None, self.max_atoms, 4)

		"""
		inputs = self._get_input_tensors(in_layers)[0]
		atom_numbers = tf.cast(inputs[:, :, 0], tf.int32)
		flags = tf.sign(atom_numbers)
		flags = tf.to_float(tf.expand_dims(flags, 1) * tf.expand_dims(flags, 2))
		coordinates = inputs[:, :, 1:]
		if self.coordinates_in_bohr:
		coordinates = coordinates * 0.52917721092

		d = self.distance_matrix(coordinates, flags)

		d_radial_cutoff = self.distance_cutoff(d, self.radial_cutoff, flags)
		d_angular_cutoff = self.distance_cutoff(d, self.angular_cutoff, flags)

		radial_sym = self.radial_symmetry(d_radial_cutoff, d, atom_numbers)
		angular_sym = self.angular_symmetry(d_angular_cutoff, d, atom_numbers,
		coordinates)

		out_tensor = tf.concat(
		[
		tf.to_float(tf.expand_dims(atom_numbers, 2)),
		radial_sym,
		angular_sym
		],
		axis=2)

		if set_tensors:
		self.out_tensor = out_tensor

		return out_tensor

		def distance_matrix(self, coordinates, flags):
		""" Generate distance matrix """
		# (TODO YTZ:) faster, less memory intensive way
		# r = tf.reduce_sum(tf.square(coordinates), 2)
		# r = tf.expand_dims(r, -1)
		# inner = 2*tf.matmul(coordinates, tf.transpose(coordinates, perm=[0,2,1]))
		# # inner = 2*tf.matmul(coordinates, coordinates, transpose_b=True)

		# d = r - inner + tf.transpose(r, perm=[0,2,1])
		# d = tf.nn.relu(d) # fix numerical instabilities about diagonal
		# d = tf.sqrt(d) # does this have negative elements? may be unstable for diagonals

		# d = d * flags

		max_atoms = self.max_atoms
		tensor1 = tf.stack([coordinates] * max_atoms, axis=1)
		tensor2 = tf.stack([coordinates] * max_atoms, axis=2)

		# Calculate pairwise distance
		d = tf.sqrt(tf.nn.relu(tf.reduce_sum(tf.squared_difference(tensor1,tensor2), axis=3)))
		# Masking for valid atom index
		d = d * flags
		return d

		def distance_cutoff(self, d, cutoff, flags):
		""" Generate distance matrix with trainable cutoff """
		# Cutoff with threshold Rc
		d_flag = flags * tf.sign(cutoff - d)
		d_flag = tf.nn.relu(d_flag)
		d_flag = d_flag * tf.expand_dims((1 - tf.eye(self.max_atoms)), 0)
		d = 0.5 * (tf.cos(np.pi * d / cutoff) + 1)
		return d * d_flag
		# return d

		def radial_symmetry(self, d_cutoff, d, atom_numbers):
		""" Radial Symmetry Function """
		embedding = tf.eye(np.max(self.atom_cases) + 1)
		atom_numbers_embedded = tf.nn.embedding_lookup(embedding, atom_numbers)

		Rs = np.linspace(0., self.radial_cutoff, self.radial_length)
		ita = np.ones_like(Rs) * 3 / (Rs[1] - Rs[0])**2
		Rs = tf.to_float(np.reshape(Rs, (1, 1, 1, -1)))
		ita = tf.to_float(np.reshape(ita, (1, 1, 1, -1)))
		length = ita.get_shape().as_list()[-1]

		d_cutoff = tf.stack([d_cutoff] * length, axis=3)
		d = tf.stack([d] * length, axis=3)

		out = tf.exp(-ita * tf.square(d - Rs)) * d_cutoff
		if self.atomic_number_differentiated:
		out_tensors = []
		for atom_type in self.atom_cases:
		selected_atoms = tf.expand_dims(
		tf.expand_dims(atom_numbers_embedded[:, :, atom_type], axis=1),
		axis=3)
		out_tensors.append(tf.reduce_sum(out * selected_atoms, axis=2))
		return tf.concat(out_tensors, axis=2)
		else:
		return tf.reduce_sum(out, axis=2)

		def angular_symmetry(self, d_cutoff, d, atom_numbers, coordinates):
		""" Angular Symmetry Function """

		max_atoms = self.max_atoms
		embedding = tf.eye(np.max(self.atom_cases) + 1)
		atom_numbers_embedded = tf.nn.embedding_lookup(embedding, atom_numbers)

		Rs = np.linspace(0., self.angular_cutoff, self.angular_length)
		ita = 3 / (Rs[1] - Rs[0])**2
		thetas = np.linspace(0., np.pi, self.angular_length)
		zeta = float(self.angular_length**2)

		ita, zeta, Rs, thetas = np.meshgrid(ita, zeta, Rs, thetas)
		zeta = tf.to_float(np.reshape(zeta, (1, 1, 1, 1, -1)))
		ita = tf.to_float(np.reshape(ita, (1, 1, 1, 1, -1)))
		Rs = tf.to_float(np.reshape(Rs, (1, 1, 1, 1, -1)))
		thetas = tf.to_float(np.reshape(thetas, (1, 1, 1, 1, -1)))
		length = zeta.get_shape().as_list()[-1]


		# tf.stack issues again...
		vector_distances = tf.stack([coordinates] * max_atoms, 1) - tf.stack(
		[coordinates] * max_atoms, 2)
		R_ij = tf.stack([d] * max_atoms, axis=3)
		R_ik = tf.stack([d] * max_atoms, axis=2)
		f_R_ij = tf.stack([d_cutoff] * max_atoms, axis=3)
		f_R_ik = tf.stack([d_cutoff] * max_atoms, axis=2)

		# Define angle theta = arccos(R_ij(Vector) dot R_ik(Vector)/R_ij(distance)/R_ik(distance))
		vector_mul = tf.reduce_sum(tf.stack([vector_distances]max_atoms, axis=3) \
		tf.stack([vector_distances]*max_atoms, axis=2), axis=4)
		vector_mul = vector_mul * tf.sign(f_R_ij) * tf.sign(f_R_ik)
		theta = tf.acos(tf.div(vector_mul, R_ij * R_ik + 1e-5))

		R_ij = tf.stack([R_ij] * length, axis=4)
		R_ik = tf.stack([R_ik] * length, axis=4)
		f_R_ij = tf.stack([f_R_ij] * length, axis=4)
		f_R_ik = tf.stack([f_R_ik] * length, axis=4)
		theta = tf.stack([theta] * length, axis=4)

		out_tensor = tf.pow((1. + tf.cos(theta - thetas))/2., zeta) * \
		tf.exp(-ita * tf.square((R_ij + R_ik)/2. - Rs)) * f_R_ij * f_R_ik * 2

		if self.atomic_number_differentiated:
		out_tensors = []
		for id_j, atom_type_j in enumerate(self.atom_cases):
		for atom_type_k in self.atom_cases[id_j:]:
		selected_atoms = tf.stack([atom_numbers_embedded[:, :, atom_type_j]] * max_atoms, axis=2) * \
		tf.stack([atom_numbers_embedded[:, :, atom_type_k]] * max_atoms, axis=1)
		selected_atoms = tf.expand_dims(
		tf.expand_dims(selected_atoms, axis=1), axis=4)
		out_tensors.append(
		tf.reduce_sum(out_tensor * selected_atoms, axis=(2, 3)))
		return tf.concat(out_tensors, axis=2)
		else:
		return tf.reduce_sum(out_tensor, axis=(2, 3))

		def get_num_feats(self):
		n_feat = self.outputs.get_shape().as_list()[-1]
		return n_feat

deepchem/models/tensorgraph/models/symmetry_function_regression.py

+65 −7

Original line number	Diff line number	Diff line
		@@ -6,9 +6,12 @@ Created on Thu Jul 6 20:31:47 2017
		@author: zqwu
		"""
		import numpy as np
		import scipy.optimize
		import tensorflow as tf

		from deepchem.models.tensorgraph.layers import Dense, Concat, WeightedError, Stack
		import deepchem as dc

		from deepchem.models.tensorgraph.layers import Dense, Concat, WeightedError, Stack, Layer, ANIFeat
		from deepchem.models.tensorgraph.layers import L2Loss, Label, Weights, Feature
		from deepchem.models.tensorgraph.tensor_graph import TensorGraph
		from deepchem.models.tensorgraph.graph_layers import DTNNEmbedding
		@@ -100,13 +103,11 @@ class BPSymmetryFunctionRegression(TensorGraph):
		feed_dict[self.atom_feats] = np.array(X_b[:, :, 1:], dtype=float)
		yield feed_dict


		class ANIRegression(TensorGraph):

		def __init__(self,
		n_tasks,
		max_atoms,
		n_feat,
		layer_structures=[128, 64],
		atom_number_cases=[1, 6, 7, 8, 16],
		**kwargs):
		@@ -122,17 +123,74 @@ class ANIRegression(TensorGraph):
		"""
		self.n_tasks = n_tasks
		self.max_atoms = max_atoms
		self.n_feat = n_feat
		self.layer_structures = layer_structures
		self.atom_number_cases = atom_number_cases
		super(ANIRegression, self).__init__(**kwargs)
		self.build_graph()
		self.grad = None

		def compute_grad(self, dataset, batch_size=1):
		with self._get_tf("Graph").as_default():
		if not self.built:
		self.build()
		if not self.grad:
		self.grad = tf.gradients(self.outputs, self.atom_feats)
		feed_dict = dict()
		X = dataset.X
		flags = np.sign(np.array(X[:batch_size, :, 0]))
		feed_dict[self.atom_flags] = np.stack([flags]self.max_atoms, axis=2)\
		np.stack([flags]*self.max_atoms, axis=1)
		feed_dict[self.atom_numbers] = np.array(X[:batch_size, :, 0], dtype=int)
		feed_dict[self.atom_feats] = np.array(X[:batch_size, :, :], dtype=float)
		return self.session.run([self.grad], feed_dict=feed_dict)

		def pred_one(self, X, atomic_nums):
		num_atoms = atomic_nums.shape[0]
		X = X.reshape((num_atoms, 3))
		A = atomic_nums.reshape((atomic_nums.shape[0], 1))
		Z = np.zeros((23, 4))
		Z[:X.shape[0], 1:X.shape[1]+1] = X
		Z[:A.shape[0], :A.shape[1]] = A
		X = Z
		dd = dc.data.NumpyDataset(np.array(X).reshape((1, 23, 4)), np.array(0), np.array(1))
		return self.predict(dd)[0]

		def grad_one(self, X, atomic_nums):
		y = self.pred_one(X, atomic_nums)
		num_atoms = atomic_nums.shape[0]
		X = X.reshape((num_atoms, 3))
		A = atomic_nums.reshape((atomic_nums.shape[0], 1))
		Z = np.zeros((23, 4))
		Z[:X.shape[0], 1:X.shape[1]+1] = X
		Z[:A.shape[0], :A.shape[1]] = A
		X = Z
		inp = np.array(X).reshape((1, 23, 4))
		dd = dc.data.NumpyDataset(inp, np.array([y]), np.array([1]))
		res = self.compute_grad(dd)[0][0][0]
		res = res[:num_atoms, 1:]
		return res.reshape((num_atoms*3,))

		def minimize_structure(self, X, atomic_nums):
		num_atoms = atomic_nums.shape[0]
		res = scipy.optimize.minimize(
		self.pred_one,
		X,
		args=(atomic_nums,),
		jac=self.grad_one,
		method="BFGS",
		tol=1e-6,
		options={'disp': True})
		return res.x.reshape((num_atoms, 3))

		def build_graph(self):

		self.atom_numbers = Feature(shape=(None, self.max_atoms), dtype=tf.int32)
		self.atom_flags = Feature(shape=(None, self.max_atoms, self.max_atoms))
		self.atom_feats = Feature(shape=(None, self.max_atoms, self.n_feat))
		previous_layer = self.atom_feats
		self.atom_feats = Feature(shape=(None, self.max_atoms, 4))

		previous_layer = ANIFeat(self.atom_feats)

		self.featurized = previous_layer

		Hiddens = []
		for n_hidden in self.layer_structures:
		@@ -188,5 +246,5 @@ class ANIRegression(TensorGraph):
		feed_dict[self.atom_flags] = np.stack([flags]self.max_atoms, axis=2)\
		np.stack([flags]*self.max_atoms, axis=1)
		feed_dict[self.atom_numbers] = np.array(X_b[:, :, 0], dtype=int)
		feed_dict[self.atom_feats] = np.array(X_b[:, :, 1:], dtype=float)
		feed_dict[self.atom_feats] = np.array(X_b[:, :, :], dtype=float)
		yield feed_dict

deepchem/splits/splitters.py

+2 −1

Original line number	Diff line number	Diff line
		@@ -195,7 +195,7 @@ class Splitter(object):

		class RandomGroupSplitter(Splitter):

		def __init__(self, groups):
		def __init__(self, groups, args, *kwargs):
		"""
		A splitter class that splits on groupings. An example use case is when there
		are multiple conformations of the same molecule that share the same topology.
		@@ -221,6 +221,7 @@ class RandomGroupSplitter(Splitter):

		"""
		self.groups = groups
		super(RandomGroupSplitter, self).__init__(args, *kwargs)

		def split(self,
		dataset,

examples/roitberg/app.py

0 → 100644

+46 −0

Original line number	Diff line number	Diff line
		from flask import request, abort, Flask

		import flask

		webapp = Flask(__name__)

		@webapp.route('/potential', methods=["POST"])
		def potential():
		content = request.get_json(force=True)
		if not content or not 'X' in content:
		abort(400)
		X = np.array(content['X'])
		x0 = X[:, 1:]
		a0 = X[:, :1]
		result = webapp.model.pred_one(x0, a0)
		return flask.jsonify({'y': result.tolist()[0]}), 200

		@webapp.route('/gradient', methods=["POST"])
		def index():
		content = request.get_json(force=True)
		if not content or not 'X' in content:
		abort(400)
		X = np.array(content['X'])
		num_atoms = X.shape[0]
		x0 = X[:, 1:]
		a0 = X[:, :1]

		res = webapp.model.grad_one(x0, a0)
		res = res.reshape((num_atoms, 3))

		return flask.jsonify({'grad': res.tolist()}), 200

		@webapp.route('/minimize', methods=["POST"])
		def minimize():
		content = request.get_json(force=True)
		if not content or not 'X' in content:
		abort(400)
		X = np.array(content['X'])
		num_atoms = X.shape[0]
		x0 = X[:, 1:]
		a0 = X[:, :1]

		res = webapp.model.minimize_structure(x0, a0)
		res = res.reshape((num_atoms, 3))

		return flask.jsonify({'X': res.tolist()}), 200
		No newline at end of file

examples/roitberg/pyanitools.py

0 → 100644

+126 −0

Original line number	Diff line number	Diff line
		# Written by Roman Zubatyuk and Justin S. Smith
		import h5py
		import numpy as np
		import platform
		import os

		PY_VERSION = int(platform.python_version().split('.')[0]) > 3


		class datapacker(object):

		def __init__(self, store_file, mode='w-', complib='gzip', complevel=6):
		"""Wrapper to store arrays within HFD5 file
		"""
		# opening file
		self.store = h5py.File(store_file, mode=mode)
		self.clib = complib
		self.clev = complevel

		def store_data(self, store_loc, **kwargs):
		"""Put arrays to store
		"""
		#print(store_loc)
		g = self.store.create_group(store_loc)
		for k, v, in kwargs.items():
		#print(type(v[0]))

		#print(k)
		if type(v) == list:
		if len(v) != 0:
		if type(v[0]) is np.str_ or type(v[0]) is str:
		v = [a.encode('utf8') for a in v]

		g.create_dataset(
		k, data=v, compression=self.clib, compression_opts=self.clev)

		def cleanup(self):
		"""Wrapper to close HDF5 file
		"""
		self.store.close()


		class anidataloader(object):
		''' Contructor '''

		def __init__(self, store_file):
		if not os.path.exists(store_file):
		exit('Error: file not found - ' + store_file)
		self.store = h5py.File(store_file)

		''' Group recursive iterator (iterate through all groups in all branches and return datasets in dicts) '''

		def h5py_dataset_iterator(self, g, prefix=''):
		for key in g.keys():
		item = g[key]
		path = '{}/{}'.format(prefix, key)
		keys = [i for i in item.keys()]
		if isinstance(item[keys[0]], h5py.Dataset): # test for dataset
		data = {'path': path}
		for k in keys:
		if not isinstance(item[k], h5py.Group):
		dataset = np.array(item[k].value)

		if type(dataset) is np.ndarray:
		if dataset.size != 0:
		if type(dataset[0]) is np.bytes_:
		dataset = [a.decode('ascii') for a in dataset]

		data.update({k: dataset})

		yield data
		else: # test for group (go down)
		yield from self.h5py_dataset_iterator(item, path)

		''' Default class iterator (iterate through all data) '''

		def __iter__(self):
		for data in self.h5py_dataset_iterator(self.store):
		yield data

		''' Returns a list of all groups in the file '''

		def get_group_list(self):
		return [g for g in self.store.values()]

		''' Allows interation through the data in a given group '''

		def iter_group(self, g):
		for data in self.h5py_dataset_iterator(g):
		yield data

		''' Returns the requested dataset '''

		def get_data(self, path, prefix=''):
		item = self.store[path]
		path = '{}/{}'.format(prefix, path)
		keys = [i for i in item.keys()]
		data = {'path': path}
		# print(path)
		for k in keys:
		if not isinstance(item[k], h5py.Group):
		dataset = np.array(item[k].value)

		if type(dataset) is np.ndarray:
		if dataset.size != 0:
		if type(dataset[0]) is np.bytes_:
		dataset = [a.decode('ascii') for a in dataset]

		data.update({k: dataset})
		return data

		''' Returns the number of groups '''

		def group_size(self):
		return len(self.get_group_list())

		def size(self):
		count = 0
		for g in self.store.values():
		count = count + len(g.items())
		return count

		''' Close the HDF5 file '''

		def cleanup(self):
		self.store.close()
		No newline at end of file

Admin message