docstrings (bd1afbb7) · Commits · 钟慕尧 / deepchem

deepchem/models/tensorgraph/layers.py

+32 −4

Original line number	Diff line number	Diff line
		@@ -481,6 +481,15 @@ class Dense(Layer):


		class Highway(Layer):
		""" Create a highway layer. y = H(x) * T(x) + x * (1 - T(x))
		H(x) = activation_fn(matmul(W_H, x) + b_H) is the non-linear transformed output
		T(x) = sigmoid(matmul(W_T, x) + b_T) is the transform gate

		reference: https://arxiv.org/pdf/1505.00387.pdf

		This layer expects its input to be a two dimensional tensor of shape (batch size, # input features).
		Outputs will be in the same shape.
		"""

		def __init__(
		self,
		@@ -488,7 +497,7 @@ class Highway(Layer):
		biases_initializer=tf.zeros_initializer,
		weights_initializer=tf.contrib.layers.variance_scaling_initializer,
		**kwargs):
		"""Create a highway layer. https://arxiv.org/pdf/1505.00387.pdf
		"""

		Parameters
		----------
		@@ -509,21 +518,24 @@ class Highway(Layer):
		inputs = self._get_input_tensors(in_layers)
		parent = inputs[0]
		shape = parent.get_shape().as_list()[1]
		dense1 = tf.contrib.layers.fully_connected(
		# H(x), with same number of input and output channels
		dense_H = tf.contrib.layers.fully_connected(
		parent,
		num_outputs=shape,
		activation_fn=self.activation_fn,
		biases_initializer=self.biases_initializer(),
		weights_initializer=self.weights_initializer(),
		trainable=True)
		dense2 = tf.contrib.layers.fully_connected(
		# T(x), with same number of input and output channels
		dense_T = tf.contrib.layers.fully_connected(
		parent,
		num_outputs=shape,
		activation_fn=tf.nn.sigmoid,
		biases_initializer=tf.constant_initializer(-1),
		weights_initializer=self.weights_initializer(),
		trainable=True)
		out_tensor = tf.multiply(dense1, dense2) + tf.multiply(parent, 1 - dense2)
		out_tensor = tf.multiply(dense_H, dense_T) + tf.multiply(
		parent, 1 - dense_T)
		if set_tensors:
		self.out_tensor = out_tensor
		return out_tensor
		@@ -1566,8 +1578,24 @@ class Conv3D(Layer):


		class MaxPool1D(Layer):
		"""A 1D max pooling on the input.

		This layer expects its input to be a three dimensional tensor of shape
		(batch size, width, # channels).
		"""

		def __init__(self, window_shape=2, strides=1, padding="SAME", **kwargs):
		"""Create a MaxPool1D layer.

		Parameters
		----------
		window_shape: int, optional
		size of the window(assuming input with only one dimension)
		strides: int, optional
		stride of the sliding window
		padding: str
		the padding method to use, either 'SAME' or 'VALID'
		"""
		self.window_shape = window_shape
		self.strides = strides
		self.padding = padding

deepchem/models/tensorgraph/models/text_cnn.py

+25 −8

Original line number	Diff line number	Diff line
		#!/usr/bin/env python2
		# -- coding: utf-8 --
		"""
		Created on Thu Sep 28 15:17:50 2017

		@@ -19,7 +17,7 @@ from deepchem.models.tensorgraph.layers import L2Loss, Label, Weights, Feature
		from deepchem.models.tensorgraph.tensor_graph import TensorGraph
		from deepchem.trans import undo_transforms

		# Common symbols in SMILES, note that Cl and Br are regarded as one symbol
		# Common symbols in SMILES, note that Cl and Br are regarded as single symbol
		default_dict = {
		'#': 1,
		'(': 2,
		@@ -58,6 +56,15 @@ default_dict = {


		class TextCNNTensorGraph(TensorGraph):
		""" A Convolutional neural network on smiles strings
		Reimplementation of the discriminator module in ORGAN: https://arxiv.org/abs/1705.10843
		Originated from: http://emnlp2014.org/papers/pdf/EMNLP2014181.pdf

		This model applies multiple 1D convolutional filters to the padded strings,
		then max-over-time pooling is applied on all filters, extracting one feature per filter.
		All features are concatenated and transformed through several hidden layers to form predictions.

		"""

		def __init__(
		self,
		@@ -102,26 +109,35 @@ class TextCNNTensorGraph(TensorGraph):
		@staticmethod
		def build_char_dict(dataset, default_dict=default_dict):
		""" Collect all unique characters(in smiles) from the dataset.
		This method should be called before defining the model to build appropriate char_dict
		"""
		# SMILES strings
		X = dataset.ids
		# Maximum length is expanded to allow length variation during train and inference
		seq_length = int(max([len(smile) for smile in X]) * 1.2)
		# '_' served as delimiter and padding
		all_smiles = '_'.join(X)
		tot_len = len(all_smiles)
		keys = default_dict.keys()
		# Initialize common characters as keys
		keys = list(default_dict.keys())
		out_dict = copy.deepcopy(default_dict)
		current_key_val = len(keys) + 1
		# Include space to avoid extra keys
		keys.extend([' '])
		extra_keys = []
		i = 0
		while i < tot_len:
		# For 'Cl', 'Br', etc.
		if all_smiles[i:i + 2] in keys:
		i = i + 2
		elif all_smiles[i:i + 1] in keys:
		i = i + 1
		else:
		# Character not recognized, add to extra_keys
		extra_keys.append(all_smiles[i])
		keys.append(all_smiles[i])
		i = i + 1
		# Add all extra_keys to char_dict
		for extra_key in extra_keys:
		out_dict[extra_key] = current_key_val
		current_key_val += 1
		@@ -151,7 +167,7 @@ class TextCNNTensorGraph(TensorGraph):
		strides=1,
		padding='VALID',
		in_layers=[self.conv_layers[-1]]))

		# Concat features from all filters(one feature per filter)
		concat_outputs = Concat(axis=2, in_layers=self.pooled_outputs)
		outputs = Squeeze(squeeze_dims=1, in_layers=concat_outputs)
		dropout = Dropout(dropout_prob=self.dropout, in_layers=[outputs])
		@@ -215,7 +231,7 @@ class TextCNNTensorGraph(TensorGraph):
		feed_dict[label] = y_b[:, index:index + 1]
		if w_b is not None:
		feed_dict[self.weights] = w_b

		# Transform SMILES string to integer vectors
		smiles_seqs = [self.smiles_to_seq(smiles) for smiles in ids_b]
		feed_dict[self.smiles_seqs] = np.stack(smiles_seqs, axis=0)
		yield feed_dict
		@@ -228,10 +244,11 @@ class TextCNNTensorGraph(TensorGraph):
		keys = self.char_dict.keys()
		i = 0
		while i < smiles_len:
		# Skip all spaces
		if smiles[i:i + 1] == ' ':
		i = i + 1
		# For 'Cl', 'Br', etc.
		elif smiles[i:i + 2] in keys:
		# For Cl, Br, etc.
		seq.append(self.char_dict[smiles[i:i + 2]])
		i = i + 2
		elif smiles[i:i + 1] in keys:
		@@ -240,6 +257,6 @@ class TextCNNTensorGraph(TensorGraph):
		else:
		raise ValueError('character not found in dict')
		for i in range(self.seq_length - len(seq)):
		# Padding
		# Padding with '_'
		seq.append(self.char_dict['_'])
		return np.array(seq)

Admin message