Commit b4ad5857 authored by Bharath Ramsundar

Removing model_params

parent 3ef691a7
+25 −25
@@ -27,6 +27,15 @@ class TensorflowGraph(object):
    penalty
    nb_epoch
    pad_batches
+    penalty_type
+    optimizer
+    learning_rate
+    momentum
+    data_shape
+    layer_sizes
+    weight_init_stddevs
+    bias_init_consts
+    dropouts

  Classifier:
    num_classes
@@ -52,7 +61,6 @@ class TensorflowGraph(object):
      normalization. Should be set to tf.no_op() if no updates are required.

  This base class provides the following attributes:
-    model_params: dictionary containing model configuration parameters.
    graph: TensorFlow graph object.
    logdir: Path to the file output directory to store checkpoints etc.
    master: TensorFlow session master specification string.
@@ -62,25 +70,24 @@ class TensorflowGraph(object):
      mask when calculating gradient costs.

  Args:
-    model_params: dictionary.
    train: If True, model is in training mode.
    logdir: Directory for output files.
  """

-  def __init__(self, model_params, logdir, tasks, task_types, train=True,
-               verbosity=None):
+  def __init__(self, n_tasks, n_inputs, logdir, layer_sizes=[1000],
+               weight_init_stddevs=[.02], bias_init_consts=[1], penalty=0.0,
+               learning_rate=.001, momentum=.9, optimizer="adam",
+               batch_size=50, num_classes=2, train=True, verbosity=None):
    """Constructs the computational graph.

    Args:
      train: whether model is in train mode
-      model_params: dictionary of model parameters
      logdir: Location to save data

    This function constructs the computational graph for the model. It relies
    on subclassed methods (build/cost) to construct specific graphs.
    """
    self.graph = tf.Graph()
-    self.model_params = model_params
    self.logdir = logdir
    self.tasks = tasks
    self.task_types = task_types
@@ -143,7 +150,6 @@ class TensorflowGraph(object):
    with self.graph.as_default():
      self.require_attributes(['output', 'labels', 'weights'])
      epsilon = 1e-3  # small float to avoid dividing by zero
-      model_params = self.model_params
      weighted_costs = []  # weighted costs for each example
      gradient_costs = []  # costs used for gradient calculation

@@ -162,7 +168,7 @@ class TensorflowGraph(object):
              # tf.reduce_mean (which can put ops on the CPU) we explicitly
              # calculate with div/sum so it stays on the GPU.
              gradient_cost = tf.div(tf.reduce_sum(weighted_cost),
-                                     model_params["batch_size"])
+                                     self.batch_size)
              gradient_costs.append(gradient_cost)

        # aggregated costs
@@ -171,8 +177,8 @@ class TensorflowGraph(object):
            loss = tf.add_n(gradient_costs)

          # weight decay
-          if model_params["penalty"] != 0.0:
-            penalty = model_ops.WeightDecay(model_params)
+          if self.penalty != 0.0:
+            penalty = model_ops.WeightDecay(self.penalty_type, self.penalty)
            loss += penalty

        # loss used for gradient calculation
@@ -189,8 +195,8 @@ class TensorflowGraph(object):
      else:
        self.updates = tf.no_op(name='updates')

-  def fit(self, dataset, shuffle=False, max_checkpoints_to_keep=5,
-          log_every_N_batches=50):
+  def fit(self, dataset, nb_epoch=10, pad_batches=False, shuffle=False,
+          max_checkpoints_to_keep=5, log_every_N_batches=50):
    """Fit the model.

    Args:
@@ -205,14 +211,9 @@ class TensorflowGraph(object):
    time1 = time.time()
    ############################################################## TIMING
    num_datapoints = len(dataset)
-    batch_size = self.model_params["batch_size"]
+    batch_size = self.batch_size
    step_per_epoch = np.ceil(float(num_datapoints)/batch_size)
-    nb_epoch = self.model_params["nb_epoch"]
    log("Training for %d epochs" % nb_epoch, self.verbosity)
-    if "pad_batches" in self.model_params:
-      pad_batches = self.model_params["pad_batches"]
-    else:
-      pad_batches = False
    with self.graph.as_default():
      self.require_attributes(['loss', 'updates'])
      train_op = self.get_training_op()
@@ -390,7 +391,7 @@ class TensorflowGraph(object):
    Returns:
    A training op.
    """
-    opt = model_ops.Optimizer(self.model_params)
+    opt = model_ops.Optimizer(self.optimizer, self.learning_rate, self.momentum)
    return opt.minimize(self.loss, name='train')

  def _get_shared_session(self):
@@ -499,9 +500,8 @@ class TensorflowClassifier(TensorflowGraph):
    feeding and fetching the same tensor.
    """
    with self.graph.as_default():
-      model_params = self.model_params
-      batch_size = model_params["batch_size"]
-      num_classes = model_params["num_classes"]
+      batch_size = self.batch_size
+      num_classes = self.num_classes
      labels = []
      for task in xrange(self.num_tasks):
        with tf.name_scope(self.placeholder_scope):
@@ -555,7 +555,7 @@ class TensorflowRegressor(TensorflowGraph):
    feeding and fetching the same tensor.
    """
    with self.graph.as_default():
-      batch_size = self.model_params["batch_size"]
+      batch_size = self.batch_size
      labels = []
      for task in xrange(self.num_tasks):
        with tf.name_scope(self.placeholder_scope):
@@ -578,9 +578,9 @@ class TensorflowModel(Model):
    self.verbosity = verbosity
    if tf_class is None:
      tf_class = TensorflowGraph
-    self.train_model = tf_class(model_params, logdir, tasks, task_types,
+    self.train_model = tf_class(logdir, tasks, task_types,
                                train=True, verbosity=verbosity)
-    self.eval_model = tf_class(model_params, logdir, tasks, task_types,
+    self.eval_model = tf_class(logdir, tasks, task_types,
                                train=False, verbosity=verbosity)
    self.fit_transformers = None

+0 −68
try:
    from neuralfingerprint.mol_graph import graph_from_smiles_tuple, degrees
    from neuralfingerprint import mol_graph
    from data_parser import parse_graph
except ImportError:
    pass

import numpy as np

class Molecule(object):
    def __init__(self, atoms, adj_mat, type_adj, deg_list, bonds):
        self.atoms = atoms
        self.adj_mat = adj_mat
        self.type_adj = type_adj
        self.deg_list = deg_list
        self.bonds = bonds    
        
    def get_atoms(self):
        return self.atoms
    
    def get_adj_mat(self):
        return self.adj_mat

    def get_type_adj(self):
        return self.type_adj
    
    def get_deg_list(self):
        return self.deg_list
    
    def get_bonds(self):
        return self.bonds

class SmilesDataManager():
    def __init__(self, raw_smiles, targets, bond_decimals):
        self.raw_smiles = raw_smiles
        self.targets = targets

        # Run parser
        self.run(bond_decimals)

    def run(self, bond_decimals):
        self.parse_data(bond_decimals)

    def get_N_molecules(self):
        return len(self.raw_smiles)
        
    def parse_data(self, bond_decimals):
        self.molecules = []

        k = 0
        while k < self.get_N_molecules():
            smile = self.raw_smiles[k]
            target = self.targets[k]

            # Convert smile to graph
            #print(target, smile)
            graph = mol_graph.graph_from_smiles(smile)

            # Get features
            try:
                atoms, adj_mat, type_adj, deg_list, bonds = parse_graph(graph, bond_decimals)
                self.molecules.append(Molecule(atoms, adj_mat, type_adj, deg_list, bonds))
            except Exception:
                # Remove the bad example; don't advance k here, since
                # np.delete shifts the next element into position k.
                self.raw_smiles = np.delete(self.raw_smiles, k)
                self.targets = np.delete(self.targets, k)
                print("molecule with smile " + smile + " failed to compile")
                continue
            k += 1
+7 −8
@@ -98,8 +98,7 @@ class TensorflowMultiTaskClassifier(TensorflowClassifier):
      mol_features: Molecule descriptor (e.g. fingerprint) tensor with shape
        batch_size x num_features.
    """
-    assert len(self.model_params["data_shape"]) == 1
-    num_features = self.model_params["data_shape"][0]
+    num_features = self.n_inputs
    with self.graph.as_default():
      with tf.name_scope(self.placeholder_scope):
        self.mol_features = tf.placeholder(
@@ -107,10 +106,10 @@ class TensorflowMultiTaskClassifier(TensorflowClassifier):
            shape=[None, num_features],
            name='mol_features')

-      layer_sizes = self.model_params["layer_sizes"]
-      weight_init_stddevs = self.model_params["weight_init_stddevs"]
-      bias_init_consts = self.model_params["bias_init_consts"]
-      dropouts = self.model_params["dropouts"]
+      layer_sizes = self.layer_sizes
+      weight_init_stddevs = self.weight_init_stddevs
+      bias_init_consts = self.bias_init_consts
+      dropouts = self.dropouts
      lengths_set = {
          len(layer_sizes),
          len(weight_init_stddevs),
@@ -158,13 +157,13 @@ class TensorflowMultiTaskClassifier(TensorflowClassifier):
      else:
        # Dummy placeholders
        orig_dict["labels_%d" % task] = np.squeeze(to_one_hot(
-            np.zeros((self.model_params["batch_size"],))))
+            np.zeros((self.batch_size,))))
      if w_b is not None:
        orig_dict["weights_%d" % task] = w_b[:, task]
      else:
        # Dummy placeholders
        orig_dict["weights_%d" % task] = np.ones(
-            (self.model_params["batch_size"],))
+            (self.batch_size,))
    return self._get_feed_dict(orig_dict)

  def predict_proba_on_batch(self, X):
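The dummy placeholders above must match the shapes the graph expects for a full batch. A self-contained numpy sketch of those shapes (to_one_hot here is a minimal stand-in for deepchem's helper, not its actual implementation):

import numpy as np

def to_one_hot(y, num_classes=2):
    # Map a length-N vector of integer class ids to an N x num_classes
    # one-hot matrix.
    y = y.astype(int)
    out = np.zeros((len(y), num_classes))
    out[np.arange(len(y)), y] = 1
    return out

batch_size = 50
# Dummy labels for a task with no data in this batch: class id 0 for every
# example, one-hot encoded to shape (batch_size, num_classes).
dummy_labels = np.squeeze(to_one_hot(np.zeros((batch_size,))))
assert dummy_labels.shape == (batch_size, 2)
# Dummy weights: weight 1 for every example, shape (batch_size,).
dummy_weights = np.ones((batch_size,))
assert dummy_weights.shape == (batch_size,)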
+20 −22
@@ -211,11 +211,11 @@ def is_training():
    raise ValueError('Training mode has more than one setting.')
  return train[0]

-def WeightDecay(model_params):
+def WeightDecay(penalty_type, penalty):
  """Add weight decay.

  Args:
-    model_params: dictionary.
+    penalty_type: 'l1' or 'l2' weight decay.
+    penalty: scalar magnitude of the weight decay.

  Returns:
    A scalar tensor containing the weight decay cost.
@@ -230,14 +230,13 @@ def WeightDecay(model_params):
      variables.append(v)

  with tf.name_scope('weight_decay'):
-    if model_params["penalty_type"] == 'l1':
+    if penalty_type == 'l1':
      cost = tf.add_n([tf.reduce_sum(tf.abs(v)) for v in variables])
-    elif model_params["penalty_type"] == 'l2':
+    elif penalty_type == 'l2':
      cost = tf.add_n([tf.nn.l2_loss(v) for v in variables])
    else:
-      raise NotImplementedError('Unsupported penalty_type %s' %
-                                model_params["penalty_type"])
-    cost *= model_params["penalty"]
+      raise NotImplementedError('Unsupported penalty_type %s' % penalty_type)
+    cost *= penalty
    tf.scalar_summary('Weight Decay Cost', cost)
  return cost

@@ -377,11 +376,10 @@ def Transform(tensor, transform, convolution=True, mask=None):
        tensor = model_utils.Mask(tensor, mask)
  return tensor

-def Optimizer(model_params):
+def Optimizer(optimizer="adam", learning_rate=.001, momentum=.9):
  """Create model optimizer.

  Args:
-    model_params: dictionary.
+    optimizer: one of 'adagrad', 'adam', 'momentum', 'rmsprop', 'sgd'.
+    learning_rate: float learning rate.
+    momentum: float momentum, used by the 'momentum' and 'rmsprop' optimizers.

  Returns:
    A training Optimizer.
@@ -390,18 +388,18 @@ def Optimizer(model_params):
    NotImplementedError: If an unsupported optimizer is requested.
  """
  # TODO(user): gradient clipping (see Minimize)
-  if model_params["optimizer"] == 'adagrad':
-    train_op = tf.train.AdagradOptimizer(model_params["learning_rate"])
-  elif model_params["optimizer"] == 'adam':
-    train_op = tf.train.AdamOptimizer(model_params["learning_rate"])
-  elif model_params["optimizer"] == 'momentum':
-    train_op = tf.train.MomentumOptimizer(model_params["learning_rate"],
-                                          model_params["momentum"])
-  elif model_params["optimizer"] == 'rmsprop':
-    train_op = tf.train.RMSPropOptimizer(model_params["learning_rate"],
-                                         model_params["momentum"])
-  elif model_params["optimizer"] == 'sgd':
-    train_op = tf.train.GradientDescentOptimizer(model_params["learning_rate"])
+  if optimizer == 'adagrad':
+    train_op = tf.train.AdagradOptimizer(learning_rate)
+  elif optimizer == 'adam':
+    train_op = tf.train.AdamOptimizer(learning_rate)
+  elif optimizer == 'momentum':
+    train_op = tf.train.MomentumOptimizer(learning_rate,
+                                          momentum)
+  elif optimizer == 'rmsprop':
+    train_op = tf.train.RMSPropOptimizer(learning_rate,
+                                         momentum)
+  elif optimizer == 'sgd':
+    train_op = tf.train.GradientDescentOptimizer(learning_rate)
  else:
-    raise NotImplementedError('Unsupported optimizer %s' % model_params["optimizer"])
+    raise NotImplementedError('Unsupported optimizer %s' % optimizer)
  return train_op
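For the caller's side of these two signature changes, a minimal sketch (assuming model_ops is imported and loss is an existing scalar tensor; neither appears in this diff):

# Optimizer now takes explicit arguments; momentum keeps its default and is
# simply ignored by optimizers such as 'adam' that do not use it.
opt = model_ops.Optimizer(optimizer="adam", learning_rate=.001)
train_op = opt.minimize(loss, name='train')

# WeightDecay likewise: penalty_type picks l1/l2, penalty scales the cost.
loss += model_ops.WeightDecay(penalty_type="l2", penalty=1e-4)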