Merge branch 'master' of https://github.com/deepchem/deepchem (8b3e3088) · Commits · 钟慕尧 / deepchem

README.md

+1 −1

Original line number	Diff line number	Diff line
		@@ -59,7 +59,7 @@ git clone https://github.com/deepchem/deepchem.git # Clone deepchem source
		cd deepchem
		bash scripts/install_deepchem_conda.sh deepchem
		source activate deepchem
		yes | pip install tensorflow-gpu==1.5.0 # If you want GPU support
		yes | pip install tensorflow-gpu==1.6.0 # If you want GPU support
		python setup.py install # Manual install
		nosetests -a '!slow' -v deepchem --nologcapture # Run tests
		```

deepchem/data/datasets.py

+6 −6

Original line number	Diff line number	Diff line
		@@ -200,8 +200,8 @@ class Dataset(object):
		>>> dataset = NumpyDataset(np.ones((2,2)))
		>>> for x, y, w, id in dataset.itersamples():
		... print(x.tolist(), y.tolist(), w.tolist(), id)
		[1.0 1.0] [0.0] [0.0] 0
		[1.0 1.0] [0.0] [0.0] 1
		[1.0, 1.0] [0.0] [0.0] 0
		[1.0, 1.0] [0.0] [0.0] 1
		"""
		raise NotImplementedError()

		@@ -409,8 +409,8 @@ class NumpyDataset(Dataset):
		>>> dataset = NumpyDataset(np.ones((2,2)))
		>>> for x, y, w, id in dataset.itersamples():
		... print(x.tolist(), y.tolist(), w.tolist(), id)
		[1.0 1.0] [0.0] [0.0] 0
		[1.0 1.0] [0.0] [0.0] 1
		[1.0, 1.0] [0.0] [0.0] 0
		[1.0, 1.0] [0.0] [0.0] 1
		"""
		n_samples = self._X.shape[0]
		return ((self._X[i], self._y[i], self._w[i], self._ids[i])
		@@ -889,8 +889,8 @@ class DiskDataset(Dataset):
		>>> dataset = DiskDataset.from_numpy(np.ones((2,2)), np.ones((2,1)), verbose=False)
		>>> for x, y, w, id in dataset.itersamples():
		... print(x.tolist(), y.tolist(), w.tolist(), id)
		[1.0 1.0] [0.0] [0.0] 0
		[1.0 1.0] [0.0] [0.0] 1
		[1.0, 1.0] [1.0] [1.0] 0
		[1.0, 1.0] [1.0] [1.0] 1
		"""

		def iterate(dataset):

deepchem/feat/graph_features.py

+50 −7

Original line number	Diff line number	Diff line
		@@ -3,7 +3,6 @@ from __future__ import unicode_literals

		import numpy as np
		from rdkit import Chem
		import itertools, operator

		from deepchem.feat import Featurizer
		from deepchem.feat.mol_graphs import ConvMol, WeaveMol
		@@ -199,8 +198,8 @@ def bond_features(bond, use_chirality=False):
		]
		if use_chirality:
		bond_feats = bond_feats + one_of_k_encoding_unk(
		str(bond.GetStereo(),
		["STEREONONE", "STEREOANY", "STEREOZ", "STEREOE"]))
		str(bond.GetStereo()),
		["STEREONONE", "STEREOANY", "STEREOZ", "STEREOE"])
		return np.array(bond_feats)


		@@ -264,7 +263,8 @@ def find_distance(a1, num_atoms, canon_adj_list, max_distance=7):
		class ConvMolFeaturizer(Featurizer):
		name = ['conv_mol']

		def __init__(self, master_atom=False, use_chirality=False):
		def __init__(self, master_atom=False, use_chirality=False,
		atom_properties=[]):
		"""
		Parameters
		----------
		@@ -274,7 +274,20 @@ class ConvMolFeaturizer(Featurizer):
		the molecule. This technique is briefly discussed in
		Neural Message Passing for Quantum Chemistry
		https://arxiv.org/pdf/1704.01212.pdf

		use_chirality: Boolean
		if true then make the resulting atom features aware of the
		chirality of the molecules in question
		atom_properties: list of string or None
		properties in the RDKit Mol object to use as additional
		atom-level features in the larger molecular feature. If None,
		then no atom-level properties are used. Each property must be stored
		on the RDKit Mol object under a name of the form
		atom XXXXXXXX NAME
		where XXXXXXXX is a zero-padded 8-digit number corresponding to the
		zero-indexed atom index of each atom and NAME is the name of the property
		provided in atom_properties. So "atom 00000000 sasa" would be the
		name of the molecule level property in mol where the solvent
		accessible surface area of atom 0 would be stored.

		Since ConvMol is an object and not a numpy array, need to set dtype to
		object.
		@@ -282,12 +295,39 @@ class ConvMolFeaturizer(Featurizer):
		self.dtype = object
		self.master_atom = master_atom
		self.use_chirality = use_chirality
		self.atom_properties = list(atom_properties)

		def _get_atom_properties(self, atom):
		  """Look up the extra per-atom properties requested at construction.

		  For every name in self.atom_properties, the value is read from the
		  molecule that owns `atom`, under the molecule-level property name
		  "atom XXXXXXXX NAME" (XXXXXXXX is the atom index zero-padded to 8
		  digits, NAME is the requested property name), and converted to float.

		  Parameters
		  ----------
		  atom: RDKit.rdchem.Atom
		    Atom to get the properties of

		  Returns
		  -------
		  numpy array of floats, one entry per name in self.atom_properties,
		  in the same order

		  Raises
		  ------
		  KeyError
		    If the owning molecule has no property with the expected name.
		  """
		  prop_values = []
		  for prop_name in self.atom_properties:
		    # Molecule-level key that encodes both the atom index and the property.
		    key = str("atom %08d %s" % (atom.GetIdx(), prop_name))
		    try:
		      raw_value = atom.GetOwningMol().GetProp(key)
		    except KeyError:
		      # Re-raise with a message naming the missing key and its molecule.
		      raise KeyError("No property %s found in %s in %s" %
		                     (key, atom.GetOwningMol(), self))
		    prop_values.append(float(raw_value))
		  return np.array(prop_values)

		def _featurize(self, mol):
		"""Encodes mol as a ConvMol object."""
		# Get the node features
		idx_nodes = [(a.GetIdx(), atom_features(
		a, use_chirality=self.use_chirality)) for a in mol.GetAtoms()]
		idx_nodes = [(a.GetIdx(),
		np.concatenate((atom_features(
		a, use_chirality=self.use_chirality),
		self._get_atom_properties(a))))
		for a in mol.GetAtoms()]

		idx_nodes.sort() # Sort by ind to ensure same order as rd_kit
		idx, nodes = list(zip(*idx_nodes))

		@@ -315,6 +355,9 @@ class ConvMolFeaturizer(Featurizer):

		return ConvMol(nodes, canon_adj_list)

		def feature_length(self):
		  """Return the length of the per-atom feature vector.

		  This is a fixed base of 75 plus one entry for every name in
		  self.atom_properties.
		  """
		  # NOTE(review): 75 is presumably the length of the atom_features()
		  # output without chirality -- confirm against atom_features, and check
		  # whether use_chirality=True changes that length.
		  n_extra_properties = len(self.atom_properties)
		  return 75 + n_extra_properties


		class WeaveFeaturizer(Featurizer):
		name = ['weave_mol']

deepchem/models/init.py

+6 −2

Original line number	Diff line number	Diff line
		@@ -16,8 +16,8 @@ from deepchem.models.tensorgraph.fcnet import MultiTaskFitTransformRegressor
		from deepchem.models.tensorgraph.IRV import TensorflowMultiTaskIRVClassifier
		from deepchem.models.tensorgraph.robust_multitask import RobustMultitaskClassifier
		from deepchem.models.tensorgraph.robust_multitask import RobustMultitaskRegressor
		from deepchem.models.tensorgraph.progressive_multitask import ProgressiveMultitaskRegressor
		from deepchem.models.tensorgraph.models.graph_models import WeaveTensorGraph, DTNNTensorGraph, DAGTensorGraph, GraphConvTensorGraph, MPNNTensorGraph
		from deepchem.models.tensorgraph.progressive_multitask import ProgressiveMultitaskRegressor, ProgressiveMultitaskClassifier
		from deepchem.models.tensorgraph.models.graph_models import WeaveTensorGraph, DTNNTensorGraph, DAGTensorGraph, GraphConvModel, MPNNTensorGraph
		from deepchem.models.tensorgraph.models.symmetry_function_regression import BPSymmetryFunctionRegression, ANIRegression

		from deepchem.models.tensorgraph.models.seqtoseq import SeqToSeq
		@@ -25,3 +25,7 @@ from deepchem.models.tensorgraph.models.gan import GAN, WGAN
		from deepchem.models.tensorgraph.models.text_cnn import TextCNNTensorGraph
		from deepchem.models.tensorgraph.sequential import Sequential
		from deepchem.models.tensorgraph.models.sequence_dnn import SequenceDNN

		#################### Compatibility imports for renamed TensorGraph models. Remove below with DeepChem 3.0. ####################

		from deepchem.models.tensorgraph.models.graph_models import GraphConvTensorGraph

deepchem/models/tensorgraph/IRV.py

+10 −36

Original line number	Diff line number	Diff line
		@@ -8,7 +8,7 @@ import tensorflow as tf
		from deepchem.utils.save import log
		from deepchem.models.tensorgraph.tensor_graph import TensorGraph
		from deepchem.models.tensorgraph.layers import Layer, SigmoidCrossEntropy, \
		Sigmoid, Feature, Label, Weights, Concat, WeightedError
		Sigmoid, Feature, Label, Weights, Concat, WeightedError, Stack
		from deepchem.models.tensorgraph.layers import convert_to_layers
		from deepchem.trans import undo_transforms

		@@ -178,59 +178,33 @@ class TensorflowMultiTaskIRVClassifier(TensorGraph):
		https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2750043/
		"""
		self.mol_features = Feature(shape=(None, self.n_features))
		self._labels = Label(shape=(None, self.n_tasks))
		self._weights = Weights(shape=(None, self.n_tasks))
		predictions = IRVLayer(self.n_tasks, self.K, in_layers=[self.mol_features])
		costs = []
		self.labels_fd = []
		outputs = []
		for task in range(self.n_tasks):
		task_output = Slice(task, 1, in_layers=[predictions])
		sigmoid = Sigmoid(in_layers=[task_output])
		self.add_output(sigmoid)
		outputs.append(sigmoid)

		label = Label(shape=(None, 1))
		self.labels_fd.append(label)
		label = Slice(task, axis=1, in_layers=[self._labels])
		cost = SigmoidCrossEntropy(in_layers=[label, task_output])
		costs.append(cost)
		all_cost = Concat(in_layers=costs, axis=1)
		self.weights = Weights(shape=(None, self.n_tasks))
		loss = WeightedError(in_layers=[all_cost, self.weights]) + \
		loss = WeightedError(in_layers=[all_cost, self._weights]) + \
		IRVRegularize(predictions, self.penalty, in_layers=[predictions])
		self.set_loss(loss)

		def default_generator(self,
		                      dataset,
		                      epochs=1,
		                      predict=False,
		                      deterministic=True,
		                      pad_batches=True):
		  """Yield one feed dict per batch, TensorGraph style.

		  Parameters
		  ----------
		  dataset: object providing iterbatches(batch_size, deterministic,
		    pad_batches) that yields (X, y, w, ids) tuples
		  epochs: int
		    Number of passes over the dataset
		  predict: bool
		    When False, the start of every epoch is logged
		  deterministic: bool
		    Forwarded to dataset.iterbatches
		  pad_batches: bool
		    Forwarded to dataset.iterbatches

		  Yields
		  ------
		  dict mapping each per-task label layer in self.labels_fd to its
		  single-column slice of y (when y is not None), self.weights to w
		  (when w is not None), and self.mol_features to X.
		  """
		  for epoch in range(epochs):
		    if not predict:
		      logger.info('Starting epoch %i' % epoch)
		    batches = dataset.iterbatches(
		        batch_size=self.batch_size,
		        deterministic=deterministic,
		        pad_batches=pad_batches)
		    # The batch ids are not needed to build the feed dict.
		    for X_b, y_b, w_b, _ in batches:
		      feed_dict = {}
		      if y_b is not None:
		        # One label placeholder per task; keep the column axis with a slice.
		        for task, label_layer in enumerate(self.labels_fd):
		          feed_dict[label_layer] = y_b[:, task:task + 1]
		      if w_b is not None:
		        feed_dict[self.weights] = w_b
		      feed_dict[self.mol_features] = X_b
		      yield feed_dict
		outputs = Stack(axis=1, in_layers=outputs)
		self.add_output(outputs)

		def predict(self, dataset, transformers=[], outputs=None):
		out = super(TensorflowMultiTaskIRVClassifier, self).predict(
		dataset, transformers=transformers, outputs=outputs)
		out = np.concatenate(out, axis=1)
		out = np.round(out).astype(int)
		return out

		def predict_proba(self, dataset, transformers=[], outputs=None):
		out = super(TensorflowMultiTaskIRVClassifier, self).predict_proba(
		dataset, transformers=transformers, outputs=outputs)
		out = np.concatenate(out, axis=1)
		out = np.stack([1 - out, out], axis=2)
		return out
		return np.concatenate([1 - out, out], axis=2)

Admin message