Merge pull request #4 from deepchem/master (c0406ec0) · Commits · 钟慕尧 / deepchem

Dockerfile

+8 −17

Original line number	Diff line number	Diff line
		FROM nvidia/cuda
		FROM nvidia/cuda:9.0-cudnn7-runtime

		# Install some utilities
		RUN apt-get update && \
		apt-get install -y -q wget git libxrender1 libsm6 && \
		apt-get install -y -q wget git libxrender1 libsm6 bzip2 && \
		apt-get clean

		# Install miniconda
		RUN MINICONDA="Miniconda2-latest-Linux-x86_64.sh" && \
		RUN MINICONDA="Miniconda3-latest-Linux-x86_64.sh" && \
		wget --quiet https://repo.continuum.io/miniconda/$MINICONDA && \
		bash $MINICONDA -b -p /miniconda && \
		rm -f $MINICONDA
		ENV PATH /miniconda/bin:$PATH

		# Install deepchem conda package from omnia
		# TODO: Uncomment this when there is a stable release of deepchem.
		#RUN conda config --add channels omnia
		#RUN conda install --yes deepchem

		# Install deepchem with GPU support from github using Tue 14 Mar 2017 git head
		# TODO: Get rid of this when there is a stable release of deepchem.
		RUN git clone https://github.com/deepchem/deepchem.git && \
		RUN conda update -n base conda
		RUN export LANG=en_US.UTF-8 && \
		git clone https://github.com/deepchem/deepchem.git && \
		cd deepchem && \
		git checkout tags/1.3.1 && \
		git checkout 2.0.0 && \
		sed -i -- 's/tensorflow$/tensorflow-gpu/g' scripts/install_deepchem_conda.sh && \
		bash scripts/install_deepchem_conda.sh root && \
		bash scripts/install_deepchem_conda.sh && \
		python setup.py develop

		# Clean up
		RUN cd deepchem && \
		git clean -fX

		# Run tests
		#RUN pip install nose && \
		# nosetests -v deepchem --nologcapture

README.md

+3 −3

Original line number	Diff line number	Diff line
		@@ -59,7 +59,7 @@ git clone https://github.com/deepchem/deepchem.git # Clone deepchem source
		cd deepchem
		bash scripts/install_deepchem_conda.sh deepchem
		source activate deepchem
		yes \| pip install tensorflow-gpu==1.5.0 # If you want GPU support
		yes \| pip install tensorflow-gpu==1.6.0 # If you want GPU support
		python setup.py install # Manual install
		nosetests -a '!slow' -v deepchem --nologcapture # Run tests
		```
		@@ -72,7 +72,7 @@ via this installation procedure.
		### Easy Install via Conda

		```bash
		conda install -c deepchem -c rdkit -c conda-forge -c omnia deepchem=1.3.1
		conda install -c deepchem -c rdkit -c conda-forge -c omnia deepchem=2.0.0
		```
		Note: `Easy Install` installs the latest stable version of `deepchem` and _does not install from source_. If you need to install from source make sure you follow the steps [here](#using-a-conda-environment).

		@@ -182,4 +182,4 @@ DeepChem is supported by a number of corporate partners who use DeepChem to solv


		## Version
		1.3.1
		2.0.0

deepchem/data/datasets.py

+6 −6

Original line number	Diff line number	Diff line
		@@ -200,8 +200,8 @@ class Dataset(object):
		>>> dataset = NumpyDataset(np.ones((2,2)))
		>>> for x, y, w, id in dataset.itersamples():
		... print(x.tolist(), y.tolist(), w.tolist(), id)
		[1.0 1.0] [0.0] [0.0] 0
		[1.0 1.0] [0.0] [0.0] 1
		[1.0, 1.0] [0.0] [0.0] 0
		[1.0, 1.0] [0.0] [0.0] 1
		"""
		raise NotImplementedError()

		@@ -409,8 +409,8 @@ class NumpyDataset(Dataset):
		>>> dataset = NumpyDataset(np.ones((2,2)))
		>>> for x, y, w, id in dataset.itersamples():
		... print(x.tolist(), y.tolist(), w.tolist(), id)
		[1.0 1.0] [0.0] [0.0] 0
		[1.0 1.0] [0.0] [0.0] 1
		[1.0, 1.0] [0.0] [0.0] 0
		[1.0, 1.0] [0.0] [0.0] 1
		"""
		n_samples = self._X.shape[0]
		return ((self._X[i], self._y[i], self._w[i], self._ids[i])
		@@ -889,8 +889,8 @@ class DiskDataset(Dataset):
		>>> dataset = DiskDataset.from_numpy(np.ones((2,2)), np.ones((2,1)), verbose=False)
		>>> for x, y, w, id in dataset.itersamples():
		... print(x.tolist(), y.tolist(), w.tolist(), id)
		[1.0 1.0] [0.0] [0.0] 0
		[1.0 1.0] [0.0] [0.0] 1
		[1.0, 1.0] [1.0] [1.0] 0
		[1.0, 1.0] [1.0] [1.0] 1
		"""

		def iterate(dataset):

deepchem/feat/graph_features.py

+50 −7

Original line number	Diff line number	Diff line
		@@ -3,7 +3,6 @@ from __future__ import unicode_literals

		import numpy as np
		from rdkit import Chem
		import itertools, operator

		from deepchem.feat import Featurizer
		from deepchem.feat.mol_graphs import ConvMol, WeaveMol
		@@ -199,8 +198,8 @@ def bond_features(bond, use_chirality=False):
		]
		if use_chirality:
		bond_feats = bond_feats + one_of_k_encoding_unk(
		str(bond.GetStereo(),
		["STEREONONE", "STEREOANY", "STEREOZ", "STEREOE"]))
		str(bond.GetStereo()),
		["STEREONONE", "STEREOANY", "STEREOZ", "STEREOE"])
		return np.array(bond_feats)


		@@ -264,7 +263,8 @@ def find_distance(a1, num_atoms, canon_adj_list, max_distance=7):
		class ConvMolFeaturizer(Featurizer):
		name = ['conv_mol']

		def __init__(self, master_atom=False, use_chirality=False):
		def __init__(self, master_atom=False, use_chirality=False,
		atom_properties=[]):
		"""
		Parameters
		----------
		@@ -274,7 +274,20 @@ class ConvMolFeaturizer(Featurizer):
		the molecule. This technique is briefly discussed in
		Neural Message Passing for Quantum Chemistry
		https://arxiv.org/pdf/1704.01212.pdf

		use_chirality: Boolean
		if true then make the resulting atom features aware of the
		chirality of the molecules in question
		atom_properties: list of string or None
		properties in the RDKit Mol object to use as additional
		atom-level features in the larger molecular feature. If None,
		then no atom-level properties are used. Properties stored in the
		RDKit mol object should be named in the form
		atom XXXXXXXX NAME
		where XXXXXXXX is a zero-padded 8 digit number corresponding to the
		zero-indexed atom index of each atom and NAME is the name of the property
		provided in atom_properties. So "atom 00000000 sasa" would be the
		name of the molecule level property in mol where the solvent
		accessible surface area of atom 0 would be stored.

		Since ConvMol is an object and not a numpy array, need to set dtype to
		object.
		@@ -282,12 +295,39 @@ class ConvMolFeaturizer(Featurizer):
		self.dtype = object
		self.master_atom = master_atom
		self.use_chirality = use_chirality
		self.atom_properties = list(atom_properties)

		def _get_atom_properties(self, atom):
		"""
		For a given input RDKit atom return the values of the properties
		requested when initializing the featurize. See the __init__ of the
		class for a full description of the names of the properties

		Parameters
		----------
		atom: RDKit.rdchem.Atom
		Atom to get the properties of
		returns a numpy lists of floats of the same size as self.atom_properties
		"""
		values = []
		for prop in self.atom_properties:
		mol_prop_name = str("atom %08d %s" % (atom.GetIdx(), prop))
		try:
		values.append(float(atom.GetOwningMol().GetProp(mol_prop_name)))
		except KeyError:
		raise KeyError("No property %s found in %s in %s" %
		(mol_prop_name, atom.GetOwningMol(), self))
		return np.array(values)

		def _featurize(self, mol):
		"""Encodes mol as a ConvMol object."""
		# Get the node features
		idx_nodes = [(a.GetIdx(), atom_features(
		a, use_chirality=self.use_chirality)) for a in mol.GetAtoms()]
		idx_nodes = [(a.GetIdx(),
		np.concatenate((atom_features(
		a, use_chirality=self.use_chirality),
		self._get_atom_properties(a))))
		for a in mol.GetAtoms()]

		idx_nodes.sort() # Sort by ind to ensure same order as rd_kit
		idx, nodes = list(zip(*idx_nodes))

		@@ -315,6 +355,9 @@ class ConvMolFeaturizer(Featurizer):

		return ConvMol(nodes, canon_adj_list)

		def feature_length(self):
		    """
		    Return the length of the per-atom feature vector.

		    The result is the constant 75 plus one entry for every name in
		    ``self.atom_properties``.
		    """
		    # NOTE(review): 75 is presumably the length of the base atom_features
		    # vector -- confirm against atom_features (and use_chirality).
		    n_extra = len(self.atom_properties)
		    return 75 + n_extra


		class WeaveFeaturizer(Featurizer):
		name = ['weave_mol']

deepchem/models/init.py

+6 −2

Original line number	Diff line number	Diff line
		@@ -16,8 +16,8 @@ from deepchem.models.tensorgraph.fcnet import MultiTaskFitTransformRegressor
		from deepchem.models.tensorgraph.IRV import TensorflowMultiTaskIRVClassifier
		from deepchem.models.tensorgraph.robust_multitask import RobustMultitaskClassifier
		from deepchem.models.tensorgraph.robust_multitask import RobustMultitaskRegressor
		from deepchem.models.tensorgraph.progressive_multitask import ProgressiveMultitaskRegressor
		from deepchem.models.tensorgraph.models.graph_models import WeaveTensorGraph, DTNNTensorGraph, DAGTensorGraph, GraphConvTensorGraph, MPNNTensorGraph
		from deepchem.models.tensorgraph.progressive_multitask import ProgressiveMultitaskRegressor, ProgressiveMultitaskClassifier
		from deepchem.models.tensorgraph.models.graph_models import WeaveModel, DTNNTensorGraph, DAGTensorGraph, GraphConvModel, MPNNTensorGraph
		from deepchem.models.tensorgraph.models.symmetry_function_regression import BPSymmetryFunctionRegression, ANIRegression

		from deepchem.models.tensorgraph.models.seqtoseq import SeqToSeq
		@@ -25,3 +25,7 @@ from deepchem.models.tensorgraph.models.gan import GAN, WGAN
		from deepchem.models.tensorgraph.models.text_cnn import TextCNNTensorGraph
		from deepchem.models.tensorgraph.sequential import Sequential
		from deepchem.models.tensorgraph.models.sequence_dnn import SequenceDNN

		#################### Compatibility imports for renamed TensorGraph models. Remove below with DeepChem 3.0. ####################

		from deepchem.models.tensorgraph.models.graph_models import WeaveTensorGraph, GraphConvTensorGraph
		No newline at end of file

Admin message