Merge pull request #357 from rbharath/tf_keras_refactor (f28d4dcc) · Commits · 钟慕尧 / deepchem

.travis.yml

+2 −1

Original line number	Diff line number	Diff line
		@@ -20,7 +20,8 @@ install:
		- conda install -c omnia openbabel
		- conda install joblib
		- conda install h5py
		- pip install keras
		- pip install six
		#- pip install keras
		- export KERAS_BACKEND=tensorflow
		- conda install -c omnia mdtraj
		- pip install tensorflow

deepchem/data/datasets.py

+5 −0

Original line number	Diff line number	Diff line
		@@ -162,8 +162,10 @@ class Dataset(object):
		"""Get an object that iterates over the samples in the dataset.

		Example:

		>>> for x, y, w, id in dataset.itersamples():
		...     print(x, y, w, id)

		"""
		raise NotImplementedError()

		@@ -317,8 +319,10 @@ class NumpyDataset(Dataset):
		"""Get an object that iterates over the samples in the dataset.

		Example:

		>>> for x, y, w, id in dataset.itersamples():
		...     print(x, y, w, id)

		"""
		n_samples = self._X.shape[0]
		return ((self._X[i], self._y[i], self._w[i], self._ids[i])
		@@ -580,6 +584,7 @@ class DiskDataset(Dataset):
		"""Get an object that iterates over the samples in the dataset.

		Example:

		>>> for x, y, w, id in dataset.itersamples():
		...     print(x, y, w, id)
		"""

deepchem/feat/mol_graphs.py

+2 −2

Original line number	Diff line number	Diff line
		@@ -349,8 +349,8 @@ class ConvMol(object):
		return concat_mol

		class MultiConvMol(object):
		"""Holds information about multiple molecules, for use in feeding information into
		tensorflow or keras. Generated using the agglomerate_mols function
		"""Holds information about multiple molecules, for use in feeding information
		into tensorflow. Generated using the agglomerate_mols function
		"""
		def __init__(self, nodes, deg_adj_lists, deg_slice, membership, num_mols):

deepchem/models/__init__.py

+3 −3

Original line number	Diff line number	Diff line
		@@ -7,9 +7,9 @@ from __future__ import unicode_literals

		from deepchem.models.models import Model
		from deepchem.models.sklearn_models import SklearnModel
		from deepchem.models.tf_keras_models.multitask_classifier import MultitaskGraphClassifier
		from deepchem.models.tf_keras_models.multitask_regressor import MultitaskGraphRegressor
		from deepchem.models.tf_keras_models.support_classifier import SupportGraphClassifier
		from deepchem.models.tf_new_models.multitask_classifier import MultitaskGraphClassifier
		from deepchem.models.tf_new_models.multitask_regressor import MultitaskGraphRegressor
		from deepchem.models.tf_new_models.support_classifier import SupportGraphClassifier
		from deepchem.models.multitask import SingletaskToMultitask

		from deepchem.models.tensorflow_models.fcnet import TensorflowMultiTaskRegressor

deepchem/models/keras_models/__init__.py

deleted 100644 → 0

+0 −108

Original line number	Diff line number	Diff line
		"""
		Code for processing the Google vs-datasets using keras.
		"""
		from __future__ import print_function
		from __future__ import division
		from __future__ import unicode_literals

		import os
		import numpy as np
		#from keras.models import Graph
		#from keras.models import load_model
		#from keras.models import model_from_json
		#from keras.layers.core import Dense, Dropout, Activation
		#from keras.layers.normalization import BatchNormalization
		#from keras.optimizers import SGD
		#from deepchem.models import Model
		#
		#class KerasModel(Model):
		# """
		# Abstract base class shared across all Keras models.
		# """
		#
		# def save(self):
		# """
		# Saves underlying keras model to disk.
		# """
		# model = self.model_instance
		# filename, _ = os.path.splitext(Model.get_model_filename(self.model_dir))
		#
		# ## Note that keras requires the model architecture and weights to be stored
		# ## separately. A json file is generated that specifies the model architecture.
		# ## The weights will be stored in an h5 file. The pkl.gz file will store the
		# ## target name.
		# json_filename = "%s.%s" % (filename, "json")
		# h5_filename = "%s.%s" % (filename, "h5")
		# self.model_instance.save(h5_filename)
		# # Save architecture
		# json_string = model.to_json()
		# with open(json_filename, "w") as file_obj:
		# file_obj.write(json_string)
		# model.save_weights(h5_filename, overwrite=True)
		#
		# def reload(self, custom_objects={}):
		# """
		# Load keras multitask DNN from disk.
		# """
		# filename = Model.get_model_filename(self.model_dir)
		# filename, _ = os.path.splitext(filename)
		#
		# json_filename = "%s.%s" % (filename, "json")
		# h5_filename = "%s.%s" % (filename, "h5")
		#
		# with open(json_filename) as file_obj:
		# model = model_from_json(file_obj.read(), custom_objects=custom_objects)
		# model.load_weights(h5_filename)
		# self.model_instance = model
		#
		# def predict_on_batch(self, X, pad_batch=False):
		# """
		# Makes predictions on given batch of new data.
		#
		# Parameters
		# ----------
		# X: np.ndarray
		# Features
		# pad_batch: bool, optional
		# Used for Tensorflow models with rigid batch-size requirements.
		# """
		# n_samples = len(X)
		# n_tasks = self.get_num_tasks()
		# if pad_batch:
		# X = pad_features(self.batch_size, X)
		# y_pred = self.model_instance.predict_on_batch(X)
		# y_pred = np.reshape(y_pred, (n_samples, n_tasks))
		# return y_pred
		#
		# # TODO(rbharath): The methods below aren't extensible and depend on
		# # implementation details of fcnet. Better way to expose this information?
		# def fit_on_batch(self, X, y, w):
		# """Fit model on batch of data."""
		# return self.model_instance.fit_on_batch(X, y, w)
		#
		# def get_num_tasks(self):
		# return self.model_instance.n_tasks
		#
		# def predict_proba_on_batch(self, X, pad_batch=False, n_classes=2):
		# """
		# Makes predictions of class probabilities on given batch of new data.
		#
		# Parameters
		# ----------
		# X: np.ndarray
		# Features
		# pad_batch: bool, optional
		# Ignored for Sklearn Model. Only used for Tensorflow models
		# with rigid batch-size requirements.
		# n_classes: int
		# Number of classifier classes
		# """
		# n_samples = len(X)
		# n_tasks = self.get_num_tasks()
		#
		# if pad_batch:
		# X = pad_features(self.batch_size, X)
		# y_pred_proba = self.model_instance.predict_proba_on_batch(X,
		# n_classes)
		# y_pred_proba = np.reshape(y_pred_proba, (n_samples, n_tasks, n_classes))
		# return y_pred_proba

Admin message