Commit c2c07365 authored by Bharath Ramsundar's avatar Bharath Ramsundar
Browse files

Some more pylint fixes.

parent 41e399bf
Loading
Loading
Loading
Loading
+18 −24
Original line number Diff line number Diff line
"""
Code for processing datasets using scikit-learn.
"""
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import MultiTaskLasso 
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import RidgeCV
from sklearn.linear_model import LassoCV
from sklearn.linear_model import ElasticNetCV
from sklearn.linear_model import LassoLarsCV
from sklearn.svm import SVR

def fit_singletask_models(train_data, modeltype, task_types):
def fit_singletask_models(train_data, modeltype):
  """Fits singletask linear regression models to potency.

  Parameters
@@ -26,23 +23,20 @@ def fit_singletask_models(train_data, modeltype, task_types):
    Type of split for train/test. Either random or scaffold.
  seed: int (optional)
    Seed to initialize np.random.
  task_types: dict 
    dict mapping target names to output type. Each output type must be either
    "classification" or "regression".
  output_transforms: dict
    dict mapping target names to label transform. Each output type must be either
    None or "log". Only for regression outputs.
  """
  models = {}
  for index, target in enumerate(sorted(train_data.keys())):
  for target in sorted(train_data.keys()):
    print "Building model for target %s" % target
    (_, X_train, y_train, W_train) = train_data[target]
    (_, X_train, y_train, _) = train_data[target]
    if modeltype == "rf_regressor":
      model = RandomForestRegressor(n_estimators=500, n_jobs=-1,
          warm_start=True, max_features="sqrt")
      model = RandomForestRegressor(
          n_estimators=500, n_jobs=-1, warm_start=True, max_features="sqrt")
    elif modeltype == "rf_classifier":
      model = RandomForestClassifier(n_estimators=500, n_jobs=-1,
          warm_start=True, max_features="sqrt")
      model = RandomForestClassifier(
          n_estimators=500, n_jobs=-1, warm_start=True, max_features="sqrt")
    elif modeltype == "logistic":
      model = LogisticRegression(class_weight="auto")
    elif modeltype == "linear":
@@ -63,11 +57,11 @@ def fit_singletask_models(train_data, modeltype, task_types):

# TODO(rbharath): I believe this is broken. Update it to work with the rest of
# the package.
def fit_multitask_rf(train_data):
  """Fits a multitask random forest classifier to the provided dataset.

  Parameters
  ----------
  train_data: tuple
    Tuple of (ids, X_train, y_train, weights). Only X_train and y_train
    are consumed here; ids and weights are ignored.

  Returns
  -------
  sklearn.ensemble.RandomForestClassifier
    The fitted model.
  """
  (_, X_train, y_train, _) = train_data
  # NOTE(review): class_weight="auto" was deprecated and later removed from
  # scikit-learn in favor of "balanced" -- confirm against the pinned
  # sklearn version before changing it, since the string changes behavior.
  model = RandomForestClassifier(
      n_estimators=100, n_jobs=-1, class_weight="auto")
  model.fit(X_train, y_train)
  return model
+1 −1
Original line number Diff line number Diff line
@@ -453,7 +453,7 @@ def _fit_model(paths, model, task_type, n_hidden, learning_rate, dropout,
    models = fit_3D_convolution(
        train_dict, task_types, nb_epoch=n_epochs, batch_size=batch_size)
  else:
    models = fit_singletask_models(train_dict, model, task_types)
    models = fit_singletask_models(train_dict, model)
  modeltype = get_model_type(model)
  save_model(models, modeltype, saved_out)

+43 −39
Original line number Diff line number Diff line
@@ -11,7 +11,6 @@ import csv
import numpy as np
import warnings
import sys
from deep_chem.utils.preprocess import dataset_to_numpy
from deep_chem.utils.preprocess import labels_to_weights
from deep_chem.utils.preprocess import undo_transform_outputs
from sklearn.metrics import mean_squared_error
@@ -20,21 +19,24 @@ from sklearn.metrics import r2_score
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics import recall_score
from sklearn.metrics import accuracy_score
from rdkit import Chem
from rdkit.Chem.Descriptors import ExactMolWt

def compute_model_performance(raw_test_data, test_data, task_types, models,
  modeltype, output_transforms, aucs=True, r2s=False, rms=False, recall=False,
  accuracy=False, mcc=False, print_file=sys.stdout):
                              modeltype, output_transforms, aucs=True,
                              r2s=False, rms=False, recall=False,
                              accuracy=False, mcc=False,
                              print_file=sys.stdout):
  """Computes statistics for model performance on test set."""
  all_results, auc_vals, r2_vals, rms_vals, mcc_vals, recall_vals, accuracy_vals = {}, {}, {}, {}, {}, {}, {}
  all_results = {}
  auc_vals, mcc_vals, recall_vals, accuracy_vals = {}, {}, {}, {}
  r2_vals, rms_vals = {}, {}
  for index, target in enumerate(sorted(test_data.keys())):
    print("Evaluating model %d" % index, file=print_file)
    print("Target %s" % target, file=print_file)
    (test_ids, Xtest, ytest, wtest) = test_data[target]
    (test_ids, X_test, y_test, w_test) = test_data[target]
    (_, _, ytest_raw, _) = raw_test_data[target]
    model = models[target]
    results = eval_model(test_ids, Xtest, ytest, ytest_raw, wtest, model,
    results = eval_model(
        test_ids, X_test, y_test, ytest_raw, w_test, model,
        {target: task_types[target]}, modeltype=modeltype,
        output_transforms=output_transforms)
    all_results[target] = results[target]
@@ -93,7 +95,8 @@ def model_predictions(X, model, n_targets, task_types, modeltype="sklearn"):
  # an upstream change so the evaluator doesn't have to worry about this.
  if len(np.shape(X)) > 2:  # Dealing with 3D data
    if len(np.shape(X)) != 5:
      raise ValueError("Tensorial datatype must be of shape (n_samples, N, N, N, n_channels).")
      raise ValueError(
          "Tensorial datatype must be of shape (n_samples, N, N, N, n_channels).")
    (n_samples, axis_length, _, _, n_channels) = np.shape(X)
    X = np.reshape(X, (n_samples, axis_length, n_channels, axis_length, axis_length))
  if modeltype == "keras-graph":
@@ -112,13 +115,14 @@ def model_predictions(X, model, n_targets, task_types, modeltype="sklearn"):
    ypreds = model.predict(X)
  else:
    raise ValueError("Improper modeltype.")
  if type(ypreds) == np.ndarray:
  if isinstance(ypreds, np.ndarray):
    ypreds = np.squeeze(ypreds)
  if type(ypreds) != list:
  if not isinstance(ypreds, list):
    ypreds = [ypreds]
  return ypreds

def eval_model(ids, X, Ytrue, Ytrue_raw, W, model, task_types, output_transforms, modeltype="sklearn"):
def eval_model(ids, X, Ytrue, Ytrue_raw, W, model, task_types,
               output_transforms, modeltype="sklearn"):
  """Evaluates the provided model on the test-set.

  Returns a dict which maps target-names to pairs of np.ndarrays (ytrue,
@@ -140,11 +144,11 @@ def eval_model(ids, X, Ytrue, Ytrue_raw, W, model, task_types, output_transforms
    Either sklearn, keras, or keras_multitask
  """
  sorted_targets = sorted(task_types.keys())
  ypreds = model_predictions(X, model, len(task_types),
      task_types, modeltype=modeltype)
  ypreds = model_predictions(
      X, model, len(task_types), task_types, modeltype=modeltype)
  results = {}
  for target_ind, target in enumerate(sorted_targets):
    ytrue_raw, ytrue, ypred = Ytrue_raw[:, target_ind], Ytrue[:, target_ind], ypreds[target_ind]
    ytrue_raw, _, ypred = Ytrue_raw[:, target_ind], Ytrue[:, target_ind], ypreds[target_ind]
    ypred = undo_transform_outputs(ytrue_raw, ypred, output_transforms)
    results[target] = (ids, np.squeeze(ytrue_raw), np.squeeze(ypred))
  return results
@@ -156,13 +160,13 @@ def results_to_csv(results, out, task_type="classification"):
    if task_type == "classification":
      yscores = np.around(yscores[:, 1]).astype(int)
    elif task_type == "regression":
      if type(yscores[0]) == np.ndarray:
      if isinstance(yscores[0], np.ndarray):
        yscores = yscores[:, 0]
    with open(out, "wb") as csvfile:
      csvwriter = csv.writer(csvfile, delimiter="\t")
      csvwriter.writerow(["Ids", "True", "Model-Prediction"])
      for id, ytrue, yscore in zip(mol_ids, ytrues, yscores):
        csvwriter.writerow([id, ytrue, yscore])
      for mol_id, ytrue, yscore in zip(mol_ids, ytrues, yscores):
        csvwriter.writerow([mol_id, ytrue, yscore])
    print("Writing results on test set for target %s to %s" % (target, out))


@@ -228,7 +232,7 @@ def compute_roc_auc_scores(results, task_types):
    sample_weights = labels_to_weights(ytrue)
    try:
      score = roc_auc_score(ytrue, yscore[:, 1], sample_weight=sample_weights)
    except Exception as e:
    except Exception:
      warnings.warn("ROC AUC score calculation failed.")
      score = 0.5
    print("Target %s: AUC %f" % (target, score))