Cleanup (c452aa55) · Commits · 钟慕尧 / deepchem

deepchem/datasets/__init__.py

+0 −6

Original line number	Diff line number	Diff line
		@@ -116,12 +116,6 @@ class Dataset(object):
		out_ids = "%s-ids.joblib" % basename

		if X is not None:
		############################################## DEBUG
		print("X.shape")
		print(X.shape)
		print("os.path.join(data_dir, out_X)")
		print(os.path.join(data_dir, out_X))
		############################################## DEBUG
		save_to_disk(X, os.path.join(data_dir, out_X))
		save_to_disk(X, os.path.join(data_dir, out_X_transformed))
		X_sums, X_sum_squares, X_n = compute_sums_and_nb_sample(X)

+0 −8

Original line number	Diff line number	Diff line
		@@ -72,14 +72,6 @@ class TestReload(TestAPI):
		# TODO(rbharath): Transformers don't play nice with reload! Namely,
		# reloading will cause the transform to be reapplied. This is undesirable in
		# almost all cases. Need to understand a method to fix this.
		##################################### DEBUG

		print("_run_muv_experiment()")
		print("train_dataset.get_labels()")
		print(train_dataset.get_labels())
		print("train_dataset.get_weights()")
		print(train_dataset.get_weights())
		##################################### DEBUG
		transformers = [
		BalancingTransformer(transform_w=True, dataset=train_dataset)]
		print("Transforming datasets")

+0 −22

Original line number	Diff line number	Diff line
		@@ -95,28 +95,6 @@ def _get_input_type(input_file):
		else:
		raise ValueError("Unrecognized extension %s" % file_extension)

		#def _get_fields(input_file):
		# """Get the names of fields and field_types for input data."""
		# # If CSV input, assume that first row contains labels
		# input_type = _get_input_type(input_file)
		# if input_type == "csv":
		# with open(input_file, "rb") as inp_file_obj:
		# return csv.reader(inp_file_obj).next()
		# elif input_type == "pandas-joblib":
		# df = load_from_disk(input_file)
		# return df.keys()
		# elif input_type == "pandas-pickle":
		# df = load_pickle_from_disk(input_file)
		# return df.keys()
		# # If SDF input, assume that .sdf.csv file contains labels
		# elif input_type == "sdf":
		# label_file = input_file + ".csv"
		# print("Reading labels from %s" % label_file)
		# with open(label_file, "rb") as inp_file_obj:
		# return inp_file_obj.readline()
		# else:
		# raise ValueError("Unrecognized extension for %s" % input_file)

		class DataFeaturizer(object):
		"""
		Handles loading/featurizing of chemical samples (datapoints).

+0 −6

Original line number	Diff line number	Diff line
		@@ -59,20 +59,14 @@ class SingletaskToMultitask(Model):
		[task], {task: self.task_types[task]}, self.model_params,
		self.task_model_dirs[task],
		verbosity=self.verbosity)
		#################################### DEBUG
		if y_task.size > 0:
		#################################### DEBUG
		task_model.raw_model.fit(X_task, y_task)
		#################################### DEBUG
		else:
		print("No labels for task %s" % task)
		print("Fitting on dummy dataset.")
		X_task_fake = np.zeros_like(X)
		y_task_fake = np.zeros_like(w_task)
		print("X.shape, y.shape, w.shape, y_task.shape, w_task.shape, y_task_fake.shape")
		print(X.shape, y.shape, w.shape, y_task.shape, w_task.shape, y_task_fake.shape)
		task_model.raw_model.fit(X_task_fake, y_task_fake)
		#################################### DEBUG
		task_model.save()

		def predict_on_batch(self, X):