Commit 55437e02 authored by Bharath Ramsundar's avatar Bharath Ramsundar Committed by GitHub
Browse files

Merge pull request #418 from miaecle/HIV

HIV dataset
parents 7969343c 36440eb4
Loading
Loading
Loading
Loading
+34 −10
Original line number Diff line number Diff line
@@ -221,6 +221,12 @@ Index splitting
|           |Multitask network   |0.934              |0.830              |
|           |robust MT-NN        |0.949              |0.827              |
|           |graph convolution   |0.946              |0.860              |
|hiv        |logistic regression |0.864              |0.739              |
|           |Random Forest       |0.999              |0.720              |
|           |IRV                 |0.841              |0.724              |
|           |Multitask network   |0.761              |0.652              |
|           |robust MT-NN        |0.780              |0.708              |
|           |graph convolution   |0.876              |0.779              |

Random splitting

@@ -256,6 +262,12 @@ Random splitting
|           |Multitask network   |0.951              |0.834              |
|           |robust MT-NN        |0.959              |0.830              |
|           |graph convolution   |0.975              |0.876              |
|hiv        |logistic regression |0.860              |0.806              |
|           |Random Forest       |0.999              |0.850              |
|           |IRV                 |0.839              |0.809              |
|           |Multitask network   |0.742              |0.715              |
|           |robust MT-NN        |0.753              |0.727              |
|           |graph convolution   |0.847              |0.803              |

Scaffold splitting

@@ -291,6 +303,12 @@ Scaffold splitting
|           |Multitask network   |0.947              |0.862              |
|           |robust MT-NN        |0.953              |0.890              |
|           |graph convolution   |0.957              |0.823              |
|hiv        |logistic regression |0.858              |0.798              |
|           |Random Forest       |0.946              |0.562              |
|           |IRV                 |0.847              |0.811              |
|           |Multitask network   |0.775              |0.765              |
|           |robust MT-NN        |0.785              |0.748              |
|           |graph convolution   |0.867              |0.769              |

* Regression

@@ -329,20 +347,19 @@ Scaffold splitting
|chembl          |MT-NN regression    |Index       |0.443         |0.427         |
|                |MT-NN regression    |Random      |0.464         |0.434         |
|                |MT-NN regression    |Scaffold    |0.484         |0.361         |
|qm7             |NN regression       |Index       |0.994         |0.969         |
|                |NN regression       |Random      |0.995         |0.992         |
|                |NN regression       |Stratified  |0.992         |0.992         | 
|qm7b            |MT-NN regression    |Index       |0.883         |0.785         |
|                |MT-NN regression    |Random      |0.864         |0.838         |
|                |MT-NN regression    |Stratified  |0.871         |0.847         | 
|qm7             |NN regression       |Index       |0.997         |0.986         |
|                |NN regression       |Random      |0.999         |0.999         |
|                |NN regression       |Stratified  |0.999         |0.999         | 
|qm7b            |MT-NN regression    |Index       |0.931         |0.803         |
|                |MT-NN regression    |Random      |0.923         |0.884         |
|                |MT-NN regression    |Stratified  |0.934         |0.884         | 
|kaggle          |MT-NN regression    |User-defined|0.748         |0.452         |

|Dataset         |Model            |Splitting   |Train score/MAE(kcal/mol)|Valid score/MAE(kcal/mol)|
|----------------|-----------------|------------|-------------------------|-------------------------|
|qm7             |NN regression    |Index       |22.1                     |23.2                     |
|                |NN regression    |Random      |16.2                     |17.7                     |
|                |NN regression    |Stratified  |20.5                     |20.8                     |
|                |NN regression    |User-defined|9.0                      |9.5                      |
|qm7             |NN regression    |Index       |11.0                     |12.0                     |
|                |NN regression    |Random      |7.12                     |7.53                     |
|                |NN regression    |Stratified  |6.61                     |7.34                     |


* General features
@@ -357,6 +374,7 @@ Number of tasks and examples in the datasets
|sider           |27         |1427       |
|toxcast         |617        |8615       |
|clintox         |2          |1491       |
|hiv             |1          |41913      |
|delaney         |1          |1128       |
|sampl           |1          |643        |
|kaggle          |15         |173065     |
@@ -404,6 +422,12 @@ Time needed for benchmark test(~20h in total)
|                |random forest       |15              |200            |
|                |IRV                 |15              |10             |
|                |graph convolution   |20              |130            |
|hiv             |logistic regression |180             |40             |
|                |Multitask network   |180             |350            |
|                |robust MT-NN        |180             |450            |
|                |random forest       |180             |2800           |
|                |IRV                 |180             |200            |
|                |graph convolution   |180             |1300           |
|delaney         |MT-NN regression    |10              |40             |
|                |graphconv regression|10              |40             |
|                |random forest       |10              |30             |
+48 −53
Original line number Diff line number Diff line
@@ -17,6 +17,7 @@ from deepchem.utils.save import log
from deepchem.data import pad_features
from deepchem.metrics import to_one_hot


def weight_decay(penalty_type, penalty):
  # due to the different shape of weight(ndims=2) and bias(ndims=1),
  # will use this version for logreg
@@ -40,6 +41,7 @@ def weight_decay(penalty_type, penalty):

class TensorflowLogisticRegression(TensorflowGraphModel):
  """ A simple tensorflow based logistic regression model. """

  def build(self, graph, name_scopes, training):
    """Constructs the graph architecture of model: n_tasks * sigmoid nodes.

@@ -47,15 +49,13 @@ class TensorflowLogisticRegression(TensorflowGraphModel):
      mol_features: Molecule descriptor (e.g. fingerprint) tensor with shape
        batch_size x n_features.
    """
    placeholder_scope = TensorflowGraph.get_placeholder_scope(
        graph, name_scopes)
    placeholder_scope = TensorflowGraph.get_placeholder_scope(graph,
                                                              name_scopes)
    n_features = self.n_features
    with graph.as_default():
      with placeholder_scope:
        self.mol_features = tf.placeholder(
            tf.float32,
            shape=[None, n_features],
            name='mol_features')
            tf.float32, shape=[None, n_features], name='mol_features')

      weight_init_stddevs = self.weight_init_stddevs
      bias_init_consts = self.bias_init_consts
@@ -66,23 +66,22 @@ class TensorflowLogisticRegression(TensorflowGraphModel):
            tensor=self.mol_features,
            size=1,
            weight_init=tf.truncated_normal(
                shape=[self.n_features, 1],
                stddev=weight_init_stddevs[0]),
            bias_init=tf.constant(value=bias_init_consts[0],
                                  shape=[1]))
                shape=[self.n_features, 1], stddev=weight_init_stddevs[0]),
            bias_init=tf.constant(value=bias_init_consts[0], shape=[1]))
        lg_list.append(lg)

    return lg_list

  def add_label_placeholders(self, graph, name_scopes):
    #label placeholders with size batch_size * 1
    labels = []
    placeholder_scope = TensorflowGraph.get_placeholder_scope(graph, name_scopes)
    placeholder_scope = TensorflowGraph.get_placeholder_scope(graph,
                                                              name_scopes)
    with placeholder_scope:
      for task in range(self.n_tasks):
        labels.append(tf.identity(
            tf.placeholder(tf.float32, shape=[None,1],
                           name='labels_%d' % task)))
        labels.append(
            tf.identity(
                tf.placeholder(
                    tf.float32, shape=[None, 1], name='labels_%d' % task)))
    return labels

  def add_training_cost(self, graph, name_scopes, output, labels, weights):
@@ -94,8 +93,8 @@ class TensorflowLogisticRegression(TensorflowGraphModel):
      with TensorflowGraph.shared_name_scope('costs', graph, name_scopes):
        for task in range(self.n_tasks):
          task_str = str(task).zfill(len(str(self.n_tasks)))
          with TensorflowGraph.shared_name_scope(
              'cost_{}'.format(task_str), graph, name_scopes):
          with TensorflowGraph.shared_name_scope('cost_{}'.format(task_str),
                                                 graph, name_scopes):
            with tf.name_scope('weighted'):
              weighted_cost = self.cost(output[task], labels[task],
                                        weights[task])
@@ -106,12 +105,13 @@ class TensorflowLogisticRegression(TensorflowGraphModel):
              # non-zero weight examples in the batch.  Also, instead of using
              # tf.reduce_mean (which can put ops on the CPU) we explicitly
              # calculate with div/sum so it stays on the GPU.
              gradient_cost = tf.div(tf.reduce_sum(weighted_cost),
                                     self.batch_size)
              gradient_cost = tf.div(
                  tf.reduce_sum(weighted_cost), self.batch_size)
              gradient_costs.append(gradient_cost)

        # aggregated costs
        with TensorflowGraph.shared_name_scope('aggregated', graph, name_scopes):
        with TensorflowGraph.shared_name_scope('aggregated', graph,
                                               name_scopes):
          with tf.name_scope('gradient'):
            loss = tf.add_n(gradient_costs)

@@ -124,8 +124,8 @@ class TensorflowLogisticRegression(TensorflowGraphModel):
      return loss

  def cost(self, logits, labels, weights):
    return tf.mul(tf.nn.sigmoid_cross_entropy_with_logits(logits, labels),
                  weights)
    return tf.mul(
        tf.nn.sigmoid_cross_entropy_with_logits(logits, labels), weights)

  def add_output_ops(self, graph, output):
    # adding output nodes of sigmoid function
@@ -153,8 +153,7 @@ class TensorflowLogisticRegression(TensorflowGraphModel):
        orig_dict["weights_%d" % task] = w_b[:, task]
      else:
        # Dummy placeholders
        orig_dict["weights_%d" % task] = np.ones(
            (self.batch_size,)) 
        orig_dict["weights_%d" % task] = np.ones((self.batch_size,))
    return TensorflowGraph.get_feed_dict(orig_dict)

  def predict_proba_on_batch(self, X):
@@ -173,17 +172,15 @@ class TensorflowLogisticRegression(TensorflowGraphModel):
        # transfer 2D prediction tensor to 2D x n_classes(=2) 
        complimentary = np.ones(np.shape(batch_outputs))
        complimentary = complimentary - batch_outputs
        batch_outputs = np.squeeze(np.stack(arrays = [complimentary,
						      batch_outputs],
                                            axis = 2))
        batch_outputs = np.concatenate(
            [complimentary, batch_outputs], axis=batch_outputs.ndim - 1)
        # reshape to batch_size x n_tasks x ...
        if batch_outputs.ndim == 3:
          batch_outputs = batch_outputs.transpose((1, 0, 2))
        elif batch_outputs.ndim == 2:
          batch_outputs = batch_outputs.transpose((1, 0))
        else:
          raise ValueError(
              'Unrecognized rank combination for output: %s ' %
          raise ValueError('Unrecognized rank combination for output: %s ' %
                           (batch_outputs.shape,))

      outputs = batch_outputs
@@ -211,21 +208,19 @@ class TensorflowLogisticRegression(TensorflowGraphModel):
        # transfer 2D prediction tensor to 2D x n_classes(=2) 
        complimentary = np.ones(np.shape(batch_output))
        complimentary = complimentary - batch_output
        batch_output = np.squeeze(np.stack(arrays = [complimentary,
                                                     batch_output],
                                            axis = 2))
        batch_output = np.concatenate(
            [complimentary, batch_output], axis=batch_output.ndim - 1)
        # reshape to batch_size x n_tasks x ...
        if batch_output.ndim == 3:
          batch_output = batch_output.transpose((1, 0, 2))
        elif batch_output.ndim == 2:
          batch_output = batch_output.transpose((1, 0))
        else:
          raise ValueError(
              'Unrecognized rank combination for output: %s' %
          raise ValueError('Unrecognized rank combination for output: %s' %
                           (batch_output.shape,))
        output.append(batch_output)

        outputs = np.array(from_one_hot(
            np.squeeze(np.concatenate(output)), axis=-1))
        outputs = np.array(
            from_one_hot(np.squeeze(np.concatenate(output)), axis=-1))

    return np.copy(outputs)
+90 −11
Original line number Diff line number Diff line
@@ -8,8 +8,9 @@ from __future__ import unicode_literals
import os

import numpy as np

import time
import deepchem as dc
import tensorflow as tf
from deepchem.data import NumpyDataset


@@ -654,14 +655,46 @@ class IRVTransformer():
    """
    features = []
    similarity_xs = similarity * np.sign(w)
    for similarity_x in similarity_xs:
      pair = list(zip(similarity_x, range(len(similarity_x))))
      pair.sort(key=lambda x: x[0], reverse=True)
      if pair[0][0] >= 1:
        pair = pair[1:self.K + 1]
    [target_len, reference_len] = similarity_xs.shape
    g_temp = tf.Graph()
    values = []
    top_labels = []
    with g_temp.as_default():
      labels_tf = tf.constant(y)
      similarity_placeholder = tf.placeholder(
          dtype=tf.float64, shape=(None, reference_len))
      value, indice = tf.nn.top_k(
          similarity_placeholder, k=self.K + 1, sorted=True)
      # the tf graph here picks up the (K+1) highest similarity values 
      # and their indices
      top_label = tf.gather(labels_tf, indice)
      # map the indices to labels
      feed_dict = {}
      with tf.Session() as sess:
        for count in range(target_len // 100 + 1):
          feed_dict[similarity_placeholder] = similarity_xs[count * 100:min((
              count + 1) * 100, target_len), :]
          # generating batch of data by slicing similarity matrix 
          # into 100*reference_dataset_length
          fetched_values = sess.run([value, top_label], feed_dict=feed_dict)
          values.append(fetched_values[0])
          top_labels.append(fetched_values[1])
    values = np.concatenate(values, axis=0)
    top_labels = np.concatenate(top_labels, axis=0)
    # concatenate batches of data together
    for count in range(values.shape[0]):
      if values[count, 0] == 1:
        features.append(
            np.concatenate([
                values[count, 1:(self.K + 1)], top_labels[count, 1:(self.K + 1)]
            ]))
        # highest similarity is 1: target is in the reference
        # use the following K points
      else:
        pair = pair[:self.K]
      features.append([z[0] for z in pair] + [y[int(z[1])] for z in pair])
        features.append(
            np.concatenate(
                [values[count, 0:self.K], top_labels[count, 0:self.K]]))
        # highest less than 1: target not in the reference, use top K points
    return features

  def X_transform(self, X_target):
@@ -682,14 +715,60 @@ class IRVTransformer():
    """
    X_target2 = []
    n_features = X_target.shape[1]
    similarity = np.matmul(X_target, np.transpose(self.X)) / (
        n_features - np.matmul(1 - X_target, np.transpose(1 - self.X)))
    print('start similarity calculation')
    time1 = time.time()
    similarity = IRVTransformer.matrix_mul(X_target, np.transpose(self.X)) / (
        n_features - IRVTransformer.matrix_mul(1 - X_target,
                                               np.transpose(1 - self.X)))
    time2 = time.time()
    print('similarity calculation takes %i s' % (time2 - time1))
    for i in range(self.n_tasks):
      X_target2.append(self.realize(similarity, self.y[:, i], self.w[:, i]))
    return np.concatenate([z for z in np.array(X_target2)], axis=1)

  @staticmethod
  def matrix_mul(X1, X2, shard_size=5000):
    """ Calculate matrix multiplication for big matrix,
    X1 and X2 are sliced into pieces with shard_size rows(columns)
    then multiplied together and concatenated to the proper size
    """
    X1 = np.float_(X1)
    X2 = np.float_(X2)
    X1_shape = X1.shape
    X2_shape = X2.shape
    assert X1_shape[1] == X2_shape[0]
    X1_iter = X1_shape[0] // shard_size + 1
    X2_iter = X2_shape[1] // shard_size + 1
    all_result = np.zeros((1,))
    for X1_id in range(X1_iter):
      result = np.zeros((1,))
      for X2_id in range(X2_iter):
        partial_result = np.matmul(X1[X1_id * shard_size:min((
            X1_id + 1) * shard_size, X1_shape[0]), :],
                                   X2[:, X2_id * shard_size:min((
                                       X2_id + 1) * shard_size, X2_shape[1])])
        # calculate matrix multiplicatin on slices
        if result.size == 1:
          result = partial_result
        else:
          result = np.concatenate((result, partial_result), axis=1)
        # concatenate the slices together
        del partial_result
      if all_result.size == 1:
        all_result = result
      else:
        all_result = np.concatenate((all_result, result), axis=0)
      del result
    return all_result

  def transform(self, dataset):
    X_trans = self.X_transform(dataset.X)
    X_length = dataset.X.shape[0]
    X_trans = []
    for count in range(X_length // 5000 + 1):
      X_trans.append(
          self.X_transform(dataset.X[count * 5000:min((count + 1) * 5000,
                                                      X_length), :]))
    X_trans = np.concatenate(X_trans, axis=0)
    return NumpyDataset(X_trans, dataset.y, dataset.w, ids=None)

  def untransform(self, z):
+11 −7
Original line number Diff line number Diff line
@@ -12,7 +12,7 @@ Giving classification performances of:
    RobustMultitaskDNN(tf_robust),
    Logistic regression(logreg), IRV(irv)
    Graph convolution(graphconv)                 
on datasets: muv, pcba, tox21, sider, toxcast, clintox
on datasets: muv, pcba, tox21, sider, toxcast, clintox, hiv

Giving regression performances of:
    MultitaskDNN(tf_regression),
@@ -54,6 +54,7 @@ from chembl.chembl_datasets import load_chembl
from qm7.qm7_datasets import load_qm7_from_mat, load_qm7b_from_mat
from sampl.sampl_datasets import load_sampl
from clintox.clintox_datasets import load_clintox
from hiv.hiv_datasets import load_hiv


def benchmark_loading_datasets(hyper_parameters,
@@ -72,7 +73,7 @@ def benchmark_loading_datasets(hyper_parameters,
      hyper parameters including layer size, dropout, learning rate, etc.
  dataset: string, optional (default='tox21')
      choice of which dataset to use, should be: tox21, muv, sider, 
      toxcast, pcba, delaney, kaggle, nci, clintox, pdbbind, chembl,
      toxcast, pcba, delaney, kaggle, nci, clintox, hiv, pdbbind, chembl,
      qm7, qm7b, sampl
  model: string,  optional (default='tf')
      choice of which model to use, should be: rf, tf, tf_robust, logreg,
@@ -83,7 +84,7 @@ def benchmark_loading_datasets(hyper_parameters,
      path of result file
  """

  if dataset in ['muv', 'pcba', 'tox21', 'sider', 'toxcast', 'clintox']:
  if dataset in ['muv', 'pcba', 'tox21', 'sider', 'toxcast', 'clintox', 'hiv']:
    mode = 'classification'
  elif dataset in [
      'kaggle', 'delaney', 'nci', 'pdbbind', 'chembl', 'qm7', 'qm7b', 'sampl'
@@ -151,7 +152,8 @@ def benchmark_loading_datasets(hyper_parameters,
      'qm7': load_qm7_from_mat,
      'qm7b': load_qm7b_from_mat,
      'sampl': load_sampl,
      'clintox': load_clintox
      'clintox': load_clintox,
      'hiv': load_hiv
  }

  print('-------------------------------------')
@@ -786,7 +788,7 @@ if __name__ == '__main__':
      dest='dataset_args',
      default=[],
      help='Choice of dataset: tox21, sider, muv, toxcast, pcba, ' +
      'kaggle, delaney, nci, pdbbind, chembl, sampl, qm7, qm7b, clintox')
      'kaggle, delaney, nci, pdbbind, chembl, sampl, qm7, qm7b, clintox, hiv')
  parser.add_argument(
      '-t',
      action='store_true',
@@ -811,7 +813,7 @@ if __name__ == '__main__':
    #irv, rf, rf_regression should be assigned manually
  if len(datasets) == 0:
    datasets = [
        'tox21', 'sider', 'muv', 'toxcast', 'pcba', 'clintox', 'sampl',
        'tox21', 'sider', 'muv', 'toxcast', 'pcba', 'clintox', 'hiv', 'sampl',
        'delaney', 'nci', 'kaggle', 'pdbbind', 'chembl', 'qm7b'
    ]

@@ -898,7 +900,9 @@ if __name__ == '__main__':

  for split in splitters:
    for dataset in datasets:
      if dataset in ['tox21', 'sider', 'muv', 'toxcast', 'pcba', 'clintox']:
      if dataset in [
          'tox21', 'sider', 'muv', 'toxcast', 'pcba', 'clintox', 'hiv'
      ]:
        for model in models:
          if model in ['tf', 'tf_robust', 'logreg', 'graphconv', 'rf', 'irv']:
            benchmark_loading_datasets(

examples/hiv/HIV.csv

0 → 100644
+41914 −0

File added.

Preview size limit exceeded, changes collapsed.

Loading