Merge pull request #277 from rbharath/support_models (0c092b03) · Commits · 钟慕尧 / deepchem

examples/low_data/tox_attn_one_fold.py

+17 −32

Original line number	Diff line number	Diff line
		@@ -14,18 +14,18 @@ from datasets import load_tox21_convmol
		# Number of folds for split
		K = 4
		# Depth of attention module
		max_depth = 8
		max_depth = 3
		# number positive/negative ligands
		n_pos = 1
		n_pos = 10
		n_neg = 10
		# Set batch sizes for network
		test_batch_size = 128
		support_batch_size = n_pos + n_neg
		n_train_trials = 5000
		nb_epochs = 1
		n_train_trials = 2000
		n_eval_trials = 20
		n_steps_per_trial = 1
		# Sample supports without replacement (all pos/neg should be different)
		replace = False
		learning_rate = 1e-4
		log_every_n_samples = 50
		# Number of features on conv-mols
		n_feat = 71

		@@ -46,29 +46,17 @@ test_dataset = fold_datasets[-1]
		support_model = dc.nn.SequentialSupportGraph(n_feat)

		# Add layers
		# Need to add batch-norm separately to test/support due to differing
		# shapes.
		# Adding 1st layer
		# output will be (n_atoms, 64)
		support_model.add(dc.nn.GraphConv(64, activation='relu'))
		support_model.add_test(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
		support_model.add_support(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
		support_model.add(dc.nn.GraphPool())
		# Adding 2nd layer
		# output will be (n_atoms, 64)
		support_model.add(dc.nn.GraphConv(64, activation='relu'))
		support_model.add_test(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
		support_model.add_support(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
		support_model.add(dc.nn.GraphConv(128, activation='relu'))
		support_model.add(dc.nn.GraphPool())
		# Adding 3rd layer
		support_model.add(dc.nn.GraphConv(64, activation='relu'))
		support_model.add_support(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
		support_model.add_test(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
		support_model.add(dc.nn.GraphPool())
		support_model.add(dc.nn.Dense(128, activation='tanh'))

		support_model.add_test(dc.nn.GraphGather(test_batch_size, activation='tanh'))
		support_model.add_support(dc.nn.GraphGather(support_batch_size, activation='tanh'))

		# Gather into molecules
		support_model.add_test(dc.nn.GraphGather(test_batch_size))
		support_model.add_support(dc.nn.GraphGather(support_batch_size))
		# Apply an attention lstm layer
		support_model.join(dc.nn.AttnLSTMEmbedding(
		test_batch_size, support_batch_size, max_depth))
		@@ -76,22 +64,19 @@ support_model.join(dc.nn.AttnLSTMEmbedding(
		with tf.Session() as sess:
		model = dc.models.SupportGraphClassifier(
		sess, support_model, test_batch_size=test_batch_size,
		support_batch_size=support_batch_size,
		learning_rate=1e-3, verbosity="high")
		support_batch_size=support_batch_size, learning_rate=learning_rate,
		verbosity="high")

		############################################################ DEBUG
		print("FIT")
		############################################################ DEBUG
		model.fit(train_dataset, n_trials=n_train_trials,
		n_steps_per_trial=n_steps_per_trial, n_pos=n_pos,
		n_neg=n_neg, replace=False)
		model.save()

		model.fit(train_dataset, nb_epochs=nb_epochs,
		n_episodes_per_epoch=n_train_trials,
		n_pos=n_pos, n_neg=n_neg, log_every_n_samples=log_every_n_samples)
		############################################################ DEBUG
		print("EVAL")
		############################################################ DEBUG
		scores = model.evaluate(
		test_dataset, metric, n_pos=n_pos, n_neg=n_neg, replace=replace,
		n_trials=n_eval_trials)
		test_dataset, metric, n_pos, n_neg, n_trials=n_eval_trials)
		print("Scores on evaluation dataset")
		print(scores)

examples/low_data/tox_res_one_fold.py

+22 −35

Original line number	Diff line number	Diff line
		"""
		Train low-data attn models on random forests. Test last fold only.
		Train low-data res models on Tox21. Test last fold only.
		"""
		from __future__ import print_function
		from __future__ import division
		@@ -12,21 +12,20 @@ import tensorflow as tf
		from datasets import load_tox21_convmol

		# Number of folds for split
		K = 12
		K = 4
		# Depth of attention module
		max_depth = 4
		max_depth = 3
		# num positive/negative ligands
		n_pos = 1
		n_pos = 10
		n_neg = 10
		# Set batch sizes for network
		test_batch_size = 128
		support_batch_size = n_pos + n_neg
		n_train_trials = 3000
		n_eval_trials = 5
		n_steps_per_trial = 1
		nb_epochs = 1
		n_train_trials = 2000
		n_eval_trials = 20
		learning_rate = 1e-4
		log_every_n_samples = 50
		# Sample supports without replacement (all pos/neg should be different)
		replace = False
		# Number of features on conv-mols
		n_feat = 71

		@@ -47,49 +46,37 @@ test_dataset = fold_datasets[-1]
		support_model = dc.nn.SequentialSupportGraph(n_feat)

		# Add layers
		# 1st conv layer + batchnorm
		# output will be (n_atoms, 64)
		support_model.add(dc.nn.GraphConv(64, activation='relu'))
		support_model.add_test(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
		support_model.add_support(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
		# 2nd conv layer + batchnorm
		# output will be (n_atoms, 64)
		support_model.add(dc.nn.GraphConv(64, activation='relu'))
		support_model.add_test(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
		support_model.add_support(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
		# 3rd conv layer + batchnorm
		# output will be (n_atoms, 64)
		support_model.add(dc.nn.GraphPool())
		support_model.add(dc.nn.GraphConv(128, activation='relu'))
		support_model.add(dc.nn.GraphPool())
		support_model.add(dc.nn.GraphConv(64, activation='relu'))
		support_model.add_test(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
		support_model.add_support(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))

		support_model.add(dc.nn.GraphPool())
		support_model.add_test(dc.nn.GraphGather(test_batch_size))
		support_model.add_support(dc.nn.GraphGather(support_batch_size))
		support_model.add(dc.nn.Dense(128, activation='tanh'))

		support_model.add_test(dc.nn.GraphGather(test_batch_size, activation='tanh'))
		support_model.add_support(dc.nn.GraphGather(support_batch_size, activation='tanh'))

		# Apply a residual lstm layer
		support_model.join(dc.nn.ResiLSTMEmbedding(
		test_batch_size, support_batch_size, max_depth))


		with tf.Session() as sess:
		model = dc.models.SupportGraphClassifier(
		sess, support_model, test_batch_size=test_batch_size,
		support_batch_size=support_batch_size,
		learning_rate=1e-3, verbosity="high")
		support_batch_size=support_batch_size, learning_rate=learning_rate,
		verbosity="high")

		############################################################ DEBUG
		print("FIT")
		############################################################ DEBUG
		model.old_fit(test_dataset, n_trials=n_train_trials,
		n_steps_per_trial=n_steps_per_trial, n_pos=n_pos,
		n_neg=n_neg, log_every_n_samples=log_every_n_samples, replace=False)
		model.save()

		model.fit(train_dataset, nb_epochs=nb_epochs,
		n_episodes_per_epoch=n_train_trials,
		n_pos=n_pos, n_neg=n_neg, log_every_n_samples=log_every_n_samples)
		############################################################ DEBUG
		print("EVAL")
		############################################################ DEBUG
		scores = model.evaluate(
		test_dataset, metric, n_pos=n_pos, n_neg=n_neg, replace=replace,
		n_trials=n_eval_trials)
		test_dataset, metric, n_pos, n_neg, n_trials=n_eval_trials)
		print("Scores on evaluation dataset")
		print(scores)

examples/low_data/tox_rf_one_fold.py

+5 −8

Original line number	Diff line number	Diff line
		@@ -14,12 +14,10 @@ from sklearn.ensemble import RandomForestClassifier
		# 4-fold splits
		K = 4
		# num positive/negative ligands
		n_pos = 1
		n_pos = 5
		n_neg = 10
		# 10 trials on test-set
		n_trials = 10
		# Sample supports without replacement (all pos/neg should be different)
		replace = False
		n_trials = 20

		tox21_tasks, dataset, transformers = load_tox21_ecfp()

		@@ -35,15 +33,14 @@ test_dataset = fold_datasets[-1]

		# Get supports on test-set
		support_generator = dc.data.SupportGenerator(
		test_dataset, range(len(test_dataset.get_task_names())), n_pos, n_neg,
		n_trials, replace)
		test_dataset, n_pos, n_neg, n_trials)

		# Compute accuracies
		task_scores = {task: [] for task in range(len(test_dataset.get_task_names()))}
		for (task, support) in support_generator:
		# Train model on support
		sklearn_model = RandomForestClassifier(
		class_weight="balanced", n_estimators=50)
		class_weight="balanced", n_estimators=100)
		model = dc.models.SklearnModel(sklearn_model)
		model.fit(support)

		@@ -53,7 +50,7 @@ for (task, support) in support_generator:
		y_pred = model.predict_proba(task_dataset)
		score = metric.compute_metric(
		task_dataset.y, y_pred, task_dataset.w)
		#print("Score on task %s is %s" % (str(task), str(score)))
		print("Score on task %s is %s" % (str(task), str(score)))
		task_scores[task].append(score)

		# Join information for all tasks.

examples/low_data/tox_siamese_one_fold.py

+19 −38

Original line number	Diff line number	Diff line
		@@ -14,16 +14,17 @@ from datasets import load_tox21_convmol
		# Number of folds for split
		K = 4
		# num positive/negative ligands
		n_pos = 3
		n_pos = 10
		n_neg = 10
		# Set batch sizes for network
		test_batch_size = 100
		test_batch_size = 128
		support_batch_size = n_pos + n_neg
		n_train_trials = 3000
		nb_epochs = 1
		n_train_trials = 2000
		n_eval_trials = 20
		n_steps_per_trial = 1
		# Sample supports without replacement (all pos/neg should be different)
		replace = False
		learning_rate = 1e-4
		log_every_n_samples = 50
		# Number of features on conv-mols
		n_feat = 71

		@@ -44,53 +45,33 @@ test_dataset = fold_datasets[-1]
		support_model = dc.nn.SequentialSupportGraph(n_feat)

		# Add layers

		# Adding 1st layer
		# output will be (n_atoms, 64)
		support_model.add(dc.nn.GraphConv(64, activation='relu'))
		# Need to add batch-norm to test/support due to differing shapes.
		# output will be (n_atoms, 64)
		support_model.add_test(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
		support_model.add_support(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
		# Adding 2nd layer
		# output will be (n_atoms, 64)
		support_model.add(dc.nn.GraphConv(64, activation='relu'))
		support_model.add(dc.nn.GraphPool())
		# Adding 3rd layer
		support_model.add(dc.nn.GraphConv(64, activation='relu'))
		support_model.add_support(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
		support_model.add_test(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
		# Adding 4th layer
		support_model.add(dc.nn.GraphConv(128, activation='relu'))
		support_model.add(dc.nn.GraphPool())
		support_model.add(dc.nn.GraphConv(64, activation='relu'))
		support_model.add_support(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
		support_model.add_test(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
		## Adding 5th layer
		#support_model.add(dc.nn.GraphConv(64, activation='relu'))
		#support_model.add_support(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
		#support_model.add_test(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
		support_model.add(dc.nn.GraphPool())
		support_model.add(dc.nn.Dense(128, activation='tanh'))

		# Gather atoms into batches
		support_model.add_test(dc.nn.GraphGather(test_batch_size))
		support_model.add_support(dc.nn.GraphGather(support_batch_size))
		support_model.add_test(dc.nn.GraphGather(test_batch_size, activation='tanh'))
		support_model.add_support(dc.nn.GraphGather(support_batch_size, activation='tanh'))

		with tf.Session() as sess:
		model = dc.models.SupportGraphClassifier(
		sess, support_model, test_batch_size=test_batch_size,
		support_batch_size=support_batch_size, learning_rate=3e-3, verbosity="high")
		support_batch_size=support_batch_size, learning_rate=learning_rate,
		verbosity="high")

		############################################################ DEBUG
		print("FIT")
		############################################################ DEBUG
		model.fit(train_dataset, n_trials=n_train_trials,
		n_steps_per_trial=n_steps_per_trial, n_pos=n_pos,
		n_neg=n_neg, replace=False)
		model.save()

		model.fit(train_dataset, nb_epochs=nb_epochs,
		n_episodes_per_epoch=n_train_trials,
		n_pos=n_pos, n_neg=n_neg, log_every_n_samples=log_every_n_samples)
		############################################################ DEBUG
		print("EVAL")
		############################################################ DEBUG
		scores = model.evaluate(
		test_dataset, metric, n_pos=n_pos, n_neg=n_neg, replace=replace,
		n_trials=n_eval_trials)
		print("Scores on held-out dataset")
		test_dataset, metric, n_pos, n_neg, n_trials=n_eval_trials)
		print("Scores on evaluation dataset")
		print(scores)

Admin message