Commit f792d5d2 authored by Bharath Ramsundar's avatar Bharath Ramsundar
Browse files

Removing files not used in paper

parent f8caf53d
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -175,7 +175,7 @@ def load_sider_convmol():

  loader = dc.data.CSVLoader(
      tasks=SIDER_tasks, smiles_field="smiles", featurizer=featurizer)
  dataset = loader.featurize(dataset_file, debug=True)
  dataset = loader.featurize(dataset_file)
  print("%d datapoints in SIDER dataset" % len(dataset))

  # Initialize transformers
+0 −83
Original line number Diff line number Diff line
"""
Train low-data attn models on MUV. Test last fold only.
"""
from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals

import tempfile
import numpy as np
import deepchem as dc
import tensorflow as tf
from datasets import load_muv_convmol

# ---- Experiment settings --------------------------------------------------
# The dataset is split into K folds.
K = 4
# Feature count of the conv-mol featurization.
n_feat = 71
# Depth of the attention LSTM embedding module.
max_depth = 3

# Positive / negative ligands drawn for each support set.
n_pos = n_neg = 10

# Network batch sizes; a support batch holds every support ligand.
support_batch_size = n_neg + n_pos
test_batch_size = 128

# Optimisation and evaluation schedule.
learning_rate = 0.0001
nb_epochs = 1
n_train_trials = 2000
n_eval_trials = 20
log_every_n_samples = 50

# Load MUV through the local `datasets` helper, which hands back the task
# list, the dataset and the transformers.
muv_tasks, dataset, transformers = load_muv_convmol()

# Cut the dataset into K folds with a TaskSplitter.
task_splitter = dc.splits.TaskSplitter()
fold_datasets = task_splitter.k_fold_split(dataset, K)

# The final fold is held out for testing; all earlier folds are merged into
# a single training dataset.
test_dataset = fold_datasets[-1]
train_folds = fold_datasets[:-1]
train_dataset = dc.splits.merge_fold_datasets(train_folds)

# ROC-AUC in classification mode is the evaluation metric.
metric = dc.metrics.Metric(
    dc.metrics.roc_auc_score, mode="classification", verbosity="high")

# Build the support-graph model that the classifier below wraps.
support_model = dc.nn.SequentialSupportGraph(n_feat)

# Three GraphConv + GraphPool stages (sizes 64, 128, 64, relu activation),
# followed by one tanh Dense layer of size 128.
for conv_size in (64, 128, 64):
  support_model.add(dc.nn.GraphConv(conv_size, activation='relu'))
  support_model.add(dc.nn.GraphPool())
support_model.add(dc.nn.Dense(128, activation='tanh'))

# Separate GraphGather layers for the test and support branches, each built
# with its own batch size.
support_model.add_test(
    dc.nn.GraphGather(test_batch_size, activation='tanh'))
support_model.add_support(
    dc.nn.GraphGather(support_batch_size, activation='tanh'))

# Join the two branches through an attention LSTM embedding layer.
attn_embedding = dc.nn.AttnLSTMEmbedding(
    test_batch_size, support_batch_size, max_depth)
support_model.join(attn_embedding)

with tf.Session() as sess:
  # Wrap the support graph in a classifier bound to this session.
  model = dc.models.SupportGraphClassifier(
      sess,
      support_model,
      test_batch_size=test_batch_size,
      support_batch_size=support_batch_size,
      learning_rate=learning_rate,
      verbosity="high")

  # DEBUG: training is deliberately turned off to investigate, so the
  # evaluation below runs on the model without any fit step. To restore
  # training, re-enable:
  #
  #   print("FIT")
  #   model.fit(train_dataset, nb_epochs=nb_epochs,
  #             n_episodes_per_epoch=n_train_trials,
  #             n_pos=n_pos, n_neg=n_neg,
  #             log_every_n_samples=log_every_n_samples)

  print("EVAL")
  # Score the held-out fold over n_eval_trials trials, using n_pos positive
  # and n_neg negative ligands.
  scores = model.evaluate(
      test_dataset,
      metric,
      n_pos,
      n_neg,
      n_trials=n_eval_trials)
  print("Scores on evaluation dataset")
  print(scores)
+0 −83
Original line number Diff line number Diff line
"""
Train low-data res models on MUV. Test last fold only.
"""
from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals

import tempfile
import numpy as np
import deepchem as dc
import tensorflow as tf
from datasets import load_muv_convmol

# ---- Experiment settings --------------------------------------------------
# The dataset is split into K folds.
K = 4
# Feature count of the conv-mol featurization.
n_feat = 71
# Depth of the residual LSTM embedding module.
max_depth = 3

# Positive / negative ligands drawn for each support set.
n_pos = n_neg = 10

# Network batch sizes; a support batch holds every support ligand.
support_batch_size = n_neg + n_pos
test_batch_size = 128

# Optimisation and evaluation schedule.
learning_rate = 0.0001
nb_epochs = 1
n_train_trials = 2000
n_eval_trials = 20
log_every_n_samples = 50

# Load MUV through the local `datasets` helper, which hands back the task
# list, the dataset and the transformers.
muv_tasks, dataset, transformers = load_muv_convmol()

# Cut the dataset into K folds with a TaskSplitter.
task_splitter = dc.splits.TaskSplitter()
fold_datasets = task_splitter.k_fold_split(dataset, K)

# The final fold is held out for testing; all earlier folds are merged into
# a single training dataset.
test_dataset = fold_datasets[-1]
train_folds = fold_datasets[:-1]
train_dataset = dc.splits.merge_fold_datasets(train_folds)

# ROC-AUC in classification mode is the evaluation metric.
metric = dc.metrics.Metric(
    dc.metrics.roc_auc_score, mode="classification", verbosity="high")

# Build the support-graph model that the classifier below wraps.
support_model = dc.nn.SequentialSupportGraph(n_feat)

# Three GraphConv + GraphPool stages (sizes 64, 128, 64, relu activation),
# followed by one tanh Dense layer of size 128.
for conv_size in (64, 128, 64):
  support_model.add(dc.nn.GraphConv(conv_size, activation='relu'))
  support_model.add(dc.nn.GraphPool())
support_model.add(dc.nn.Dense(128, activation='tanh'))

# Separate GraphGather layers for the test and support branches, each built
# with its own batch size.
support_model.add_test(
    dc.nn.GraphGather(test_batch_size, activation='tanh'))
support_model.add_support(
    dc.nn.GraphGather(support_batch_size, activation='tanh'))

# Join the two branches through a residual LSTM embedding layer.
resi_embedding = dc.nn.ResiLSTMEmbedding(
    test_batch_size, support_batch_size, max_depth)
support_model.join(resi_embedding)

with tf.Session() as sess:
  # Wrap the support graph in a classifier bound to this session.
  model = dc.models.SupportGraphClassifier(
      sess,
      support_model,
      test_batch_size=test_batch_size,
      support_batch_size=support_batch_size,
      learning_rate=learning_rate,
      verbosity="high")

  # DEBUG: no training is performed, to see the effect of random features;
  # the evaluation below runs on the model without any fit step. To restore
  # training, re-enable:
  #
  #   print("FIT")
  #   model.fit(train_dataset, nb_epochs=nb_epochs,
  #             n_episodes_per_epoch=n_train_trials,
  #             n_pos=n_pos, n_neg=n_neg,
  #             log_every_n_samples=log_every_n_samples)

  print("EVAL")
  # Score the held-out fold over n_eval_trials trials, using n_pos positive
  # and n_neg negative ligands.
  scores = model.evaluate(
      test_dataset,
      metric,
      n_pos,
      n_neg,
      n_trials=n_eval_trials)
  print("Scores on evaluation dataset")
  print(scores)
+0 −77
Original line number Diff line number Diff line
"""
Train low-data siamese models on MUV. Test last fold only.
"""
from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals

import numpy as np
import deepchem as dc
import tensorflow as tf
from datasets import load_muv_convmol

# ---- Experiment settings --------------------------------------------------
# The dataset is split into K folds.
K = 4
# Feature count of the conv-mol featurization.
n_feat = 71

# Positive / negative ligands drawn for each support set.
n_pos = n_neg = 10

# Network batch sizes; a support batch holds every support ligand.
support_batch_size = n_neg + n_pos
test_batch_size = 128

# Optimisation and evaluation schedule.
learning_rate = 0.0001
nb_epochs = 1
n_train_trials = 2000
n_eval_trials = 20
n_steps_per_trial = 1
log_every_n_samples = 50

# Load MUV through the local `datasets` helper, which hands back the task
# list, the dataset and the transformers.
muv_tasks, dataset, transformers = load_muv_convmol()

# Cut the dataset into K folds with a TaskSplitter.
task_splitter = dc.splits.TaskSplitter()
fold_datasets = task_splitter.k_fold_split(dataset, K)

# The final fold is held out for testing; all earlier folds are merged into
# a single training dataset.
test_dataset = fold_datasets[-1]
train_folds = fold_datasets[:-1]
train_dataset = dc.splits.merge_fold_datasets(train_folds)

# ROC-AUC in classification mode is the evaluation metric.
metric = dc.metrics.Metric(
    dc.metrics.roc_auc_score, mode="classification", verbosity="high")

# Build the support-graph model that the classifier below wraps.
support_model = dc.nn.SequentialSupportGraph(n_feat)

# Three GraphConv + GraphPool stages (sizes 64, 128, 64, relu activation),
# followed by one tanh Dense layer of size 128.
for conv_size in (64, 128, 64):
  support_model.add(dc.nn.GraphConv(conv_size, activation='relu'))
  support_model.add(dc.nn.GraphPool())
support_model.add(dc.nn.Dense(128, activation='tanh'))

# Separate GraphGather layers for the test and support branches, each built
# with its own batch size. No embedding layer is joined on top in this
# script.
support_model.add_test(
    dc.nn.GraphGather(test_batch_size, activation='tanh'))
support_model.add_support(
    dc.nn.GraphGather(support_batch_size, activation='tanh'))

with tf.Session() as sess:
  # Wrap the support graph in a classifier bound to this session.
  model = dc.models.SupportGraphClassifier(
      sess,
      support_model,
      test_batch_size=test_batch_size,
      support_batch_size=support_batch_size,
      learning_rate=learning_rate,
      verbosity="high")

  # DEBUG: training is turned off to gauge the accuracy of the random model;
  # the evaluation below runs on the model without any fit step. To restore
  # training, re-enable:
  #
  #   print("FIT")
  #   model.fit(train_dataset, nb_epochs=nb_epochs,
  #             n_episodes_per_epoch=n_train_trials,
  #             n_pos=n_pos, n_neg=n_neg,
  #             log_every_n_samples=log_every_n_samples)

  print("EVAL")
  # Score the held-out fold over n_eval_trials trials, using n_pos positive
  # and n_neg negative ligands.
  scores = model.evaluate(
      test_dataset,
      metric,
      n_pos,
      n_neg,
      n_trials=n_eval_trials)
  print("Scores on evaluation dataset")
  print(scores)
+3 −5
Original line number Diff line number Diff line
@@ -27,13 +27,12 @@ n_eval_trials = 20
learning_rate = 1e-4
log_every_n_samples = 50
# Number of features on conv-mols
n_feat = 71
n_feat = 75

sider_tasks, dataset, transformers = load_sider_convmol()

# Define metric
metric = dc.metrics.Metric(
    dc.metrics.roc_auc_score, verbosity="high", mode="classification")
metric = dc.metrics.Metric(dc.metrics.roc_auc_score, mode="classification")

task_splitter = dc.splits.TaskSplitter()
fold_datasets = task_splitter.k_fold_split(dataset, K)
@@ -64,8 +63,7 @@ support_model.join(dc.nn.AttnLSTMEmbedding(
with tf.Session() as sess:
  model = dc.models.SupportGraphClassifier(
    sess, support_model, test_batch_size=test_batch_size,
    support_batch_size=support_batch_size, learning_rate=learning_rate,
    verbosity="high")
    support_batch_size=support_batch_size, learning_rate=learning_rate)

  ############################################################ DEBUG
  print("FIT")
Loading