Unverified Commit fac87370 authored by Karl Leswing's avatar Karl Leswing Committed by GitHub
Browse files

Merge pull request #1407 from vsag96/FIX-#1043

WIP:Fix #1403
parents c9fe0176 a20bbe6b
Loading
Loading
Loading
Loading
+17 −28
Original line number Diff line number Diff line
@@ -9,7 +9,7 @@ import tempfile
import numpy as np
import tensorflow as tf
import deepchem as dc
from datasets import load_sider_convmol
from deepchem.models.tensorgraph.models.graph_models import GraphConvModel

# 4-fold splits
K = 4
@@ -19,23 +19,21 @@ n_neg = 10
# 20 trials on test-set
n_trials = 20

sider_tasks, dataset, transformers = load_sider_convmol()
sider_tasks, fold_datasets, transformers = dc.molnet.load_sider(
    featurizer='GraphConv', split="task")

# Define metric
metric = dc.metrics.Metric(dc.metrics.roc_auc_score, mode="classification")

task_splitter = dc.splits.TaskSplitter()
fold_datasets = task_splitter.k_fold_split(dataset, K)

# Hold out the last fold as the test set; every earlier fold is passed to
# merge_fold_datasets to form the training dataset.
train_folds = fold_datasets[:-1]
train_dataset = dc.splits.merge_fold_datasets(train_folds)
test_dataset = fold_datasets[-1]

# Get supports on test-set
# NOTE(review): presumably yields n_trials supports per task, each drawn with
# n_pos positive and n_neg negative examples -- confirm against
# dc.data.SupportGenerator.
support_generator = dc.data.SupportGenerator(test_dataset, n_pos, n_neg,
                                             n_trials)

# Compute accuracies

# One initially empty score list per task, keyed by the task's integer index
# (not its name) within test_dataset.get_task_names().
task_scores = {task: [] for task in range(len(test_dataset.get_task_names()))}

for trial_num, (task, support) in enumerate(support_generator):
@@ -45,27 +43,9 @@ for trial_num, (task, support) in enumerate(support_generator):
  n_feat = 75
  # Batch size of models
  batch_size = 50
  graph_model = dc.nn.SequentialGraph(n_feat)
  graph_model.add(dc.nn.GraphConv(64, n_feat, activation='relu'))
  graph_model.add(dc.nn.GraphPool())
  graph_model.add(dc.nn.GraphConv(128, 64, activation='relu'))
  graph_model.add(dc.nn.GraphPool())
  graph_model.add(dc.nn.GraphConv(64, 128, activation='relu'))
  graph_model.add(dc.nn.GraphPool())
  graph_model.add(dc.nn.Dense(128, 64, activation='tanh'))
  graph_model.add(dc.nn.GraphGather(batch_size, activation="tanh"))

  model = dc.models.MultitaskGraphClassifier(
      graph_model,
      1,
      n_feat,
      batch_size=batch_size,
      learning_rate=1e-3,
      learning_rate_decay_time=1000,
      optimizer_type="adam",
      beta1=.9,
      beta2=.999)

  #graph_model = dc.nn.SequentialGraph(n_feat)
  model = GraphConvModel(
      1, graph_conv_layers=[64, 128, 64], batch_size=batch_size)
  # Fit trained model
  model.fit(support, nb_epoch=10)

@@ -89,4 +69,13 @@ print(mean_task_scores)
print("Standard Deviations")
print(std_task_scores)
print("Median of Mean Scores")
"""
To support both python 3.x and 2.7
dict.values() returns an object of type dict_values
and np.median shouts loudly if this is the case so 
converted it to list before passing it to np.array()
"""
try:
  print(np.median(np.array(mean_task_scores.values())))
except TypeError as e:
  print(np.median(np.array(list(mean_task_scores.values()))))