Commit 64170e5b authored by Bharath Ramsundar
Browse files

Cleanup

parent bb95f8e9
Loading
Loading
Loading
Loading
+0 −16
Original line number Diff line number Diff line
@@ -181,22 +181,6 @@ class Metric(object):
      metric_value = self.compute_singletask_metric(
          y_task, y_pred_task, w_task)
      computed_metrics.append(metric_value)
    ##################################################################### DEBUG
   # print("n_tasks")
   # print(n_tasks)
   # print("len(computed_metrics)")
   # print(len(computed_metrics))
   # print("computed_metrics")
   # print(computed_metrics)
   # print("len(computed_metrics)")
   # print(len(computed_metrics))
   # print("computed_metrics")
   # print(computed_metrics)
   # print("len(computed_metrics)")
   # print(len(computed_metrics))
   # print("y_true.shape, y_pred.shape")
   # print(y_true.shape, y_pred.shape)
    ##################################################################### DEBUG
    log("computed_metrics: %s" % str(computed_metrics), self.verbosity)
    if n_tasks == 1:
      computed_metrics = computed_metrics[0]
+0 −10
Original line number Diff line number Diff line
@@ -774,15 +774,5 @@ class TestOverfit(test_util.TensorFlowTestCase):

    # Eval model on train
    scores = model.evaluate(dataset, [metric])
    print("scores")
    print(scores)
    y_pred = model.predict(dataset)
    print("y")
    print(y)
    print("y_pred")
    print(y_pred)
    print("w")
    print(w)
    print("metric.compute_metric(y, y_pred, w)")
    print(metric.compute_metric(y, y_pred, w))
    assert scores[metric.name] < .2
+47 −0
Original line number Diff line number Diff line
"""
Script that trains progressive multitask models on Delaney dataset.
"""
from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals

import os
import shutil
import numpy as np
import deepchem as dc
from delaney_dataset import load_delaney

# Pin NumPy's global RNG seed so repeated runs are comparable (debug aid).
np.random.seed(123)

# Pull the task names, the dataset splits and the transformers for Delaney.
n_features = 1024
delaney_tasks, delaney_datasets, transformers = load_delaney()
train_dataset, valid_dataset, test_dataset = delaney_datasets

# Metric built from pearson_r2_score; np.mean is handed over as the second
# argument (presumably the cross-task aggregation function -- confirm
# against dc.metrics.Metric).
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, np.mean)

# Each per-layer hyperparameter list below holds one entry per layer.
n_layers = 1
# NOTE(review): nb_epoch is defined here but never passed to fit() below.
nb_epoch = 10
model_params = dict(
    layer_sizes=[1000] * n_layers,
    dropouts=[.25] * n_layers,
    alpha_init_stddevs=[.02] * n_layers,
    weight_init_stddevs=[.02] * n_layers,
    bias_init_consts=[1.] * n_layers,
    learning_rate=.001,
    penalty=.0001,
    penalty_type="l2",
    optimizer="adam",
    batch_size=100,
    seed=123,
    verbosity="high")
model = dc.models.ProgressiveMultitaskRegressor(
    len(delaney_tasks), n_features, **model_params)

# Train on the training split, then save the model.
model.fit(train_dataset)
model.save()

print("Evaluating model")
train_scores = model.evaluate(train_dataset, [metric], transformers)
valid_scores = model.evaluate(valid_dataset, [metric], transformers)

# Report each score set under its heading, train first, then validation.
for heading, scores in (("Train scores", train_scores),
                        ("Validation scores", valid_scores)):
  print(heading)
  print(scores)
+3 −6
Original line number Diff line number Diff line
@@ -35,12 +35,9 @@ def load_kaggle(shard_size=10000, num_shards_per_batch=4):
  time1 = time.time()
  ############################################################## TIMING
  # Set some global variables up top
  train_files = ("../merck_datasets/KAGGLE_processed/"
                 "KAGGLE_training_disguised_combined_full.csv.gz")
  valid_files = ("../merck_datasets/KAGGLE_processed/"
                 "KAGGLE_test1_disguised_combined_full.csv.gz")
  test_files = ("../merck_datasets/KAGGLE_processed/"
                "KAGGLE_test2_disguised_combined_full.csv.gz")
  train_files = ("./KAGGLE_training_disguised_combined_full.csv.gz")
  valid_files = ("./KAGGLE_test1_disguised_combined_full.csv.gz")
  test_files = ("./KAGGLE_test2_disguised_combined_full.csv.gz")

  # Featurize KAGGLE dataset
  print("About to featurize KAGGLE dataset.")