Commit e3ca5d6c authored by Peter Eastman
Browse files

More fixes to examples

parent 1636ede4
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -21,7 +21,7 @@ train_dataset, valid_dataset, test_dataset = muv_datasets
metric = dc.metrics.Metric(
    dc.metrics.roc_auc_score, np.mean, mode="classification")

rate = dc.models.tensorgraph.optimizers.ExponentialDecay(0.001, 0.8, 1000)
rate = dc.models.optimizers.ExponentialDecay(0.001, 0.8, 1000)
model = dc.models.MultitaskClassifier(
    len(muv_tasks),
    n_features=1024,
+3 −12
Original line number Diff line number Diff line
@@ -8,7 +8,7 @@ from __future__ import unicode_literals
import os
import numpy as np
import shutil
from nci_datasets import load_nci
from deepchem.molnet import load_nci
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestRegressor
from deepchem.data import Dataset
@@ -23,28 +23,19 @@ np.random.seed(123)
# Set some global variables up top
verbosity = "high"

base_dir = "/tmp/nci_rf"
model_dir = os.path.join(base_dir, "model")
if os.path.exists(base_dir):
  shutil.rmtree(base_dir)
os.makedirs(base_dir)

nci_tasks, nci_dataset, transformers = load_nci(
    base_dir)
nci_tasks, nci_dataset, transformers = load_nci()

(train_dataset, valid_dataset, test_dataset) = nci_dataset

classification_metric = Metric(metrics.roc_auc_score, np.mean,
                               verbosity=verbosity,
                               mode="classification")
def model_builder(model_dir):
  """Construct a SklearnModel wrapping a 500-tree random forest regressor.

  Used as the per-task model factory for SingletaskToMultitask: each task
  gets its own forest, persisted under the given model_dir.
  """
  return SklearnModel(RandomForestRegressor(n_estimators=500), model_dir)
model = SingletaskToMultitask(nci_tasks, model_builder, model_dir)
model = SingletaskToMultitask(nci_tasks, model_builder)

# Fit trained model
model.fit(train_dataset)
model.save()

train_evaluator = Evaluator(model, train_dataset, transformers, verbosity=verbosity)
train_scores = train_evaluator.compute_model_performance([classification_metric])
+0 −1
Original line number Diff line number Diff line
@@ -37,7 +37,6 @@ model = MultitaskClassifier(

# Fit trained model
model.fit(train_dataset)
model.save()

train_evaluator = Evaluator(model, train_dataset, transformers)
train_scores = train_evaluator.compute_model_performance([metric])
+1 −2
Original line number Diff line number Diff line
@@ -27,8 +27,7 @@ metric = dc.metrics.Metric(dc.metrics.pearson_r2_score)
frag1_num_atoms = 70  # for ligand atoms
frag2_num_atoms = 24000  # for protein atoms
complex_num_atoms = frag1_num_atoms + frag2_num_atoms
atomic_convnet = atomic_conv.AtomicConvModel(
    batch_size=batch_size,
model = dc.models.AtomicConvModel(
    frag1_num_atoms=frag1_num_atoms,
    frag2_num_atoms=frag2_num_atoms,
    complex_num_atoms=complex_num_atoms)
+2 −5
Original line number Diff line number Diff line
@@ -24,16 +24,12 @@ train_dataset, valid_dataset, test_dataset = pdbbind_datasets

metric = dc.metrics.Metric(dc.metrics.pearson_r2_score)

current_dir = os.path.dirname(os.path.realpath(__file__))
model_dir = os.path.join(current_dir, "%s_%s_RF" % (split, subset))

sklearn_model = RandomForestRegressor(n_estimators=500)
model = dc.models.SklearnModel(sklearn_model, model_dir=model_dir)
model = dc.models.SklearnModel(sklearn_model)

# Fit trained model
print("Fitting model on train dataset")
model.fit(train_dataset)
model.save()

print("Evaluating model")
train_scores = model.evaluate(train_dataset, [metric], transformers)
@@ -44,3 +40,4 @@ print(train_scores)

print("Validation scores")
print(valid_scores)
Loading