Commit d7d140a9 authored by Bharath Ramsundar, committed by GitHub
Browse files

Merge pull request #819 from LRParser/master

Resolve issue whereby pcba_sklearn example fails to train due to outd…
parents fd76aee7 f5044237
Loading
Loading
Loading
Loading
+16 −13
Original line number Diff line number Diff line
@@ -16,12 +16,11 @@ from deepchem.metrics import Metric
from deepchem.models.sklearn_models import SklearnModel
from deepchem.utils.evaluate import Evaluator


# Seed np.random with a fixed value so random draws repeat across runs.
np.random.seed(123)

# Set some global variables up top
reload = True
# NOTE(review): this listing is a diff rendered without +/- markers. The
# string-valued `verbosity` looks like the pre-change setting and the boolean
# `is_verbose` its replacement -- confirm against the commit.
verbosity = "high"
is_verbose = False

# Working directory for this example (under /tmp) and the model
# subdirectory inside it.
base_dir = "/tmp/pcba_sklearn"
model_dir = os.path.join(base_dir, "model")
@@ -29,33 +28,37 @@ if os.path.exists(base_dir):
  shutil.rmtree(base_dir)
os.makedirs(base_dir)

# NOTE(review): this listing is a diff rendered without +/- markers, so two
# versions of the dataset load appear back to back. The first call (passing
# base_dir/reload and unpacking two datasets) looks like the pre-change code;
# the second (no arguments, unpacking train/valid/test) looks like the
# post-change code -- confirm against the commit.
pcba_tasks, pcba_datasets, transformers = load_pcba(
    base_dir, reload=reload)
(train_dataset, valid_dataset) = pcba_datasets
pcba_tasks, pcba_datasets, transformers = load_pcba()
(train_dataset, valid_dataset, test_dataset) = pcba_datasets

# Metric built from metrics.roc_auc_score and np.mean in classification mode,
# configured with the boolean `verbose` keyword.
classification_metric = Metric(
    metrics.roc_auc_score, np.mean, verbose=is_verbose, mode="classification")

# NOTE(review): same metric using the `verbosity=` keyword; presumably the
# pre-change form. As listed, this later assignment would overwrite the one
# above if the lines were executed in order.
classification_metric = Metric(metrics.roc_auc_score, np.mean,
                               verbosity=verbosity,
                               mode="classification")

def model_builder(model_dir):
  """Return a SklearnModel wrapping a random-forest classifier.

  The wrapped estimator is a RandomForestClassifier constructed with
  class_weight="balanced" and n_estimators=500. `model_dir` is handed
  through to SklearnModel unchanged.
  """
  forest = RandomForestClassifier(n_estimators=500, class_weight="balanced")
  return SklearnModel(forest, model_dir)
# NOTE(review): `muv_tasks` is not defined anywhere in the visible listing;
# this looks like the pre-change line, and the `pcba_tasks` call below its
# replacement -- confirm against the commit.
model = SingletaskToMultitask(muv_tasks, model_builder, model_dir)


# Wrap model_builder in a SingletaskToMultitask over the PCBA task list.
model = SingletaskToMultitask(pcba_tasks, model_builder, model_dir)

# Fit the model on the training set, then save it.
model.fit(train_dataset)
model.save()

# Score the fitted model on the training set with classification_metric.
# NOTE(review): the evaluator is constructed twice, once with `verbosity=`
# and once with `verbose=`; presumably the former is the pre-change form.
train_evaluator = Evaluator(model, train_dataset, transformers, verbosity=verbosity)
train_scores = train_evaluator.compute_model_performance([classification_metric])
train_evaluator = Evaluator(
    model, train_dataset, transformers, verbose=is_verbose)
train_scores = train_evaluator.compute_model_performance(
    [classification_metric])

print("Train scores")
print(train_scores)

# Score the fitted model on the validation set with the same metric.
# NOTE(review): same `verbosity=` / `verbose=` duplication as for the
# training evaluator above.
valid_evaluator = Evaluator(model, valid_dataset, transformers, verbosity=verbosity)
valid_scores = valid_evaluator.compute_model_performance([classification_metric])
valid_evaluator = Evaluator(
    model, valid_dataset, transformers, verbose=is_verbose)
valid_scores = valid_evaluator.compute_model_performance(
    [classification_metric])

print("Validation scores")
print(valid_scores)