Commit 006e49fa authored by joegomes

Run yapf on qm9

parent 5eb92cbe
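The reformatting below can be reproduced with yapf's Python API. A minimal sketch, assuming a recent yapf (where FormatCode returns a (code, changed) tuple) and a Google-based style with 2-space indents to match the code shown; the repo's actual style config and the file path are assumptions:

from yapf.yapflib.yapf_api import FormatCode

# Hypothetical path to the file being reformatted in this commit.
path = 'qm9_datasets.py'
with open(path) as f:
  source = f.read()

# Assumed style: Google base with 2-space indents, matching the diff below.
formatted, changed = FormatCode(
    source, style_config='{based_on_style: google, indent_width: 2}')

if changed:
  with open(path, 'w') as f:
    f.write(formatted)

The command-line equivalent would pass the same style string to yapf with the in-place flag.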
+22 −12
@@ -10,27 +10,37 @@ import numpy as np
 import shutil
 import deepchem as dc
 
-def load_qm9(featurizer=None, split='random'):
+
+def load_qm9(featurizer=None, split='random'):
   """Load qm9 datasets."""
   # Featurize qm9 dataset
   print("About to featurize qm9 dataset.")
   current_dir = os.path.dirname(os.path.realpath(__file__))
-  dataset_file = os.path.join(
-      current_dir, "./gdb9.sdf")
-  qm9_tasks = ["A", "B", "C", "mu", "alpha", "homo", "lumo", "gap", "r2", "zpve", "cv",
-                "u0_atom", "u298_atom", "h298_atom", "g298_atom"]
+  dataset_file = os.path.join(current_dir, "./gdb9.sdf")
+  qm9_tasks = [
+      "A", "B", "C", "mu", "alpha", "homo", "lumo", "gap", "r2", "zpve", "cv",
+      "u0_atom", "u298_atom", "h298_atom", "g298_atom"
+  ]
   if featurizer is None:
     featurizer = dc.feat.CoulombMatrix(29)
-  loader = dc.data.SDFLoader(tasks=qm9_tasks, smiles_field="smiles",
-                             mol_field="mol", featurizer=featurizer)
+  loader = dc.data.SDFLoader(
+      tasks=qm9_tasks,
+      smiles_field="smiles",
+      mol_field="mol",
+      featurizer=featurizer)
   dataset = loader.featurize(dataset_file)
-  splitters = {'index': dc.splits.IndexSplitter(),
+  splitters = {
+      'index': dc.splits.IndexSplitter(),
       'random': dc.splits.RandomSplitter(),
-               'stratified': dc.splits.SingletaskStratifiedSplitter(task_number=11)}
+      'stratified': dc.splits.SingletaskStratifiedSplitter(task_number=11)
+  }
   splitter = splitters[split]
-  train_dataset, valid_dataset, test_dataset = splitter.train_valid_test_split(dataset)
-  transformers = [dc.trans.NormalizationTransformer(transform_y=True, dataset=train_dataset)]
+  train_dataset, valid_dataset, test_dataset = splitter.train_valid_test_split(
+      dataset)
+  transformers = [
+      dc.trans.NormalizationTransformer(
+          transform_y=True, dataset=train_dataset)
+  ]
   for transformer in transformers:
     train_dataset = transformer.transform(train_dataset)
     valid_dataset = transformer.transform(valid_dataset)
+16 −6
@@ -14,13 +14,23 @@ np.random.seed(123)
 qm9_tasks, datasets, transformers = load_qm9()
 train_dataset, valid_dataset, test_dataset = datasets
 fit_transformers = [dc.trans.CoulombFitTransformer(train_dataset)]
-regression_metric = [dc.metrics.Metric(dc.metrics.mean_absolute_error, mode="regression"),
-              dc.metrics.Metric(dc.metrics.pearson_r2_score, mode="regression")]
+regression_metric = [
+    dc.metrics.Metric(dc.metrics.mean_absolute_error, mode="regression"),
+    dc.metrics.Metric(dc.metrics.pearson_r2_score, mode="regression")
+]
 model = dc.models.TensorflowMultiTaskFitTransformRegressor(
-    n_tasks=len(qm9_tasks), n_features=[29, 29], learning_rate=0.001, momentum=.8, batch_size=32,
+    n_tasks=len(qm9_tasks),
+    n_features=[29, 29],
+    learning_rate=0.001,
+    momentum=.8,
+    batch_size=32,
     weight_init_stddevs=[1 / np.sqrt(400), 1 / np.sqrt(100), 1 / np.sqrt(100)],
-    bias_init_consts=[0., 0., 0.], layer_sizes=[400, 100, 100],
-    dropouts=[0.01, 0.01, 0.01], fit_transformers=fit_transformers, n_evals=10, seed=123)
+    bias_init_consts=[0., 0., 0.],
+    layer_sizes=[400, 100, 100],
+    dropouts=[0.01, 0.01, 0.01],
+    fit_transformers=fit_transformers,
+    n_evals=10,
+    seed=123)
 
 # Fit trained model
 model.fit(train_dataset, nb_epoch=50)
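The visible hunk ends at model.fit. A natural follow-up, not part of this commit, would score the trained model with the two metrics defined above; a minimal sketch using DeepChem's standard Model.evaluate:

# Hypothetical continuation: report MAE and Pearson R^2 on each split,
# undoing the y-normalization via the transformers returned by load_qm9().
train_scores = model.evaluate(train_dataset, regression_metric, transformers)
valid_scores = model.evaluate(valid_dataset, regression_metric, transformers)
test_scores = model.evaluate(test_dataset, regression_metric, transformers)
print("Train scores:", train_scores)
print("Validation scores:", valid_scores)
print("Test scores:", test_scores)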