Commit fd9c2044 authored by unknown's avatar unknown
Browse files

'gdb7_modification'

parent 9577759e
Loading
Loading
Loading
Loading
+42 −0
Original line number Diff line number Diff line
"""
gdb7 dataset loader.
"""
from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals

import os
import numpy as np
import shutil
import deepchem as dc

def load_gdb7(featurizer=None, split='index'):
  """Load, featurize, normalize and split the gdb7 dataset.

  Molecules are read from ``gdb7.sdf`` in the directory of this module
  and featurized with ``dc.feat.CoulombMatrixEig(23)`` for the single
  task ``"u0_atom"``. Two ``NormalizationTransformer`` objects (one for
  X, one for y) are built from the full featurized dataset and applied
  before the data is split into train/valid/test subsets.

  Parameters
  ----------
  featurizer: None
    Must be left as None. The loader always uses
    ``dc.feat.CoulombMatrixEig(23)``; any other value raises ValueError.
  split: str
    Name of the splitter to use: 'index' or 'random'.

  Returns
  -------
  tuple
    ``(gdb7_tasks, (train, valid, test), transformers)`` where
    ``gdb7_tasks`` is the list of task names and ``transformers`` is the
    list of transformers that were applied to the dataset, in order.

  Raises
  ------
  ValueError
    If ``featurizer`` is not None.
  KeyError
    If ``split`` is not a supported splitter name. This is checked
    before any data is loaded so that a typo fails immediately.
  """
  # Featurize gdb7 dataset
  print("About to featurize gdb7 dataset.")
  current_dir = os.path.dirname(os.path.realpath(__file__))
  dataset_file = os.path.join(current_dir, "./gdb7.sdf")
  gdb7_tasks = ["u0_atom"]

  if featurizer is not None:
    raise ValueError('Only support Coulomb Matrix featurizer')

  # Validate `split` up front (after the featurizer check, so that the
  # ValueError keeps precedence) instead of failing on a dict lookup once
  # the whole dataset has already been featurized and transformed.
  splitter_names = {'index': 'IndexSplitter', 'random': 'RandomSplitter'}
  if split not in splitter_names:
    raise KeyError(
        "Unknown split %r; expected one of %s"
        % (split, sorted(splitter_names)))

  featurizer = dc.feat.CoulombMatrixEig(23)
  loader = dc.data.SDFLoader(tasks=gdb7_tasks, smiles_field="smiles",
                             mol_field="mol", featurizer=featurizer)
  dataset = loader.featurize(dataset_file)

  # Initialize transformers. Both are constructed from the untransformed
  # dataset and then applied one after the other.
  transformers = [
      dc.trans.NormalizationTransformer(transform_X=True, dataset=dataset),
      dc.trans.NormalizationTransformer(transform_y=True, dataset=dataset)]

  print("About to transform data")
  for transformer in transformers:
    dataset = transformer.transform(dataset)

  # Instantiate only the splitter that was requested.
  splitter = getattr(dc.splits, splitter_names[split])()
  train, valid, test = splitter.train_valid_test_split(dataset)
  return gdb7_tasks, (train, valid, test), transformers
+37 −0
Original line number Diff line number Diff line
"""
Script that trains a TensorFlow single-task regression model on the GDB7 dataset.
"""
from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals

import os
import deepchem as dc
import numpy as np
from gdb7_datasets import load_gdb7

# Seed NumPy's global random generator before anything else runs.
np.random.seed(123)

# load_gdb7 returns the task names, the (train, valid, test) datasets and
# the transformers that were applied to the data.
gdb7_tasks, datasets, transformers = load_gdb7()
train_dataset, valid_dataset, test_dataset = datasets

# Scores are reported as mean absolute error.
regression_metric = dc.metrics.Metric(
    dc.metrics.mean_absolute_error, mode="regression")

# One entry per hidden layer; the per-layer settings below are derived
# from this list so they always have matching lengths.
layer_sizes = [2000, 800, 800, 1000]
n_layers = len(layer_sizes)

model = dc.models.TensorflowMultiTaskRegressor(
    n_tasks=len(gdb7_tasks),
    # Presumably the length of the CoulombMatrixEig(23) feature vector
    # produced by load_gdb7 -- confirm if the featurizer changes.
    n_features=23,
    layer_sizes=layer_sizes,
    # Initial weight standard deviation is 1/sqrt(width) for each layer.
    weight_init_stddevs=[1 / np.sqrt(width) for width in layer_sizes],
    bias_init_consts=[0.] * n_layers,
    dropouts=[0.1] * n_layers,
    learning_rate=.001,
    momentum=.8,
    batch_size=512)

# Train on the training split, then save the model.
model.fit(train_dataset)
model.save()

# The transformers are handed to evaluate() together with the metric;
# presumably so that scores are computed in the original (untransformed)
# units -- confirm against the deepchem version in use.
train_scores = model.evaluate(
    train_dataset, [regression_metric], transformers)
print("Train scores [kcal/mol]")
print(train_scores)

valid_scores = model.evaluate(
    valid_dataset, [regression_metric], transformers)
print("Validation scores [kcal/mol]")
print(valid_scores)