Commit 517bf7b7 authored by miaecle

molnet valid

parent ed19eafc

delaney_dag_valid.py

0 → 100644
+58 −0
from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals

import deepchem
import numpy as np
import tensorflow as tf

seed = 123
np.random.seed(seed)

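# Load the Delaney (ESOL) aqueous-solubility dataset with graph featurization and a random split.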
tasks, datasets, transformers = deepchem.molnet.load_delaney(featurizer='GraphConv', split='random', reload=False)
train_dataset, valid_dataset, test_dataset = datasets
metric = [deepchem.metrics.Metric(deepchem.metrics.rms_score, np.mean)]

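# The DAG transformer pads every molecule to a fixed size, so find the largest molecule across all three splits.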
max_atoms_train = max([mol.get_num_atoms() for mol in train_dataset.X])
max_atoms_valid = max([mol.get_num_atoms() for mol in valid_dataset.X])
max_atoms_test = max([mol.get_num_atoms() for mol in test_dataset.X])
max_atoms = max([max_atoms_train, max_atoms_valid, max_atoms_test])

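# Reshard into smaller shards, then apply the DAG transform to each split.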
reshard_size = 512
transformer = deepchem.trans.DAGTransformer(max_atoms=max_atoms)
train_dataset.reshard(reshard_size)
train_dataset = transformer.transform(train_dataset)
valid_dataset.reshard(reshard_size)
valid_dataset = transformer.transform(valid_dataset)
test_dataset.reshard(reshard_size)
test_dataset = transformer.transform(test_dataset)

batch_size = 128
nb_epoch = 1000
learning_rate = 0.0005
n_graph_feat = 23

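# Fix the TF graph seed and build a single-task DAG regression model; n_atom_feat=75 matches the graph featurization.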
tf.set_random_seed(seed)
model = deepchem.models.DAGTensorGraph(
               1, 
               max_atoms=max_atoms,
               n_atom_feat=75,
               n_graph_feat=n_graph_feat,
               mode='regression',
               batch_size=batch_size,
               learning_rate=learning_rate,
               use_queue=False)

model.fit(train_dataset, nb_epoch=nb_epoch)
train_scores = model.evaluate(train_dataset, metric, transformers)
valid_scores = model.evaluate(valid_dataset, metric, transformers)
test_scores = model.evaluate(test_dataset, metric, transformers)

""" Expected Results:
  train_scores: {'mean-rms_score': 0.029829638487211169}
    
  valid_scores: {'mean-rms_score': 0.75142478279661051}
    
  test_scores: {'mean-rms_score': 0.53192168238754678}

"""
 No newline at end of file
+37 −0
from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals

import deepchem
import numpy as np
import tensorflow as tf

seed = 123
np.random.seed(seed)

tasks, datasets, transformers = deepchem.molnet.load_delaney(featurizer='GraphConv', split='random', reload=False)
train_dataset, valid_dataset, test_dataset = datasets
metric = [deepchem.metrics.Metric(deepchem.metrics.rms_score, np.mean)]
  
batch_size = 150
nb_epoch = 1000
learning_rate = 0.0008

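# Fix the TF graph seed and build a single-task graph-convolution regression model.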
tf.set_random_seed(seed)
model = deepchem.models.GraphConvTensorGraph(1, mode='regression',
                                             batch_size=batch_size,
                                             learning_rate=learning_rate)

model.fit(train_dataset, nb_epoch=nb_epoch)
train_scores = model.evaluate(train_dataset, metric, transformers)
valid_scores = model.evaluate(valid_dataset, metric, transformers)
test_scores = model.evaluate(test_dataset, metric, transformers)

""" Expected Results:
  train_scores: {'mean-rms_score': 0.058672648022210311}
    
  valid_scores: {'mean-rms_score': 0.3635136142334261}
    
  test_scores: {'mean-rms_score': 0.35664025829369983}

"""
 No newline at end of file

qm9_dtnn_test.py

0 → 100644
+89 −0
from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals

import os
import deepchem
import numpy as np
import tensorflow as tf
import tempfile

data_dir = deepchem.utils.get_data_dir()
dataset_file = os.path.join(data_dir, "gdb9.sdf")

qm9_tasks = ["u0_atom"]
featurizer = deepchem.feat.CoulombMatrix(29)

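# Read gdb9.sdf and featurize each molecule as a Coulomb matrix padded to 29 atoms.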
loader = deepchem.data.SDFLoader(
        tasks=qm9_tasks,
        smiles_field="smiles",
        mol_field="mol",
        featurizer=featurizer)

dataset = loader.featurize(dataset_file)
splitter = deepchem.splits.RandomSplitter()
train_dataset, valid_dataset, test_dataset = splitter.train_valid_test_split(
      dataset)

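# Normalize the regression targets using training-set statistics, then apply the same transform to every split.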
transformers = [
      deepchem.trans.NormalizationTransformer(
          transform_y=True, dataset=train_dataset)
]
for transformer in transformers:
  train_dataset = transformer.transform(train_dataset)
  valid_dataset = transformer.transform(valid_dataset)
  test_dataset = transformer.transform(test_dataset)

metric = [deepchem.metrics.Metric(deepchem.metrics.mean_absolute_error, np.mean)]
  
batch_size = 49
nb_epoch = 100
learning_rate = 0.0003
n_embedding = 42
n_distance = 173

seed = 123
np.random.seed(seed)
tf.set_random_seed(seed)
model_dir = tempfile.mkdtemp()
model = deepchem.models.DTNNTensorGraph(
    len(qm9_tasks),
    n_embedding=n_embedding,
    n_hidden=60,
    n_distance=n_distance,
    distance_min=-1.,
    distance_max=18.,
    output_activation=False,
    batch_size=batch_size,
    learning_rate=learning_rate,
    use_queue=False,
    mode="regression",
    model_dir=model_dir)
model.fit(train_dataset, nb_epoch=nb_epoch)
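# Restore from the saved checkpoint and fine-tune at progressively smaller learning rates.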
for rate in [learning_rate/5, learning_rate/20, learning_rate/100]:
  model = deepchem.models.DTNNTensorGraph(
      len(qm9_tasks),
      n_embedding=n_embedding,
      n_hidden=60,
      n_distance=n_distance,
      distance_min=-1.,
      distance_max=18.,
      output_activation=False,
      batch_size=batch_size,
      learning_rate=rate,
      use_queue=False,
      mode="regression",
      model_dir=model_dir)
  model.restore()
  model.fit(train_dataset, nb_epoch=10)

train_scores = model.evaluate(train_dataset, metric, transformers)
valid_scores = model.evaluate(valid_dataset, metric, transformers)
test_scores = model.evaluate(test_dataset, metric, transformers)

model.fit(train_dataset, nb_epoch=10)
'''
computed_metrics: [0.95282979862675088]
computed_metrics: [1.1501283330568968]
computed_metrics: [1.2601717317672092]
'''
 No newline at end of file

qm9_mpnn_test.py

0 → 100644
+82 −0
from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals

import os
import deepchem
import numpy as np
import tensorflow as tf
import tempfile

seed = 123
data_dir = deepchem.utils.get_data_dir()
dataset_file = os.path.join(data_dir, "gdb9.sdf")

qm9_tasks = ["gap"]
featurizer = deepchem.feat.WeaveFeaturizer(graph_distance=False, explicit_H=True)

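# Read gdb9.sdf and featurize with pairwise (Weave) features, keeping explicit hydrogens and Euclidean distances.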
loader = deepchem.data.SDFLoader(
        tasks=qm9_tasks,
        smiles_field="smiles",
        mol_field="mol",
        featurizer=featurizer)

dataset = loader.featurize(dataset_file)
np.random.seed(seed)
splitter = deepchem.splits.RandomSplitter()
train_dataset, valid_dataset, test_dataset = splitter.train_valid_test_split(
      dataset)

transformers = [
      deepchem.trans.NormalizationTransformer(
          transform_y=True, dataset=train_dataset)
]
for transformer in transformers:
  train_dataset = transformer.transform(train_dataset)
  valid_dataset = transformer.transform(valid_dataset)
  test_dataset = transformer.transform(test_dataset)

metric = [deepchem.metrics.Metric(deepchem.metrics.mean_absolute_error, np.mean)]
  
batch_size = 64
nb_epoch = 100
learning_rate = 0.0003
n_hidden = 70
T = 2
M = 5

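# Fix the TF graph seed and build the MPNN regression model (T message-passing steps, M set2set readout steps).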
tf.set_random_seed(seed)
model_dir = tempfile.mkdtemp()
model = deepchem.models.MPNNTensorGraph(
    len(qm9_tasks),
    n_hidden=n_hidden,
    T=T,
    M=M,
    batch_size=batch_size,
    learning_rate=learning_rate,
    use_queue=False,
    mode="regression",
    model_dir=model_dir)
model.fit(train_dataset, nb_epoch=nb_epoch)
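# Restore from the saved checkpoint and fine-tune at progressively smaller learning rates.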
for rate in [learning_rate/5, learning_rate/20, learning_rate/100]:
  model = deepchem.models.MPNNTensorGraph(
      len(qm9_tasks),
      n_hidden=n_hidden,
      T=T,
      M=M,
      batch_size=batch_size,
      learning_rate=rate,
      use_queue=False,
      mode="regression",
      model_dir=model_dir)
  model.restore()
  model.fit(train_dataset, nb_epoch=10)

train_scores = model.evaluate(train_dataset, metric, transformers)
valid_scores = model.evaluate(valid_dataset, metric, transformers)
test_scores = model.evaluate(test_dataset, metric, transformers)

model.fit(train_dataset, nb_epoch=10)
'''

'''
 No newline at end of file