Commit 6337b0e2 authored by Joseph Gomes
Browse files

Update KRR kernel for GDB7

parent a37c108a
Loading
Loading
Loading
Loading
+4 −4
Original line number Diff line number Diff line
@@ -32,9 +32,9 @@ mol_field = "mol"
featurizer = dc.data.SDFLoader(tasks, smiles_field=smiles_field, mol_field=mol_field, featurizer=featurizers)
dataset = featurizer.featurize(input_file, data_dir)
random_splitter = dc.splits.RandomSplitter()
-train_dataset, test_dataset = random_splitter.train_test_split(dataset, train_dir, test_dir)
-transformers = [dc.trans.NormalizationTransformer(transform_X=True, dataset=train_dataset), dc.trans.NormalizationTransformer(transform_y=True, dataset=train_dataset)]
-#transformers = [dc.trans.NormalizationTransformer(transform_y=True, dataset=train_dataset)]
+train_dataset, test_dataset = random_splitter.train_test_split(dataset, train_dir, test_dir, frac_train=0.8)
+#transformers = [dc.trans.NormalizationTransformer(transform_X=True, dataset=train_dataset), dc.trans.NormalizationTransformer(transform_y=True, dataset=train_dataset)]
+transformers = [dc.trans.NormalizationTransformer(transform_y=True, dataset=train_dataset)]

for transformer in transformers:
    train_dataset = transformer.transform(train_dataset)
@@ -45,7 +45,7 @@ regression_metric = dc.metrics.Metric(dc.metrics.mean_absolute_error, mode="regr

def model_builder(model_dir):
  sklearn_model = KernelRidge(
-      kernel="laplacian", alpha=0.05, gamma=0.1)
+      kernel="rbf", alpha=5e-4, gamma=0.008)
  return dc.models.SklearnModel(sklearn_model, model_dir)
model = dc.models.SingletaskToMultitask(tasks, model_builder, model_dir)