Commit 11a539c2 authored by miaecle's avatar miaecle
Browse files

yapf

parent a65d0290
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -799,7 +799,8 @@ class DiskDataset(Dataset):
        X, y, w, ids = (dataset.X, dataset.y, dataset.w, dataset.ids)
        yield (X, y, w, ids)

    return DiskDataset.create_dataset(generator(), data_dir=merge_dir, tasks=datasets[0].tasks)
    return DiskDataset.create_dataset(
        generator(), data_dir=merge_dir, tasks=datasets[0].tasks)

  def subset(self, shard_nums, subset_dir=None):
    """Creates a subset of the original dataset on disk."""
+26 −24
Original line number Diff line number Diff line
@@ -18,7 +18,8 @@ class GaussianProcessHyperparamOpt(HyperparamOpt):
  Gaussian Process Global Optimization(GPGO)
  """

  def hyperparam_search(self,
  def hyperparam_search(
      self,
      params_dict,
      train_dataset,
      valid_dataset,
@@ -30,11 +31,11 @@ class GaussianProcessHyperparamOpt(HyperparamOpt):
      max_iter=20,
      search_range=4,
      hp_invalid_list=[
                            'seed', 'nb_epoch', 'penalty_type', 'dropouts',
                            'bypass_dropouts', 'n_pair_feat', 'fit_transformers',
                            'min_child_weight', 'max_delta_step','subsample',
                            'colsample_bylevel', 'colsample_bytree', 'reg_alpha', 
                            'reg_lambda', 'scale_pos_weight', 'base_score'
          'seed', 'nb_epoch', 'penalty_type', 'dropouts', 'bypass_dropouts',
          'n_pair_feat', 'fit_transformers', 'min_child_weight',
          'max_delta_step', 'subsample', 'colsample_bylevel',
          'colsample_bytree', 'reg_alpha', 'reg_lambda', 'scale_pos_weight',
          'base_score'
      ],
      log_file='GPhypersearch.log'):
    """Perform hyperparams search using a gaussian process assumption
@@ -72,7 +73,7 @@ class GaussianProcessHyperparamOpt(HyperparamOpt):
      optimization on [initial values / search_range,
                       initial values * search_range]
    hp_invalid_list: list
      names of parameters that should not be optimize
      names of parameters that should not be optimized
    log_file: string
      name of log file; hyperparameters and results for each trial will be recorded

@@ -138,6 +139,7 @@ class GaussianProcessHyperparamOpt(HyperparamOpt):

    data_dir = os.environ['DEEPCHEM_DATA_DIR']
    log_file = os.path.join(data_dir, log_file)

    def f(l00=0,
          l01=0,
          l02=0,
@@ -269,8 +271,8 @@ class GaussianProcessHyperparamOpt(HyperparamOpt):
      # Record hyperparameters
      f.write(str(params_dict))
      f.write('\n')
    if isinstance(self.model_class, str) or isinstance(
        self.model_class, unicode):
    if isinstance(self.model_class, str) or isinstance(self.model_class,
                                                       unicode):
      try:
        train_scores, valid_scores, _ = benchmark_classification(
            train_dataset,
+2 −2
Original line number Diff line number Diff line
@@ -49,8 +49,8 @@ def load_qm8(featurizer='CoulombMatrix', split='random', reload=True):
    elif featurizer == 'Raw':
      featurizer = deepchem.feat.RawFeaturizer()
    elif featurizer == 'MP':
      featurizer = deepchem.feat.WeaveFeaturizer(graph_distance=False,
                                                 explicit_H=True)
      featurizer = deepchem.feat.WeaveFeaturizer(
          graph_distance=False, explicit_H=True)
    loader = deepchem.data.SDFLoader(
        tasks=qm8_tasks,
        smiles_field="smiles",
+4 −4
Original line number Diff line number Diff line
@@ -34,8 +34,8 @@ def load_qm9(featurizer='CoulombMatrix', split='random', reload=True):
      )

  qm9_tasks = [
      "mu", "alpha", "homo", "lumo", "gap", "r2", "zpve", "cv",
      "u0_atom", "u298_atom", "h298_atom", "g298_atom"
      "mu", "alpha", "homo", "lumo", "gap", "r2", "zpve", "cv", "u0", "u298",
      "h298", "g298"
  ]

  if reload:
@@ -52,8 +52,8 @@ def load_qm9(featurizer='CoulombMatrix', split='random', reload=True):
    elif featurizer == 'Raw':
      featurizer = deepchem.feat.RawFeaturizer()
    elif featurizer == 'MP':
      featurizer = deepchem.feat.WeaveFeaturizer(graph_distance=False,
                                                 explicit_H=True)
      featurizer = deepchem.feat.WeaveFeaturizer(
          graph_distance=False, explicit_H=True)
    loader = deepchem.data.SDFLoader(
        tasks=qm9_tasks,
        smiles_field="smiles",
+1 −0
Original line number Diff line number Diff line
@@ -231,6 +231,7 @@ def run_benchmark(datasets,
      with open(os.path.join(out_path, dataset + model + '.pkl'), 'w') as f:
        pickle.dump(hyper_parameters, f)


#
# Note by @XericZephyr. Reason why I spun off this function:
#   1. Some models need dataset information.
Loading