Commit 63567ce0 authored by miaecle
Browse files

remove redundant samples in nci

parent 16ecf372
Loading
Loading
Loading
Loading
+19127 −0

File added.

Preview size limit exceeded, changes collapsed.

+3 −3
Original line number Diff line number Diff line
@@ -137,7 +137,7 @@ def benchmark_loading_datasets(hyper_parameters,
    time_finish_fitting = time.time()
    
    
    with open(os.path.join(out_path, 'results.csv'),'ab') as f:
    with open(os.path.join(out_path, 'results.csv'),'a') as f:
      writer = csv.writer(f)
      if mode == 'classification':
        for i in train_score:
@@ -546,10 +546,10 @@ if __name__ == '__main__':
                           'bias_init_consts': [1., 1.], 
                           'dropouts': [0.25, 0.25], 
                           'penalty': 0.0005, 'penalty_type': 'l2', 
                           'batch_size': 128, 'nb_epoch': 50, 
                           'batch_size': 128, 'nb_epoch': 300, 
                           'learning_rate': 0.00008}]
  
  hps['graphconvreg'] = [{'batch_size': 128, 'nb_epoch': 20, 
  hps['graphconvreg'] = [{'batch_size': 128, 'nb_epoch': 300, 
                          'learning_rate': 0.0005, 'n_filters': 128, 
                          'n_fully_connected_nodes': 256, 'seed': 123}]

+3 −7
Original line number Diff line number Diff line
@@ -19,12 +19,8 @@ def load_nci(featurizer='ECFP', shard_size=1000, split='random'):

  # Load nci dataset
  print("About to load NCI dataset.")
  dataset_file1_path = os.path.join(
      current_dir, "../../datasets/nci_1.csv.gz")
  dataset_file2_path = os.path.join(
      current_dir, "../../datasets/nci_2.csv.gz")

  dataset_paths = [dataset_file1_path, dataset_file2_path]
  dataset_path = os.path.join(
      current_dir, "../../datasets/nci_unique.csv")


  # Featurize nci dataset
@@ -50,7 +46,7 @@ def load_nci(featurizer='ECFP', shard_size=1000, split='random'):
  loader = dc.data.CSVLoader(
      tasks=all_nci_tasks, smiles_field="smiles", featurizer=featurizer)

  dataset = loader.featurize(dataset_paths, shard_size=shard_size)
  dataset = loader.featurize(dataset_path, shard_size=shard_size)

  # Initialize transformers
  print("About to transform data")