Commit 58720ecd authored by mapleaf
Browse files

format code style

parent b39a589c
Loading
Loading
Loading
Loading
+147 −151
Original line number | Diff line number | Diff line
@@ -191,28 +191,25 @@ def load_pdbbind(reload=True,
        save_dir, "full_protein-%s-%s-%s" % (subset, featurizer, split))

  if save_timestamp:
        save_folder = "%s-%s-%s" % (
            save_folder, time.strftime("%Y%m%d", time.localtime()),
    save_folder = "%s-%s-%s" % (save_folder,
                                time.strftime("%Y%m%d", time.localtime()),
                                re.search("\.(.*)", str(time.time())).group(1))

  if reload:
    if not os.path.exists(save_folder):
      raise IOError("Cannot find saved dataset from %s!" % save_folder)
        print("\nLoading featurized and splitted dataset from:\n%s\n" %
              save_folder)
    print("\nLoading featurized and splitted dataset from:\n%s\n" % save_folder)
    loaded, all_dataset, transformers = deepchem.utils.save.load_dataset_from_disk(
        save_folder)
    if loaded:
      return pdbbind_tasks, all_dataset, transformers
    else:
            raise IOError(
                "Failed to load featurized and splitted dataset from:\n%s\n" %
                save_folder)
      raise IOError("Failed to load featurized and splitted dataset from:\n%s\n"
                    % save_folder)

  dataset_file = os.path.join(data_dir, "pdbbind_v2015.tar.gz")
  if not os.path.exists(dataset_file):
        logger.warning(
            "About to download PDBBind full dataset. Large file, 2GB")
    logger.warning("About to download PDBBind full dataset. Large file, 2GB")
    deepchem.utils.download_url(
        'http://deepchem.io.s3-website-us-west-1.amazonaws.com/datasets/' +
        "pdbbind_v2015.tar.gz",
@@ -228,11 +225,9 @@ def load_pdbbind(reload=True,
  print("\nFeaturized and splitted dataset:\n%s" % save_folder)

  if subset == "core":
        index_labels_file = os.path.join(data_folder,
                                         "INDEX_core_data.2013")
    index_labels_file = os.path.join(data_folder, "INDEX_core_data.2013")
  elif subset == "refined":
        index_labels_file = os.path.join(data_folder,
                                         "INDEX_refined_data.2015")
    index_labels_file = os.path.join(data_folder, "INDEX_refined_data.2015")
  else:
    raise ValueError("Other subsets not supported")

@@ -241,13 +236,11 @@ def load_pdbbind(reload=True,
    pdbs = [line[:4] for line in g.readlines() if line[0] != "#"]
  if load_binding_pocket:
    protein_files = [
            os.path.join(data_folder, pdb, "%s_pocket.pdb" % pdb)
            for pdb in pdbs
        os.path.join(data_folder, pdb, "%s_pocket.pdb" % pdb) for pdb in pdbs
    ]
  else:
    protein_files = [
            os.path.join(data_folder, pdb, "%s_protein.pdb" % pdb)
            for pdb in pdbs
        os.path.join(data_folder, pdb, "%s_protein.pdb" % pdb) for pdb in pdbs
    ]
  ligand_files = [
      os.path.join(data_folder, pdb, "%s_ligand.sdf" % pdb) for pdb in pdbs
@@ -267,8 +260,8 @@ def load_pdbbind(reload=True,
    featurizer = rgf.RdkitGridFeaturizer(
        voxel_width=2.0,
        feature_types=[
                'ecfp', 'splif', 'hbond', 'salt_bridge', 'pi_stack',
                'cation_pi', 'charge'
            'ecfp', 'splif', 'hbond', 'salt_bridge', 'pi_stack', 'cation_pi',
            'charge'
        ],
        flatten=True)
  elif featurizer == "atomic" or featurizer == "atomic_conv":
@@ -303,10 +296,14 @@ def load_pdbbind(reload=True,
  else:
    raise ValueError("Featurizer not supported")

  print("Featurizing Complexes")
  print("\nFeaturizing Complexes for \"%s\" ...\n" % data_folder)
  feat_t1 = time.time()
  features, failures = featurizer.featurize_complexes(ligand_files,
                                                      protein_files)
  # Delete labels for failing elements
  feat_t2 = time.time()
  print("\nFeaturization finished, took %0.3f s." % (feat_t2 - feat_t1))

  # Delete labels and ids for failing elements
  labels = np.delete(labels, failures)
  labels = labels.reshape((len(labels), 1))
  ids = np.delete(pdbs, failures)
@@ -329,8 +326,7 @@ def load_pdbbind(reload=True,
      'random': deepchem.splits.RandomSplitter(),
  }
  splitter = splitters[split]
    train, valid, test = splitter.train_valid_test_split(
        dataset, seed=split_seed)
  train, valid, test = splitter.train_valid_test_split(dataset, seed=split_seed)

  all_dataset = (train, valid, test)
  print("\nSaving dataset to \"%s\" ..." % save_folder)