Merge pull request #517 from lilleswing/tg-rewrite-clean (dd6f32b0) · Commits · 钟慕尧 / deepchem

contrib/atomicconv/feat/atomicnet_coordinates.py

+8 −8

Original line number	Diff line number	Diff line
		@@ -13,7 +13,7 @@ import numpy as np
		from rdkit import Chem
		from deepchem.feat import Featurizer
		from deepchem.feat import ComplexFeaturizer
		from deepchem.utils import pad_array
		from deepchem.utils import pad_array, rdkit_util


		def get_cells(coords, neighbor_cutoff):
		@@ -153,9 +153,8 @@ def compute_neighbor_cell_map(N_x, N_y, N_z):
		for x_offset in offsets:
		for y_offset in offsets:
		for z_offset in offsets:
		neighbors.append(
		((x_ind + x_offset) % N_x, (y_ind + y_offset) % N_y,
		(z_ind + z_offset) % N_z))
		neighbors.append(((x_ind + x_offset) % N_x, (y_ind + y_offset) %
		N_y, (z_ind + z_offset) % N_z))
		neighbor_cell_map[(x_ind, y_ind, z_ind)] = neighbors
		return neighbor_cell_map

		@@ -245,6 +244,7 @@ class NeighborListAtomicCoordinates(Featurizer):
		Molecule

		"""
		print(mol)
		N = mol.GetNumAtoms()
		coords = get_coords(mol)

		@@ -394,10 +394,10 @@ class ComplexNeighborListFragmentAtomicCoordinates(ComplexFeaturizer):
		"""

		try:
		frag1_mol = Chem.MolFromPDBFile(
		frag1_pdb_file, sanitize=False, removeHs=False)
		frag2_mol = Chem.MolFromPDBFile(
		frag2_pdb_file, sanitize=False, removeHs=False)
		frag1_mol = rdkit_util.load_molecule(
		frag1_pdb_file, add_hydrogens=False, calc_charges=False)[1]
		frag2_mol = rdkit_util.load_molecule(
		frag2_pdb_file, add_hydrogens=False, calc_charges=False)[1]
		except:
		frag1_mol = None
		frag2_mol = None

contrib/atomicconv/feat/atomicnet_pdbbind_datasets.py

+2 −1

Original line number	Diff line number	Diff line
		@@ -9,6 +9,7 @@ from __future__ import unicode_literals
		import os
		import numpy as np
		import pandas as pd
		import deepchem as dc
		from atomicnet_coordinates import ComplexNeighborListFragmentAtomicCoordinates


		@@ -68,7 +69,7 @@ def compute_pdbbind_coordinate_features(complex_featurizer, pdb_subdir,
		"""

		protein_file = os.path.join(pdb_subdir, "%s_pocket.pdb" % pdb_code)
		ligand_file = os.path.join(pdb_subdir, "%s_ligand.pdb" % pdb_code)
		ligand_file = os.path.join(pdb_subdir, "%s_ligand.sdf" % pdb_code)
		feature = complex_featurizer._featurize_complex(
		str(ligand_file), str(protein_file))
		return feature

contrib/atomicconv/feat/featurize.py

+6 −8

Original line number	Diff line number	Diff line
		@@ -9,17 +9,15 @@ __license__ = "MIT"
		import os
		import sys
		from subprocess import call
		from atomicnet_pdbbind_datasets import load_core_pdbbind_fragment_coordinates
		from atomicnet_pdbbind_datasets import load_pdbbind_fragment_coordinates

		call([
		"wget",
		"wget", "-c",
		"http://deepchem.io.s3-website-us-west-1.amazonaws.com/datasets/pdbbind_v2015.tar.gz"
		])
		if not os.path.exists("v2015"):
		call(["tar", "-xvzf", "pdbbind_v2015.tar.gz"])

		# This could be done with openbabel in python
		call(["convert_ligand_sdf_to_pdb.sh"])

		base_dir = os.getcwd()
		pdbbind_dir = os.path.join(base_dir, "v2015")
		datafile = "INDEX_core_data.2013"
		@@ -30,6 +28,6 @@ complex_num_atoms = 908
		max_num_neighbors = 8
		neighbor_cutoff = 12.0

		pdbbind_tasks, dataset, transformers = load_core_pdbbind_fragment_coordinates(
		pdbbind_tasks, dataset, transformers = load_pdbbind_fragment_coordinates(
		frag1_num_atoms, frag2_num_atoms, complex_num_atoms, max_num_neighbors,
		neighbor_cutoff, pdbbind_dir, base_dir, datafile)
		neighbor_cutoff, pdbbind_dir, base_dir, str(datafile))

deepchem/data/datasets.py

+57 −4

Original line number	Diff line number	Diff line
		@@ -8,6 +8,7 @@ import os
		import numpy as np
		import pandas as pd
		import random
		import six
		from functools import partial
		from deepchem.utils.save import save_to_disk
		from deepchem.utils.save import load_from_disk
		@@ -160,6 +161,16 @@ class Dataset(object):
		epoch=0,
		deterministic=False,
		pad_batches=False):
		"""

		Parameters
		----------


		Returns
		-------

		"""
		"""Get an object that iterates over minibatches from the dataset.

		Each minibatch is returned as a tuple of four numpy arrays: (X, y, w, ids).
		@@ -1031,3 +1042,45 @@ class DiskDataset(Dataset):
		def get_label_stds(self):
		"""Return pandas series of label stds."""
		return self.metadata_df["y_stds"]


		class Databag(object):
		    """Iterate through several datasets in lockstep.

		    Datasets are registered under a key with `add_dataset`; `iterbatches`
		    then yields one dictionary per step holding the current batch of
		    features from every registered dataset.
		    """

		    def __init__(self):
		        # Maps a caller-chosen key to the dataset registered under it.
		        self.datasets = {}

		    def add_dataset(self, key, dataset):
		        """Register `dataset` under `key`, replacing any previous entry.

		        Parameters
		        ----------
		        key: hashable
		          Name under which this dataset's batches appear in the yielded dicts.
		        dataset: object
		          Must provide `iterbatches(**kwargs)` yielding indexable batches.
		        """
		        self.datasets[key] = dataset

		    def iterbatches(self, **kwargs):
		        """Loop through all internal datasets in the same order.

		        Parameters
		        ----------
		        epochs: int, optional (default 1)
		          Number of times to loop through the datasets. Consumed here and
		          not forwarded to the underlying datasets.
		        **kwargs:
		          Every other keyword argument (e.g. `batch_size`, `pad_batches`) is
		          forwarded unchanged to each dataset's `iterbatches`. `deterministic`
		          is always overridden to True so the datasets stay aligned.

		        Returns
		        -------
		        Generator which yields a dictionary {key: batch[0]}, where `batch` is
		        the item produced by the corresponding dataset's `iterbatches` (its
		        first element is taken). Within an epoch, iteration stops as soon as
		        any one dataset is exhausted.
		        """
		        # Freeze the key order once so every epoch pairs keys and iterators
		        # identically.
		        key_order = list(self.datasets)
		        epochs = kwargs.pop("epochs", 1)
		        # Shuffled iteration would break the row correspondence across datasets.
		        kwargs["deterministic"] = True
		        for _ in range(epochs):
		            iterators = [
		                self.datasets[key].iterbatches(**kwargs) for key in key_order
		            ]
		            for batches in zip(*iterators):
		                yield {key: batch[0] for key, batch in zip(key_order, batches)}

deepchem/metrics/__init__.py

+1 −1

Original line number	Diff line number	Diff line
		@@ -203,7 +203,7 @@ class Metric(object):
		else:
		y_pred = np.reshape(y_pred, (n_samples, n_tasks))
		y_true = np.reshape(y_true, (n_samples, n_tasks))
		if w is None:
		if w is None or len(w) == 0:
		w = np.ones_like(y_true)
		assert y_true.shape[0] == y_pred.shape[0] == w.shape[0]
		computed_metrics = []

Admin message