Commit fcc4278e authored by nd-02110114's avatar nd-02110114
Browse files

Merge branch 'master' into update-dependecies

parents 071a12e8 9223166b
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -5,7 +5,7 @@
[![Anaconda-Server Badge](https://anaconda.org/conda-forge/deepchem/badges/version.svg)](https://anaconda.org/conda-forge/deepchem)
[![PyPI version](https://badge.fury.io/py/deepchem.svg)](https://badge.fury.io/py/deepchem)

[Website](https://deepchem.io/) | [Documentation (master)](https://deepchem.readthedocs.io/en/latest/)) | [Colab Tutorial](https://github.com/deepchem/deepchem/tree/master/examples/tutorials) | [Discussion Forum](https://forum.deepchem.io/) | [Gitter](https://gitter.im/deepchem/Lobby)
[Website](https://deepchem.io/) | [Documentation (master)](https://deepchem.readthedocs.io/en/latest/) | [Colab Tutorial](https://github.com/deepchem/deepchem/tree/master/examples/tutorials) | [Discussion Forum](https://forum.deepchem.io/) | [Gitter](https://gitter.im/deepchem/Lobby)

DeepChem aims to provide a high quality open-source toolchain
that democratizes the use of deep-learning in drug discovery,
@@ -58,6 +58,7 @@ DeepChem has a number of "soft" requirements. These are packages which are neede
- [RDKit](http://www.rdkit.org/docs/Install.html)
- [simdna](https://github.com/kundajelab/simdna)
- [XGBoost](https://xgboost.readthedocs.io/en/latest/)
- [Weights & Biases](https://docs.wandb.com/)
- [TensorFlow Probability](https://www.tensorflow.org/probability)

## Installation
+41 −7
Original line number Diff line number Diff line
@@ -412,8 +412,8 @@ class Dataset(object):

    Returns
    -------
    If `X_stats == True`, returns `(X_means, X_stds)`. If `y_stats ==
    True`, returns `(y_means, y_stds)`. If both are true, returns
    If `X_stats == True`, returns `(X_means, X_stds)`. If `y_stats == True`,
    returns `(y_means, y_stds)`. If both are true, returns
    `(X_means, X_stds, y_means, y_stds)`.
    """
    X_means = 0.0
@@ -1160,8 +1160,8 @@ class DiskDataset(Dataset):
    `math.ceil(len(dataset)/batch_size)`. Each minibatch is returned as
    a tuple of four numpy arrays: `(X, y, w, ids)`.

    Parameters:
    -----------
    Parameters
    ----------
    batch_size: int
      Number of elements in a batch. If None, then it yields batches
      with size equal to the size of each individual shard.
@@ -1655,6 +1655,36 @@ class DiskDataset(Dataset):
    return np.array(
        load_from_disk(os.path.join(self.data_dir, row['ids'])), dtype=object)

  def get_shard_y(self, i):
    """Retrieves the labels for the i-th shard from disk.

    If shard caching is active and the i-th shard is already cached, the
    cached labels are returned directly and nothing is read from disk.

    Parameters
    ----------
    i: int
      Shard index for shard to retrieve labels from

    Returns
    -------
    np.ndarray
      The labels of the i-th shard.
    """
    cache = self._cached_shards
    if cache is not None and cache[i] is not None:
      return cache[i].y
    row = self.metadata_df.iloc[i]
    # Do not force `dtype=object` here: that coercion is only meaningful for
    # ids. Applying it to labels would box every element and hand callers an
    # object array instead of the dtype the labels were stored with.
    return np.array(load_from_disk(os.path.join(self.data_dir, row['y'])))

  def get_shard_w(self, i):
    """Retrieves the weights for the i-th shard from disk.

    If shard caching is active and the i-th shard is already cached, the
    cached weights are returned directly and nothing is read from disk.

    Parameters
    ----------
    i: int
      Shard index for shard to retrieve weights from

    Returns
    -------
    np.ndarray
      The weights of the i-th shard.
    """
    cache = self._cached_shards
    if cache is not None and cache[i] is not None:
      return cache[i].w
    row = self.metadata_df.iloc[i]
    # Do not force `dtype=object` here: that coercion is only meaningful for
    # ids. Applying it to weights would box every element and hand callers an
    # object array instead of the dtype the weights were stored with.
    return np.array(load_from_disk(os.path.join(self.data_dir, row['w'])))

  def add_shard(self, X, y, w, ids):
    """Adds a data shard."""
    metadata_rows = self.metadata_df.values.tolist()
@@ -1758,9 +1788,12 @@ class DiskDataset(Dataset):
  @property
  def y(self):
    """Get the y vector for this dataset as a single numpy array."""
    if len(self) == 0:
      return np.array([])
    ys = []
    one_dimensional = False
    for (_, y_b, _, _) in self.itershards():
    for i in range(self.get_number_shards()):
      y_b = self.get_shard_y(i)
      ys.append(y_b)
      if len(y_b.shape) == 1:
        one_dimensional = True
@@ -1774,8 +1807,9 @@ class DiskDataset(Dataset):
    """Get the weight vector for this dataset as a single numpy array."""
    ws = []
    one_dimensional = False
    for (_, _, w_b, _) in self.itershards():
      ws.append(np.array(w_b))
    for i in range(self.get_number_shards()):
      w_b = self.get_shard_w(i)
      ws.append(w_b)
      if len(w_b.shape) == 1:
        one_dimensional = True
    if not one_dimensional:
+30 −0
Original line number Diff line number Diff line
import numpy as np
import deepchem as dc


def test_y_property():
  """Check that `DiskDataset.y` returns the labels the dataset was built with."""
  n_samples, n_features, n_tasks = 10, 10, 1
  features = np.random.rand(n_samples, n_features)
  labels = np.random.randint(2, size=(n_samples, n_tasks))
  weights = np.ones((n_samples, n_tasks))
  identifiers = np.array(["id"] * n_samples)
  disk_dataset = dc.data.DiskDataset.from_numpy(features, labels, weights,
                                                identifiers)
  # The property should reassemble exactly the labels that were written.
  np.testing.assert_array_equal(labels, disk_dataset.y)


def test_w_property():
  """Test that dataset.w works."""
  num_datapoints = 10
  num_features = 10
  num_tasks = 1
  X = np.random.rand(num_datapoints, num_features)
  y = np.random.randint(2, size=(num_datapoints, num_tasks))
  # Use non-uniform weights: with all-ones weights the check could not tell
  # the weights passed in here apart from any other all-ones array.
  w = np.random.rand(num_datapoints, num_tasks)
  ids = np.array(["id"] * num_datapoints)
  dataset = dc.data.DiskDataset.from_numpy(X, y, w, ids)
  w_out = dataset.w
  np.testing.assert_array_equal(w, w_out)
+4 −4
Original line number Diff line number Diff line
@@ -18,8 +18,8 @@ logger = logging.getLogger(__name__)
def extract_active_site(protein_file, ligand_file, cutoff=4):
  """Extracts a box for the active site.

  Params
  ------
  Parameters
  ----------
  protein_file: str
    Location of protein PDB
  ligand_file: str
@@ -116,8 +116,8 @@ class ConvexHullPocketFinder(BindingPocketFinder):
    face of the hull is converted into a coordinate box used for
    binding.

    Params
    ------
    Parameters
    ----------
    macromolecule_file: str
      Location of the macromolecule file to load

+2 −2
Original line number Diff line number Diff line
@@ -93,8 +93,8 @@ class VinaPoseGenerator(PoseGenerator):
  def __init__(self, sixty_four_bits=True, pocket_finder=None):
    """Initializes Vina Pose Generator

    Params
    ------
    Parameters
    ----------
    sixty_four_bits: bool, optional (default True)
      Specifies whether this is a 64-bit machine. Needed to download
      the correct executable.
Loading