Commit 2c0a5566 authored by miaecle's avatar miaecle
Browse files

Merge remote-tracking branch 'remotes/origin/master'

parents 6835a33a 0d573ffc
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
@@ -25,4 +25,3 @@ script:
after_success:
- echo $TRAVIS_SECURE_ENV_VARS
- coveralls
- source devtools/travis-ci/after_sucess.sh
+2 −3
Original line number Diff line number Diff line
@@ -2,7 +2,7 @@ FROM nvidia/cuda

# Install some utilities
RUN apt-get update && \
    apt-get install -y -q wget git libxrender1 && \
    apt-get install -y -q wget git libxrender1 libsm6 && \
    apt-get clean

# Install miniconda
@@ -21,10 +21,9 @@ ENV PATH /miniconda/bin:$PATH
# TODO: Get rid of this when there is a stable release of deepchem.
RUN git clone https://github.com/deepchem/deepchem.git && \
    cd deepchem && \
    git checkout tags/1.2.0 && \
    git checkout tags/1.3.0 && \
    sed -i -- 's/tensorflow$/tensorflow-gpu/g' scripts/install_deepchem_conda.sh && \
    bash scripts/install_deepchem_conda.sh root && \
    pip install tensorflow-gpu==1.0.1 && \
    python setup.py develop

# Clean up
+1 −1
Original line number Diff line number Diff line
@@ -68,7 +68,7 @@ via this installation procedure.

### Easy Install via Conda
```bash
conda install -c deepchem -c rdkit -c conda-forge -c omnia deepchem=1.2.0
conda install -c deepchem -c rdkit -c conda-forge -c omnia deepchem=1.3.0
```

### Installing Dependencies Manually
+15 −11
Original line number Diff line number Diff line
@@ -324,10 +324,12 @@ class NumpyDataset(Dataset):
        sample_perm = np.arange(n_samples)
      if batch_size is None:
        batch_size = n_samples
      interval_points = np.linspace(
          0, n_samples, np.ceil(float(n_samples) / batch_size) + 1, dtype=int)
      for j in range(len(interval_points) - 1):
        indices = range(interval_points[j], interval_points[j + 1])
      batch_idx = 0
      num_batches = np.math.ceil(n_samples / batch_size)
      while batch_idx < num_batches:
        start = batch_idx * batch_size
        end = min(n_samples, (batch_idx + 1) * batch_size)
        indices = range(start, end)
        perm_indices = sample_perm[indices]
        X_batch = dataset._X[perm_indices]
        y_batch = dataset._y[perm_indices]
@@ -336,6 +338,7 @@ class NumpyDataset(Dataset):
        if pad_batches:
          (X_batch, y_batch, w_batch, ids_batch) = pad_batch(
              batch_size, X_batch, y_batch, w_batch, ids_batch)
        batch_idx += 1
        yield (X_batch, y_batch, w_batch, ids_batch)

    return iterate(self, batch_size, deterministic, pad_batches)
@@ -663,13 +666,13 @@ class DiskDataset(Dataset):
          shard_batch_size = n_samples
        else:
          shard_batch_size = batch_size
        interval_points = np.linspace(
            0,
            n_samples,
            np.ceil(float(n_samples) / shard_batch_size) + 1,
            dtype=int)
        for j in range(len(interval_points) - 1):
          indices = range(interval_points[j], interval_points[j + 1])

        batch_idx = 0
        num_batches = np.math.ceil(n_samples / shard_batch_size)
        while batch_idx < num_batches:
          start = batch_idx * shard_batch_size
          end = min(n_samples, (batch_idx + 1) * shard_batch_size)
          indices = range(start, end)
          perm_indices = sample_perm[indices]
          X_batch = X[perm_indices]

@@ -687,6 +690,7 @@ class DiskDataset(Dataset):
          if pad_batches:
            (X_batch, y_batch, w_batch, ids_batch) = pad_batch(
                shard_batch_size, X_batch, y_batch, w_batch, ids_batch)
          batch_idx += 1
          yield (X_batch, y_batch, w_batch, ids_batch)
      pool.close()

+23 −1
Original line number Diff line number Diff line
@@ -384,3 +384,25 @@ class TestDatasets(unittest.TestCase):
    np.testing.assert_allclose(comp_y_means, y_means)
    np.testing.assert_allclose(comp_X_stds, X_stds)
    np.testing.assert_allclose(comp_y_stds, y_stds)

  def test_disk_iterate_batch_size(self):
    """With pad_batches=False, DiskDataset.iterbatches must yield a short
    final batch rather than padding it up to batch_size."""
    solubility_dataset = dc.data.tests.load_solubility_data()
    # Only X is needed here: each yielded batch's size is len(X).
    batch_sizes = [
        len(X) for X, _, _, _ in solubility_dataset.iterbatches(
            3, pad_batches=False, deterministic=True)
    ]
    # Batches of 3 over the dataset: three full batches plus a trailing 1.
    self.assertEqual([3, 3, 3, 1], batch_sizes)

  def test_numpy_iterate_batch_size(self):
    """With pad_batches=False, NumpyDataset.iterbatches must yield a short
    final batch rather than padding it up to batch_size."""
    solubility_dataset = dc.data.tests.load_solubility_data()
    # Convert the on-disk dataset to an in-memory NumpyDataset first.
    solubility_dataset = dc.data.NumpyDataset.from_DiskDataset(
        solubility_dataset)
    # Only X is needed here: each yielded batch's size is len(X).
    batch_sizes = [
        len(X) for X, _, _, _ in solubility_dataset.iterbatches(
            3, pad_batches=False, deterministic=True)
    ]
    # Batches of 3 over the dataset: three full batches plus a trailing 1.
    self.assertEqual([3, 3, 3, 1], batch_sizes)
Loading