Commit 4e0d4bc2 authored by leswing's avatar leswing
Browse files

Merge branch 'master' into write-errors

parents 89608c9c 883c381a
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
@@ -2,7 +2,6 @@ language: python
python:
- '2.7'
- '3.5'
- '3.6'
sudo: required
dist: trusty
install:
+11 −2
Original line number Diff line number Diff line
@@ -57,9 +57,18 @@ Installing via this script will ensure that you are **installing from the source
```bash
git clone https://github.com/deepchem/deepchem.git      # Clone deepchem source code from GitHub
cd deepchem
bash scripts/install_deepchem_conda.sh deepchem
```
If you don't want GPU support:
```
bash scripts/install_deepchem_conda.sh deepchem         # If you don't want GPU support
```
If you want GPU support:
```
gpu=1 bash scripts/install_deepchem_conda.sh deepchem         # If you want GPU support
```
Note : `gpu=0 bash scripts/install_deepchem_conda.sh deepchem` will also install CPU supported `deepchem`.
```
source activate deepchem
yes | pip install tensorflow-gpu==1.6.0      # If you want GPU support
python setup.py install                                # Manual install
nosetests -a '!slow' -v deepchem --nologcapture        # Run tests
```
+1 −1
Original line number Diff line number Diff line
@@ -412,5 +412,5 @@ class ImageLoader(DataLoader):
      return NumpyDataset(images)
    else:
      # from_numpy currently requires labels. Make dummy labels
      labels = np.zeros(len(images))
      labels = np.zeros((len(images), 1))
      return DiskDataset.from_numpy(images, labels)
+33 −25
Original line number Diff line number Diff line
@@ -94,11 +94,15 @@ def pad_batch(batch_size, X_b, y_b, w_b, ids_b):

  if y_b is None:
    y_out = None
  elif len(y_b.shape) < 2:
    y_out = np.zeros(batch_size, dtype=y_b.dtype)
  else:
    y_out = np.zeros((batch_size, y_b.shape[1]), dtype=y_b.dtype)

  if w_b is None:
    w_out = None
  elif len(w_b.shape) < 2:
    w_out = np.zeros(batch_size, dtype=w_b.dtype)
  else:
    w_out = np.zeros((batch_size, w_b.shape[1]), dtype=w_b.dtype)

@@ -343,6 +347,8 @@ class NumpyDataset(Dataset):

  def get_task_names(self):
    """Get the names of the tasks associated with this dataset."""
    if len(self._y.shape) < 2:
      return np.array([0])
    return np.arange(self._y.shape[1])

  @property
@@ -956,20 +962,16 @@ class DiskDataset(Dataset):
                 data_dir=None,
                 verbose=True):
    """Creates a DiskDataset object from specified Numpy arrays."""
    # if data_dir is None:
    #  data_dir = tempfile.mkdtemp()
    n_samples = len(X)
    # The -1 indicates that y will be reshaped to have length -1
    if n_samples > 0:
      y = np.reshape(y, (n_samples, -1))
      if w is not None:
        w = np.reshape(w, (n_samples, -1))
    if ids is None:
      ids = np.arange(n_samples)
    if w is None:
      w = np.ones_like(y)
    if tasks is None:
      if len(y.shape) > 1:
        n_tasks = y.shape[1]
      else:
        n_tasks = 1
      tasks = np.arange(n_tasks)
    # raw_data = (X, y, w, ids)
    return DiskDataset.create_dataset(
@@ -1205,8 +1207,8 @@ class DiskDataset(Dataset):
          if indices_count + num_shard_elts >= len(indices):
            break
        # Need to offset indices to fit within shard_size
        shard_inds = indices[indices_count:
                             indices_count + num_shard_elts] - count
        shard_inds = indices[indices_count:indices_count +
                             num_shard_elts] - count
        X_sel = X[shard_inds]
        # Handle the case of datasets with y/w missing
        if y is not None:
@@ -1257,17 +1259,29 @@ class DiskDataset(Dataset):
  def y(self):
    """Get the y vector for this dataset as a single numpy array."""
    ys = []
    one_dimensional = False
    for (_, y_b, _, _) in self.itershards():
      ys.append(y_b)
      if len(y_b.shape) == 1:
        one_dimensional = True
    if not one_dimensional:
      return np.vstack(ys)
    else:
      return np.concatenate(ys)

  @property
  def w(self):
    """Get the weight vector for this dataset as a single numpy array."""
    ws = []
    one_dimensional = False
    for (_, _, w_b, _) in self.itershards():
      ws.append(np.array(w_b))
      if len(w_b.shape) == 1:
        one_dimensional = True
    if not one_dimensional:
      return np.vstack(ws)
    else:
      return np.concatenate(ws)

  def __len__(self):
    """
@@ -1282,22 +1296,16 @@ class DiskDataset(Dataset):
  def get_shape(self):
    """Finds shape of dataset."""
    n_tasks = len(self.get_task_names())
    X_shape = np.array((0,) + (0,) * len(self.get_data_shape()))
    ids_shape = np.array((0,))
    for shard_num, (X, y, w, ids) in enumerate(self.itershards()):
      if shard_num == 0:
        X_shape = np.array(X.shape)
        if n_tasks > 0:
      y_shape = np.array((0,) + (0,))
      w_shape = np.array((0,) + (0,))
          y_shape = np.array(y.shape)
          w_shape = np.array(w.shape)
        else:
          y_shape = tuple()
          w_shape = tuple()

    for shard_num, (X, y, w, ids) in enumerate(self.itershards()):
      if shard_num == 0:
        X_shape += np.array(X.shape)
        if n_tasks > 0:
          y_shape += np.array(y.shape)
          w_shape += np.array(w.shape)
        ids_shape += np.array(ids.shape)
        ids_shape = np.array(ids.shape)
      else:
        X_shape[0] += np.array(X.shape)[0]
        if n_tasks > 0:
+1 −1
Original line number Diff line number Diff line
@@ -68,7 +68,7 @@ class VinaPoseGenerator(PoseGenerator):
      logger.info("Vina not available. Downloading")
      # TODO(rbharath): May want to move this file to S3 so we can ensure it's
      # always available.
      wget_cmd = "wget -nv -c http://vina.scripps.edu/download/autodock_vina_1_1_2_linux_x86.tgz"
      wget_cmd = "wget -nv -c -T 15 http://vina.scripps.edu/download/autodock_vina_1_1_2_linux_x86.tgz"
      call(wget_cmd.split())
      logger.info("Downloaded Vina. Extracting")
      download_cmd = "tar xzvf autodock_vina_1_1_2_linux_x86.tgz"
Loading