Commit 241dc427 authored by nitinprakash96's avatar nitinprakash96
Browse files
parents 48bbcfe4 58d6b17b
Loading
Loading
Loading
Loading
+8 −17
Original line number Diff line number Diff line
FROM nvidia/cuda
FROM nvidia/cuda:9.0-cudnn7-runtime

# Install some utilities
RUN apt-get update && \
    apt-get install -y -q wget git libxrender1 libsm6 && \
    apt-get install -y -q wget git libxrender1 libsm6 bzip2 && \
    apt-get clean

# Install miniconda
RUN MINICONDA="Miniconda2-latest-Linux-x86_64.sh" && \
RUN MINICONDA="Miniconda3-latest-Linux-x86_64.sh" && \
    wget --quiet https://repo.continuum.io/miniconda/$MINICONDA && \
    bash $MINICONDA -b -p /miniconda && \
    rm -f $MINICONDA
ENV PATH /miniconda/bin:$PATH

# Install deepchem conda package from omnia
# TODO: Uncomment this when there is a stable release of deepchem.
#RUN conda config --add channels omnia
#RUN conda install --yes deepchem

# Install deepchem with GPU support from github, pinned to a release tag
# TODO: Get rid of this when there is a stable release of deepchem.
RUN git clone https://github.com/deepchem/deepchem.git && \
RUN conda update -n base conda
RUN export LANG=en_US.UTF-8 && \
    git clone https://github.com/deepchem/deepchem.git && \
    cd deepchem && \
    git checkout tags/1.3.1 && \
    git checkout 2.0.0 && \
    sed -i -- 's/tensorflow$/tensorflow-gpu/g' scripts/install_deepchem_conda.sh && \
    bash scripts/install_deepchem_conda.sh root && \
    bash scripts/install_deepchem_conda.sh && \
    python setup.py develop

# Clean up
RUN cd deepchem && \
    git clean -fX

# Run tests
#RUN pip install nose && \
#    nosetests -v deepchem --nologcapture
+2 −2
Original line number Diff line number Diff line
@@ -72,7 +72,7 @@ via this installation procedure.
### Easy Install via Conda

```bash
conda install -c deepchem -c rdkit -c conda-forge -c omnia deepchem=1.3.1
conda install -c deepchem -c rdkit -c conda-forge -c omnia deepchem=2.0.0
```
**Note:** `Easy Install` installs the latest stable version of `deepchem` and _does not install from source_. If you need to install from source, make sure you follow the steps [here](#using-a-conda-environment).

@@ -182,4 +182,4 @@ DeepChem is supported by a number of corporate partners who use DeepChem to solv


## Version
1.3.1
2.0.0
+13 −4
Original line number Diff line number Diff line
@@ -5,14 +5,17 @@ from __future__ import division
from __future__ import unicode_literals

import os
import logging
import deepchem
from deepchem.molnet.load_function.bace_features import bace_user_specified_features

logger = logging.getLogger(__name__)


def load_bace_regression(featurizer='ECFP', split='random', reload=True):
  """Load bace datasets."""
  # Featurize bace dataset
  print("About to featurize bace dataset.")
  logger.info("About to featurize bace dataset.")
  data_dir = deepchem.utils.get_data_dir()
  if reload:
    # str() keeps the cache path buildable when split is None (no-split case);
    # for the usual string values the resulting path is unchanged.
    save_dir = os.path.join(data_dir, "bace_r/" + featurizer + "/" + str(split))
@@ -53,10 +56,13 @@ def load_bace_regression(featurizer='ECFP', split='random', reload=True):
          transform_y=True, dataset=dataset)
  ]

  print("About to transform data")
  logger.info("About to transform data")
  for transformer in transformers:
    dataset = transformer.transform(dataset)

  if split == None:
    return bace_tasks, (dataset, None, None), transformers

  splitters = {
      'index': deepchem.splits.IndexSplitter(),
      'random': deepchem.splits.RandomSplitter(),
@@ -74,7 +80,7 @@ def load_bace_regression(featurizer='ECFP', split='random', reload=True):
def load_bace_classification(featurizer='ECFP', split='random', reload=True):
  """Load bace datasets."""
  # Featurize bace dataset
  print("About to featurize bace dataset.")
  logger.info("About to featurize bace dataset.")
  data_dir = deepchem.utils.get_data_dir()
  if reload:
    # str() keeps the cache path buildable when split is None (no-split case);
    # for the usual string values the resulting path is unchanged.
    save_dir = os.path.join(data_dir, "bace_c/" + featurizer + "/" + str(split))
@@ -114,10 +120,13 @@ def load_bace_classification(featurizer='ECFP', split='random', reload=True):
      deepchem.trans.BalancingTransformer(transform_w=True, dataset=dataset)
  ]

  print("About to transform data")
  logger.info("About to transform data")
  for transformer in transformers:
    dataset = transformer.transform(dataset)

  if split == None:
    return bace_tasks, (dataset, None, None), transformers

  splitters = {
      'index': deepchem.splits.IndexSplitter(),
      'random': deepchem.splits.RandomSplitter(),
+8 −2
Original line number Diff line number Diff line
@@ -5,13 +5,16 @@ from __future__ import division
from __future__ import unicode_literals

import os
import logging
import deepchem

logger = logging.getLogger(__name__)


def load_bbbp(featurizer='ECFP', split='random', reload=True):
  """Load blood-brain barrier penetration datasets """
  # Featurize bbb dataset
  print("About to featurize bbbp dataset.")
  logger.info("About to featurize bbbp dataset.")
  data_dir = deepchem.utils.get_data_dir()
  if reload:
    # str() keeps the cache path buildable when split is None (no-split case);
    # for the usual string values the resulting path is unchanged.
    save_dir = os.path.join(data_dir, "bbbp/" + featurizer + "/" + str(split))
@@ -47,10 +50,13 @@ def load_bbbp(featurizer='ECFP', split='random', reload=True):
      deepchem.trans.BalancingTransformer(transform_w=True, dataset=dataset)
  ]

  print("About to transform data")
  logger.info("About to transform data")
  for transformer in transformers:
    dataset = transformer.transform(dataset)

  if split == None:
    return bbbp_tasks, (dataset, None, None), transformers

  splitters = {
      'index': deepchem.splits.IndexSplitter(),
      'random': deepchem.splits.RandomSplitter(),
+15 −10
Original line number Diff line number Diff line
@@ -5,9 +5,12 @@ from __future__ import division
from __future__ import unicode_literals

import os
import logging
import deepchem
from deepchem.molnet.load_function.chembl_tasks import chembl_tasks

logger = logging.getLogger(__name__)


def load_chembl(shard_size=2000,
                featurizer="ECFP",
@@ -46,7 +49,7 @@ def load_chembl(shard_size=2000,
        'http://deepchem.io.s3-website-us-west-1.amazonaws.com/datasets/chembl_year_sets/chembl_sparse_ts_valid.csv.gz'
    )

  print("About to load ChEMBL dataset.")
  logger.info("About to load ChEMBL dataset.")
  if reload:
    loaded, all_dataset, transformers = deepchem.utils.save.load_dataset_from_disk(
        save_dir)
@@ -62,7 +65,7 @@ def load_chembl(shard_size=2000,
        data_dir, "./chembl_year_sets/chembl_%s_ts_test.csv.gz" % set)

  # Featurize ChEMBL dataset
  print("About to featurize ChEMBL dataset.")
  logger.info("About to featurize ChEMBL dataset.")
  if featurizer == 'ECFP':
    featurizer = deepchem.feat.CircularFingerprint(size=1024)
  elif featurizer == 'GraphConv':
@@ -76,16 +79,16 @@ def load_chembl(shard_size=2000,
      tasks=chembl_tasks, smiles_field="smiles", featurizer=featurizer)

  if split == "year":
    print("Featurizing train datasets")
    logger.info("Featurizing train datasets")
    train_dataset = loader.featurize(train_files, shard_size=shard_size)
    print("Featurizing valid datasets")
    logger.info("Featurizing valid datasets")
    valid_dataset = loader.featurize(valid_files, shard_size=shard_size)
    print("Featurizing test datasets")
    logger.info("Featurizing test datasets")
    test_dataset = loader.featurize(test_files, shard_size=shard_size)
  else:
    dataset = loader.featurize(dataset_path, shard_size=shard_size)
  # Initialize transformers
  print("About to transform data")
  logger.info("About to transform data")
  if split == "year":
    transformers = [
        deepchem.trans.NormalizationTransformer(
@@ -103,15 +106,17 @@ def load_chembl(shard_size=2000,
    for transformer in transformers:
      dataset = transformer.transform(dataset)

  if spit == None:
    return chembl_tasks, (dataset, None, None), transformers

  splitters = {
      'index': deepchem.splits.IndexSplitter(),
      'random': deepchem.splits.RandomSplitter(),
      'scaffold': deepchem.splits.ScaffoldSplitter()
  }

  if split in splitters:
  splitter = splitters[split]
    print("Performing new split.")
  logger.info("Performing new split.")
  train, valid, test = splitter.train_valid_test_split(dataset)

  if reload:
Loading