Commit e440ed86 authored by leswing
Browse files

Merge branch 'master' into tensorgraph-cr

parents 429bb118 8233320d
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -21,9 +21,9 @@ ENV PATH /miniconda/bin:$PATH
# TODO: Get rid of this when there is a stable release of deepchem.
RUN git clone https://github.com/deepchem/deepchem.git && \
    cd deepchem && \
    git checkout 415aebadff54175b7ba108964723c8f69438af94 && \
    git checkout tags/1.0.0 && \
    bash scripts/install_deepchem_conda.sh root && \
    pip install tensorflow-gpu==0.12.1 && \
    pip install tensorflow-gpu==1.0.1 && \
    python setup.py develop

# Clean up
+2 −0
Original line number Diff line number Diff line
@@ -9,6 +9,8 @@ from deepchem.models.models import Model
from deepchem.models.sklearn_models import SklearnModel
from deepchem.models.tf_new_models.multitask_classifier import MultitaskGraphClassifier
from deepchem.models.tf_new_models.multitask_regressor import MultitaskGraphRegressor
from deepchem.models.tf_new_models.DTNN_regressor import DTNNGraphRegressor

from deepchem.models.tf_new_models.support_classifier import SupportGraphClassifier
from deepchem.models.multitask import SingletaskToMultitask
from deepchem.models.sequential import Sequential
+10.1 KiB

File added.

No diff preview for this file type.

+45 −0
Original line number Diff line number Diff line
@@ -17,6 +17,7 @@ import sklearn
import shutil
import tensorflow as tf
import deepchem as dc
import scipy.io
from tensorflow.python.framework import test_util
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
@@ -661,6 +662,50 @@ class TestOverfit(test_util.TensorFlowTestCase):

    assert scores[classification_metric.name] < .2

  def test_DTNN_multitask_regression_overfit(self):
    """Check that a DTNN graph regressor can overfit a tiny dataset.

    Loads the bundled example_DTNN.mat fixture, trains for 20 epochs and
    asserts a train-set Pearson R^2 above 0.9.
    """
    # Fix both RNG sources so variable init and batching are reproducible.
    np.random.seed(123)
    tf.set_random_seed(123)

    # Load the small example dataset shipped next to this test file.
    mat_path = os.path.join(self.current_dir, "example_DTNN.mat")
    mat = scipy.io.loadmat(mat_path)
    features = mat['X']
    targets = mat['T']
    sample_weights = np.ones_like(targets)
    train_dataset = dc.data.DiskDataset.from_numpy(
        features, targets, sample_weights, ids=None)

    regression_metric = dc.metrics.Metric(
        dc.metrics.pearson_r2_score, task_averager=np.mean)
    n_tasks = targets.shape[1]
    # First entry of the data shape is the padded atom count.
    max_n_atoms = list(train_dataset.get_data_shape())[0]
    batch_size = 10
    n_embedding = 20

    # Embedding -> two interaction steps -> gather, all with the same width.
    graph_model = dc.nn.SequentialDTNNGraph(max_n_atoms=max_n_atoms)
    graph_model.add(dc.nn.DTNNEmbedding(n_embedding=n_embedding))
    graph_model.add(dc.nn.DTNNStep(n_embedding=n_embedding))
    graph_model.add(dc.nn.DTNNStep(n_embedding=n_embedding))
    graph_model.add(dc.nn.DTNNGather(n_embedding=n_embedding))
    model = dc.models.DTNNGraphRegressor(
        graph_model,
        n_tasks,
        n_embedding,
        batch_size=batch_size,
        learning_rate=1e-3,
        learning_rate_decay_time=1000,
        optimizer_type="adam",
        beta1=.9,
        beta2=.999)

    # Fit trained model
    model.fit(train_dataset, nb_epoch=20)
    model.save()

    # Eval model on train
    scores = model.evaluate(train_dataset, [regression_metric])

    assert scores[regression_metric.name] > .9

  def test_siamese_singletask_classification_overfit(self):
    """Test siamese singletask model overfits tiny data."""
    np.random.seed(123)
+28 −0
Original line number Diff line number Diff line
import tensorflow as tf
from deepchem.models.tf_new_models.multitask_regressor import MultitaskGraphRegressor


class DTNNGraphRegressor(MultitaskGraphRegressor):
  """Multitask regressor head for Deep Tensor Neural Network graph models.

  Overrides build() so that one independent linear regression head is
  attached per task on top of the molecule-level features produced by the
  wrapped DTNN graph model.
  """

  def build(self):
    """Create target placeholders and per-task linear output layers.

    Side effects:
      Sets self.label_placeholder and self.weight_placeholder, shaped
      (batch, n_tasks).

    Returns:
      list of tensors, one per task; each is the squeezed output of a
      task-specific linear layer (W, b) applied to the shared features.
    """
    # Create target inputs
    self.label_placeholder = tf.placeholder(
        dtype='float32', shape=(None, self.n_tasks), name="label_placeholder")
    # Fixed typo in the graph-node name ("weight_placholder"); feeds use the
    # attribute, not the name, so callers are unaffected.
    self.weight_placeholder = tf.placeholder(
        dtype='float32',
        shape=(None, self.n_tasks),
        name="weight_placeholder")

    # Shared features from the underlying DTNN graph model.
    feat = self.model.return_outputs()
    feat_size = self.feat_dim
    outputs = []
    W_list = []
    b_list = []
    for task in range(self.n_tasks):
      # One independent (W, b) pair per task, small random init for W.
      W_list.append(
          tf.Variable(
              tf.truncated_normal([feat_size, 1], stddev=0.01),
              name='w',
              dtype=tf.float32))
      b_list.append(tf.Variable(tf.zeros([1]), name='b', dtype=tf.float32))
      outputs.append(
          tf.squeeze(tf.nn.xw_plus_b(feat, W_list[task], b_list[task])))
    return outputs
Loading