Commit 6516b0c8 authored by miaecle's avatar miaecle
Browse files

textCNN

parent 7bf4273d
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -32,3 +32,4 @@ from deepchem.models.tensorgraph.models.graph_models import WeaveTensorGraph, DT
from deepchem.models.tensorgraph.models.symmetry_function_regression import BPSymmetryFunctionRegression, ANIRegression

from deepchem.models.tensorgraph.models.seqtoseq import SeqToSeq
from deepchem.models.tensorgraph.models.text_cnn import TextCNNTensorGraph
+47 −0
Original line number Diff line number Diff line
@@ -395,6 +395,53 @@ class Dense(Layer):
      return self._shared_with._get_scope_name()


class Highway(Layer):
  """Highway layer (https://arxiv.org/pdf/1505.00387.pdf).

  Computes y = H(x) * T(x) + x * (1 - T(x)), where H is a fully-connected
  transform followed by `activation_fn` and T is a sigmoid "transform gate".
  The output has the same shape as the input.
  """

  def __init__(
      self,
      activation_fn=tf.nn.relu,
      biases_initializer=tf.zeros_initializer,
      weights_initializer=tf.contrib.layers.variance_scaling_initializer,
      **kwargs):
    """Create a highway layer. https://arxiv.org/pdf/1505.00387.pdf

    Parameters
    ----------
    activation_fn: object
      the Tensorflow activation function to apply to the output
    biases_initializer: callable object
      the initializer for bias values.  This may be None, in which case the layer
      will not include biases.
    weights_initializer: callable object
      the initializer for weight values
    """
    super(Highway, self).__init__(**kwargs)
    self.activation_fn = activation_fn
    self.biases_initializer = biases_initializer
    self.weights_initializer = weights_initializer

  def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
    inputs = self._get_input_tensors(in_layers)
    parent = inputs[0]
    # Feature dimension of the input; the highway transform preserves it.
    shape = parent.get_shape().as_list()[1]
    # Honor the documented contract that biases_initializer may be None
    # (meaning "no biases"); the original code called None() and crashed.
    if self.biases_initializer is None:
      biases_initializer = None
    else:
      biases_initializer = self.biases_initializer()
    # H(x): the nonlinear transform branch.
    dense_H = tf.contrib.layers.fully_connected(
        parent,
        num_outputs=shape,
        activation_fn=self.activation_fn,
        biases_initializer=biases_initializer,
        weights_initializer=self.weights_initializer(),
        trainable=True)
    # T(x): the sigmoid transform gate.  The bias is initialized to -1 so
    # the gate starts mostly "closed" and the layer initially carries the
    # input through, as recommended by the highway-network paper.
    dense_T = tf.contrib.layers.fully_connected(
        parent,
        num_outputs=shape,
        activation_fn=tf.nn.sigmoid,
        biases_initializer=tf.constant_initializer(-1),
        weights_initializer=self.weights_initializer(),
        trainable=True)
    out_tensor = tf.multiply(dense_H, dense_T) + tf.multiply(parent, 1 - dense_T)
    if set_tensors:
      self.out_tensor = out_tensor
    return out_tensor


class Flatten(Layer):
  """Flatten every dimension except the first"""

+5 −4
Original line number Diff line number Diff line
@@ -12,7 +12,7 @@ import copy
from deepchem.metrics import to_one_hot, from_one_hot
from deepchem.models.tensorgraph.layers import Dense, Concat, SoftMax, \
  SoftMaxCrossEntropy, BatchNorm, WeightedError, Dropout, BatchNormalization, \
  Conv1D, MaxPool1D, Squeeze, Stack
  Conv1D, MaxPool1D, Squeeze, Stack, Highway
from deepchem.models.tensorgraph.graph_layers import DTNNEmbedding

from deepchem.models.tensorgraph.layers import L2Loss, Label, Weights, Feature
@@ -142,8 +142,9 @@ class TextCNNTensorGraph(TensorGraph):
    concat_outputs = Concat(axis=2, in_layers=self.pooled_outputs)
    outputs = Squeeze(squeeze_dims=1, in_layers=concat_outputs)
    #HIGHWAY LAYER
    highway = Highway(in_layers=[outputs])
    self.gather = Dropout(dropout_prob=self.dropout, in_layers=[highway])
    dropout = Dropout(dropout_prob=self.dropout, in_layers=[outputs])
    dense = Dense(out_channels=200, activation_fn=tf.nn.relu, in_layers=[dropout])
    self.gather = Highway(in_layers=[dense])

    costs = []
    self.labels_fd = []
@@ -216,7 +217,7 @@ class TextCNNTensorGraph(TensorGraph):
    while i < smiles_len:
      if smiles[i:i+1] == ' ':
        i = i + 1
      if smiles[i:i+2] in keys:
      elif smiles[i:i+2] in keys:
        seq.append(self.char_dict[smiles[i:i+2]])
        i = i + 2
      elif smiles[i:i+1] in keys:
+47 −0
Original line number Diff line number Diff line
"""
Script that trains textCNN models on delaney dataset.
"""
from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals

import numpy as np
np.random.seed(123)
import tensorflow as tf
tf.set_random_seed(123)
import deepchem as dc

# Load Delaney dataset
delaney_tasks, delaney_datasets, transformers = dc.molnet.load_delaney(
    featurizer='Raw', split='index')
train_dataset, valid_dataset, test_dataset = delaney_datasets

# Fit models
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, np.mean)

char_dict, length = dc.models.TextCNNTensorGraph.build_char_dict(train_dataset)

# Batch size of models
batch_size = 64

model = dc.models.TextCNNTensorGraph(
    len(delaney_tasks),
    char_dict,
    seq_length=length,
    mode='regression',
    learning_rate=1e-3,
    batch_size=batch_size,
    use_queue=False)

# Fit trained model
model.fit(train_dataset, nb_epoch=50)

print("Evaluating model")
train_scores = model.evaluate(train_dataset, [metric], transformers)
valid_scores = model.evaluate(valid_dataset, [metric], transformers)

print("Train scores")
print(train_scores)

print("Validation scores")
print(valid_scores)