Commit 17540813 authored by Vignesh

Merge branch 'transfer-learning-api' of https://github.com/VIGS25/deepchem into transfer-learning-api
parents c3688a96 20bbb7f6
+10 −0
@@ -201,6 +201,9 @@ class SmilesToImage(Featurizer):
      # Compute atom properties
      atom_props = np.array([[atom.GetAtomicNum()] for atom in cmol.GetAtoms()])

+      bond_props = bond_props.astype(np.float32)
+      atom_props = atom_props.astype(np.float32)
+
    else:
      # Setup image
      img = np.zeros((self.img_size, self.img_size, 4))
@@ -218,6 +221,13 @@ class SmilesToImage(Featurizer):
          atom.GetHybridization().real,
      ] for atom in cmol.GetAtoms()])

+      bond_props = bond_props.astype(np.float32)
+      atom_props = atom_props.astype(np.float32)
+
+      partial_charges = atom_props[:, 1]
+      if np.any(np.isnan(partial_charges)):
+        return []
+
    frac = np.linspace(0, 1, int(1 / self.res * 2))
    # Reshape done for proper broadcast
    frac = frac.reshape(-1, 1, 1)
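The new casts and the NaN check guard against RDKit producing undefined Gasteiger partial charges for some molecules; the featurizer now bails out and returns an empty list rather than emitting NaN-filled image features. A minimal sketch of the condition being guarded, assuming the charges come from RDKit's standard Gasteiger routine (which this hunk does not show):

import numpy as np
from rdkit import Chem
from rdkit.Chem import AllChem

mol = Chem.MolFromSmiles('CC(=O)O')
AllChem.ComputeGasteigerCharges(mol)
# '_GasteigerCharge' is the atom property RDKit fills in.
charges = np.array(
    [float(a.GetProp('_GasteigerCharge')) for a in mol.GetAtoms()])
if np.any(np.isnan(charges)):
  # Mirrors the featurizer's new behavior: skip the molecule entirely.
  print('NaN partial charges; molecule skipped')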
+149 −129
"""Model-Agnostic Meta-Learning (MAML) algorithm for low data learning."""

-from deepchem.models.tensorgraph.layers import Layer
from deepchem.models.tensorgraph.optimizers import Adam, GradientDescent
import numpy as np
import os
import shutil
import tempfile
@@ -17,23 +17,32 @@ class MetaLearner(object):
  data for training it on a large (possibly infinite) set of different tasks.
  """

-  @property
-  def loss(self):
-    """Get the model's loss function, represented as a Layer or Tensor."""
+  def compute_model(self, inputs, variables, training):
+    """Compute the model for a set of inputs and variables.
+
+    Parameters
+    ----------
+    inputs: list of tensors
+      the inputs to the model
+    variables: list of tensors
+      the values to use for the model's variables.  This might be the actual
+      variables (as returned by the MetaLearner's variables property), or
+      alternatively it might be the values of those variables after one or more
+      steps of gradient descent for the current task.
+    training: bool
+      indicates whether the model is being invoked for training or prediction
+
+    Returns
+    -------
+    (loss, outputs) where loss is the value of the model's loss function, and
+    outputs is a list of the model's outputs
+    """
+    raise NotImplemented("Subclasses must implement this")

  @property
  def variables(self):
    """Get the list of Tensorflow variables to train.

    The default implementation returns all trainable variables in the graph.  This is usually
    what you want, but subclasses can customize it if needed.
    """
    loss = self.loss
    if isinstance(loss, Layer):
      loss = loss.out_tensor
    with loss.graph.as_default():
      return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    """Get the list of Tensorflow variables to train."""
    raise NotImplemented("Subclasses must implement this")

  def select_task(self):
    """Select a new task to train on.
@@ -47,8 +56,8 @@ class MetaLearner(object):
  def get_batch(self):
    """Get a batch of data for training.

-    This should return the data in the form of a Tensorflow feed dict, that is, a dict
-    mapping tensors to values.  This will usually be called twice for each task, and should
+    This should return the data as a list of arrays, one for each of the model's
+    inputs.  This will usually be called twice for each task, and should
    return a different batch on each call.
    """
    raise NotImplemented("Subclasses must implement this")
@@ -85,9 +94,9 @@ class MAML(object):
    ----------
    learner: MetaLearner
      defines the meta-learning problem
-    learning_rate: float, Layer, or Tensor
+    learning_rate: float or Tensor
      the learning rate to use for optimizing each task (not to be confused with the one used
-      for meta-learning).  This can optionally be made a variable (represented as a Layer or
+      for meta-learning).  This can optionally be made a variable (represented as a
      Tensor), in which case the learning rate will itself be learnable.
    optimization_steps: int
      the number of steps of gradient descent to perform for each task
@@ -101,21 +110,10 @@ class MAML(object):
    """
    # Record inputs.

-    raise Exception(
-        'MAML does not currently work correctly.  It needs to be rewritten to be compatible with modern TensorFlow'
-    )
    self.learner = learner
-    if isinstance(learner.loss, Layer):
-      self._loss = learner.loss.out_tensor
-    else:
-      self._loss = learner.loss
-    if isinstance(learning_rate, Layer):
-      self._learning_rate = learning_rate.out_tensor
-    else:
-      self._learning_rate = learning_rate
+    self._learning_rate = learning_rate
    self.meta_batch_size = meta_batch_size
    self.optimizer = optimizer
-    self._graph = self._loss.graph

    # Create the output directory if necessary.

@@ -128,37 +126,42 @@ class MAML(object):
      self._model_dir_is_temp = True
    self.model_dir = model_dir
    self.save_file = "%s/%s" % (self.model_dir, "model")
-    with self._graph.as_default():
-      # Create duplicate placeholders for meta-optimization.

-      self._meta_placeholders = {}
-      for p in learner.get_batch().keys():
-        name = 'meta/' + p.name.split(':')[0]
-        self._meta_placeholders[p] = tf.placeholder(p.dtype, p.shape, name)
+    learner.select_task()
+    example_inputs = learner.get_batch()
+    self._input_shapes = [(None,) + i.shape[1:] for i in example_inputs]
+    self._input_dtypes = [x.dtype for x in example_inputs]
+    self._input_placeholders = [
+        tf.placeholder(dtype=tf.as_dtype(t), shape=s)
+        for s, t in zip(self._input_shapes, self._input_dtypes)
+    ]
+    self._meta_placeholders = [
+        tf.placeholder(dtype=tf.as_dtype(t), shape=s)
+        for s, t in zip(self._input_shapes, self._input_dtypes)
+    ]
+    variables = learner.variables
+    self._loss, self._outputs = learner.compute_model(self._input_placeholders,
+                                                      variables, False)
+    loss, _ = learner.compute_model(self._input_placeholders, variables, True)

-      # Create the loss function for meta-optimization.
+    # Build the meta-learning model.

-      updated_loss = self._loss
-      updated_variables = learner.variables
+    updated_variables = variables
    for i in range(optimization_steps):
-        gradients = tf.gradients(updated_loss, updated_variables)
+      gradients = tf.gradients(loss, updated_variables)
      updated_variables = [
          v if g is None else v - self._learning_rate * g
          for v, g in zip(updated_variables, gradients)
      ]
-        replacements = dict(
-            (tf.convert_to_tensor(v1), v2)
-            for v1, v2 in zip(learner.variables, updated_variables))
+      if i == optimization_steps - 1:
+        # In the final loss, use different placeholders for all inputs so the loss will be
+        # computed from a different batch.

-          for p in self._meta_placeholders:
-            replacements[p] = self._meta_placeholders[p]
-        updated_loss = tf.contrib.graph_editor.graph_replace(
-            self._loss, replacements)
-      self._meta_loss = updated_loss
+        inputs = self._meta_placeholders
+      else:
+        inputs = self._input_placeholders
+      loss, outputs = learner.compute_model(inputs, updated_variables, True)
+    self._meta_loss = loss

    # Create variables for accumulating the gradients.

@@ -169,9 +172,7 @@ class MAML(object):
        del variables[i]
        del gradients[i]
    zero_gradients = [tf.zeros(g.shape, g.dtype) for g in gradients]
-      summed_gradients = [
-          tf.Variable(z, trainable=False) for z in zero_gradients
-      ]
+    summed_gradients = [tf.Variable(z, trainable=False) for z in zero_gradients]
    self._clear_gradients = tf.group(
        *[s.assign(z) for s, z in zip(summed_gradients, zero_gradients)])
    self._add_gradients = tf.group(
@@ -187,6 +188,7 @@ class MAML(object):
    self._task_train_op = task_optimizer._create_optimizer(
        self._global_step).minimize(self._loss)
    self._session = tf.Session()
+    self._session.run(tf.global_variables_initializer())

    # Create a Checkpoint for saving.

@@ -214,11 +216,9 @@ class MAML(object):
    checkpoint_interval: float
      the time interval at which to save checkpoints, measured in seconds
    restore: bool
-      if True, restore the model from the most recent checkpoint and continue training
-      from there.  If False, retrain the model from scratch.
+      if True, restore the model from the most recent checkpoint before training
+      it further
    """
-    with self._graph.as_default():
-      self._session.run(tf.global_variables_initializer())
    if restore:
      self.restore()
    manager = tf.train.CheckpointManager(self._checkpoint, self.model_dir,
@@ -231,17 +231,18 @@ class MAML(object):
      self._session.run(self._clear_gradients)
      for j in range(self.meta_batch_size):
        self.learner.select_task()
-          feed_dict = self.learner.get_batch()
-          for key, value in self.learner.get_batch().items():
-            feed_dict[self._meta_placeholders[key]] = value
+        inputs = self.learner.get_batch()
+        feed_dict = {}
+        feed_dict[self._global_step] = i
+        for k in range(len(inputs)):
+          feed_dict[self._input_placeholders[k]] = inputs[k]
+          feed_dict[self._meta_placeholders[k]] = inputs[k]
        self._session.run(self._add_gradients, feed_dict=feed_dict)
      self._session.run(self._meta_train_op)

      # Do checkpointing.

-        if i == steps - 1 or time.time(
-        ) >= checkpoint_time + checkpoint_interval:
+      if i == steps - 1 or time.time() >= checkpoint_time + checkpoint_interval:
        with self._session.as_default():
          manager.save()
        checkpoint_time = time.time()
@@ -251,7 +252,6 @@ class MAML(object):
    last_checkpoint = tf.train.latest_checkpoint(self.model_dir)
    if last_checkpoint is None:
      raise ValueError('No checkpoint found')
-    with self._graph.as_default():
    self._checkpoint.restore(last_checkpoint).run_restore_ops(self._session)

  def train_on_current_task(self, optimization_steps=1, restore=True):
@@ -266,7 +266,27 @@ class MAML(object):
    """
    if restore:
      self.restore()
-    with self._graph.as_default():
-      feed_dict = self.learner.get_batch()
+    inputs = self.learner.get_batch()
+    feed_dict = {}
+    for p, v in zip(self._input_placeholders, inputs):
+      feed_dict[p] = v
+    for i in range(optimization_steps):
+      self._session.run(self._task_train_op, feed_dict=feed_dict)

+  def predict_on_batch(self, inputs):
+    """Compute the model's outputs for a batch of inputs.
+
+    Parameters
+    ----------
+    inputs: list of arrays
+      the inputs to the model
+
+    Returns
+    -------
+    (loss, outputs) where loss is the value of the model's loss function, and
+    outputs is a list of the model's outputs
+    """
+    feed_dict = {}
+    for p, v in zip(self._input_placeholders, inputs):
+      feed_dict[p] = v
+    return self._session.run([self._loss, self._outputs], feed_dict=feed_dict)
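The rewritten constructor unrolls the task-level adaptation symbolically: each of the optimization_steps iterations applies one gradient-descent update to the variables' values, and only the last iteration is evaluated against the second set of placeholders (a fresh batch), so the meta-gradient flows through the whole inner loop. A toy NumPy sketch of that inner update, with illustrative names that do not appear in the diff:

import numpy as np

def inner_loop(grad_fn, theta, alpha, steps):
  # grad_fn: gradient of the task loss w.r.t. the parameters
  # alpha: the task learning rate (self._learning_rate above)
  for _ in range(steps):
    theta = theta - alpha * grad_fn(theta)
  return theta

# Example with the quadratic task loss L(theta) = 0.5 * ||theta - target||^2,
# whose gradient is theta - target.
target = np.array([1.0, -2.0])
adapted = inner_loop(lambda t: t - target, np.zeros(2), alpha=0.1, steps=5)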
+97 −90
from flaky import flaky

import deepchem as dc
from deepchem.models.tensorgraph.layers import Feature, Label, Dense, L2Loss
import numpy as np
import tensorflow as tf
import unittest

# class TestMAML(unittest.TestCase):
#
#   @flaky
#   def test_sine(self):
#     """Test meta-learning for sine function."""
#
#     # This is a MetaLearner that learns to generate sine functions with variable
#     # amplitude and phase.
#
#     class SineLearner(dc.metalearning.MetaLearner):
#
#       def __init__(self):
#         self.batch_size = 10
#         self.tg = dc.models.TensorGraph(use_queue=False)
#         self.features = Feature(shape=(None, 1))
#         self.labels = Label(shape=(None, 1))
#         hidden1 = Dense(
#             in_layers=self.features, out_channels=40, activation_fn=tf.nn.relu)
#         hidden2 = Dense(
#             in_layers=hidden1, out_channels=40, activation_fn=tf.nn.relu)
#         output = Dense(in_layers=hidden2, out_channels=1)
#         loss = L2Loss(in_layers=[output, self.labels])
#         self.tg.add_output(output)
#         self.tg.set_loss(loss)
#         with self.tg._get_tf("Graph").as_default():
#           self.tg.build()
#
#       @property
#       def loss(self):
#         return self.tg.loss
#
#       def select_task(self):
#         self.amplitude = 5.0 * np.random.random()
#         self.phase = np.pi * np.random.random()
#
#       def get_batch(self):
#         x = np.random.uniform(-5.0, 5.0, (self.batch_size, 1))
#         feed_dict = {}
#         feed_dict[self.features.out_tensor] = x
#         feed_dict[self.labels.out_tensor] = self.amplitude * np.sin(
#             x + self.phase)
#         return feed_dict
#
#     # Optimize it.
#
#     learner = SineLearner()
#     maml = dc.metalearning.MAML(learner, meta_batch_size=4)
#     maml.fit(12000)
#
#     # Test it out on some new tasks and see how it works.
#
#     loss1 = []
#     loss2 = []
#     for i in range(50):
#       learner.select_task()
#       feed_dict = learner.get_batch()
#       for key, value in learner.get_batch().items():
#         feed_dict[maml._meta_placeholders[key]] = value
#       loss1.append(
#           np.average(
#               np.sqrt(maml._session.run(maml._loss, feed_dict=feed_dict))))
#       loss2.append(
#           np.average(
#               np.sqrt(maml._session.run(maml._meta_loss, feed_dict=feed_dict))))
#
#     # Initially the model should do a bad job of fitting the sine function.
#
#     assert np.average(loss1) > 1.0
#
#     # After one step of optimization it should do much better.
#
#     assert np.average(loss2) < 1.0
#
#     # Verify that we can create a new MAML object, reload the parameters from the first one, and
#     # get the same result.
#
#     new_maml = dc.metalearning.MAML(learner, model_dir=maml.model_dir)
#     new_maml.restore()
#     new_loss = np.average(
#         np.sqrt(new_maml._session.run(new_maml._loss, feed_dict=feed_dict)))
#     assert new_loss == loss1[-1]
#
#     # Do the same thing, only using the "restore" argument to fit().
#
#     new_maml = dc.metalearning.MAML(learner, model_dir=maml.model_dir)
#     new_maml.fit(0, restore=True)
#     new_loss = np.average(
#         np.sqrt(new_maml._session.run(new_maml._loss, feed_dict=feed_dict)))
#     assert new_loss == loss1[-1]

class TestMAML(unittest.TestCase):

  @flaky
  def test_sine(self):
    """Test meta-learning for sine function."""

    # This is a MetaLearner that learns to generate sine functions with variable
    # amplitude and phase.

    class SineLearner(dc.metalearning.MetaLearner):

      def __init__(self):
        self.batch_size = 10
        self.w1 = tf.Variable(np.random.normal(size=[1, 40], scale=1.0))
        self.w2 = tf.Variable(
            np.random.normal(size=[40, 40], scale=np.sqrt(1 / 40)))
        self.w3 = tf.Variable(
            np.random.normal(size=[40, 1], scale=np.sqrt(1 / 40)))
        self.b1 = tf.Variable(np.zeros(40))
        self.b2 = tf.Variable(np.zeros(40))
        self.b3 = tf.Variable(np.zeros(1))

      def compute_model(self, inputs, variables, training):
        x, y = inputs
        w1, w2, w3, b1, b2, b3 = variables
        dense1 = tf.nn.relu(tf.matmul(x, w1) + b1)
        dense2 = tf.nn.relu(tf.matmul(dense1, w2) + b2)
        output = tf.matmul(dense2, w3) + b3
        loss = tf.reduce_mean(tf.square(output - y))
        return loss, [output]

      @property
      def variables(self):
        return [self.w1, self.w2, self.w3, self.b1, self.b2, self.b3]

      def select_task(self):
        self.amplitude = 5.0 * np.random.random()
        self.phase = np.pi * np.random.random()

      def get_batch(self):
        x = np.random.uniform(-5.0, 5.0, (self.batch_size, 1))
        return [x, self.amplitude * np.sin(x + self.phase)]

    # Optimize it.

    learner = SineLearner()
    optimizer = dc.models.tensorgraph.optimizers.Adam(learning_rate=5e-3)
    maml = dc.metalearning.MAML(learner, meta_batch_size=4, optimizer=optimizer)
    maml.fit(9000)

    # Test it out on some new tasks and see how it works.

    loss1 = []
    loss2 = []
    for i in range(50):
      learner.select_task()
      batch = learner.get_batch()
      feed_dict = {}
      for j in range(len(batch)):
        feed_dict[maml._input_placeholders[j]] = batch[j]
        feed_dict[maml._meta_placeholders[j]] = batch[j]
      loss1.append(
          np.average(
              np.sqrt(maml._session.run(maml._loss, feed_dict=feed_dict))))
      loss2.append(
          np.average(
              np.sqrt(maml._session.run(maml._meta_loss, feed_dict=feed_dict))))

    # Initially the model should do a bad job of fitting the sine function.

    assert np.average(loss1) > 1.0

    # After one step of optimization it should do much better.

    assert np.average(loss2) < 1.0

    # If we train on the current task, the loss should go down.

    maml.train_on_current_task()
    assert np.average(
        np.sqrt(maml._session.run(maml._loss, feed_dict=feed_dict))) < loss1[-1]

    # Verify that we can create a new MAML object, reload the parameters from the first one, and
    # get the same result.

    new_maml = dc.metalearning.MAML(learner, model_dir=maml.model_dir)
    new_maml.restore()
    loss, outputs = new_maml.predict_on_batch(batch)
    assert np.sqrt(loss) == loss1[-1]

    # Do the same thing, only using the "restore" argument to fit().

    new_maml = dc.metalearning.MAML(learner, model_dir=maml.model_dir)
    new_maml.fit(0, restore=True)
    loss, outputs = new_maml.predict_on_batch(batch)
    assert np.sqrt(loss) == loss1[-1]
+44 −19
import numpy as np
import tensorflow as tf
import time
+import logging
+import os
+
+logger = logging.getLogger(__name__)

from deepchem.data import NumpyDataset
from deepchem.models.losses import Loss
@@ -348,7 +352,6 @@ class KerasModel(Model):
      inputs, labels, weights = self._prepare_batch(batch)
      self._tensorboard_step += 1
      should_log = (
-          self.tensorboard and
          self._tensorboard_step % self.tensorboard_log_frequency == 0)
      if tf.executing_eagerly():

@@ -401,7 +404,7 @@ class KerasModel(Model):
                  loss_tensor, global_step=self._global_step, var_list=vars)
            train_op = self._custom_train_op[op_key]
        fetches = [train_op, self._loss_tensor, self._global_step]
-        if should_log:
+        if self.tensorboard and should_log:
          fetches.append(self._summary_ops)
        feed_dict = dict(zip(self._input_placeholders, inputs))
        feed_dict.update(dict(zip(self._label_placeholders, labels)))
@@ -409,33 +412,36 @@ class KerasModel(Model):
        fetched_values = self.session.run(fetches, feed_dict=feed_dict)
        avg_loss += fetched_values[1]
        current_step = fetched_values[2]
-        if should_log:
+        if self.tensorboard and should_log:
          self._summary_writer.reopen()
          self._summary_writer.add_summary(
              fetched_values[3], global_step=current_step)
          self._summary_writer.close()

      # Report progress and write checkpoints.

      averaged_batches += 1
-      if checkpoint_interval > 0 and current_step % checkpoint_interval == checkpoint_interval - 1:
-        self._exec_with_session(lambda: manager.save())
      if should_log:
        avg_loss = float(avg_loss) / averaged_batches
-        print(
+        logger.info(
            'Ending global_step %d: Average loss %g' % (current_step, avg_loss))
        avg_loss = 0.0
        averaged_batches = 0

-    # Report final results.
+      if checkpoint_interval > 0 and current_step % checkpoint_interval == checkpoint_interval - 1:
+        self._exec_with_session(lambda: manager.save())

-    if checkpoint_interval > 0:
+    # Report final results.
+    if averaged_batches > 0:
      avg_loss = float(avg_loss) / averaged_batches
-        print(
+      logger.info(
          'Ending global_step %d: Average loss %g' % (current_step, avg_loss))

+    if checkpoint_interval > 0:
+      self._exec_with_session(lambda: manager.save())

    time2 = time.time()
      print("TIMING: model fitting took %0.3f s" % (time2 - time1))
    logger.info("TIMING: model fitting took %0.3f s" % (time2 - time1))
    return avg_loss

  def fit_on_batch(self, X, y, w, variables=None, loss=None):
@@ -898,7 +904,7 @@ class KerasModel(Model):
          pad_batches=pad_batches):
        yield ([X_b], [y_b], [w_b])

-  def save_checkpoint(self, max_checkpoints_to_keep=5):
+  def save_checkpoint(self, max_checkpoints_to_keep=5, model_dir=None):
    """Save a checkpoint to disk.

    Usually you do not need to call this method, since fit() saves checkpoints
@@ -909,9 +915,15 @@ class KerasModel(Model):
    ----------
    max_checkpoints_to_keep: int
      the maximum number of checkpoints to keep.  Older checkpoints are discarded.
+    model_dir: str, default None
+      Model directory to save checkpoint to. If None, revert to self.model_dir
    """
    self._ensure_built()
-    manager = tf.train.CheckpointManager(self._checkpoint, self.model_dir,
+    if model_dir is None:
+      model_dir = self.model_dir
+    if not os.path.exists(model_dir):
+      os.makedirs(model_dir)
+    manager = tf.train.CheckpointManager(self._checkpoint, model_dir,
                                         max_checkpoints_to_keep)
    self._exec_with_session(lambda: manager.save())

@@ -922,10 +934,18 @@ class KerasModel(Model):
      with self.session.as_default():
        f()

-  def get_checkpoints(self):
-    """Get a list of all available checkpoint files."""
-    return tf.train.get_checkpoint_state(
-        self.model_dir).all_model_checkpoint_paths
+  def get_checkpoints(self, model_dir=None):
+    """Get a list of all available checkpoint files.
+
+    Parameters
+    ----------
+    model_dir: str, default None
+      Directory to get list of checkpoints from. Reverts to self.model_dir if None
+
+    """
+    if model_dir is None:
+      model_dir = self.model_dir
+    return tf.train.get_checkpoint_state(model_dir).all_model_checkpoint_paths

  def restore(self, checkpoint=None, model_dir=None, session=None):
    """Reload the values of all variables from a checkpoint file.
@@ -936,6 +956,10 @@ class KerasModel(Model):
      the path to the checkpoint file to load.  If this is None, the most recent
      checkpoint will be chosen automatically.  Call get_checkpoints() to get a
      list of all available checkpoints.
+    model_dir: str, default None
+      Directory to restore checkpoint from. If None, use self.model_dir.
+    session: tf.Session(), default None
+      Session to run restore ops under. If None, self.session is used.
    """
    self._ensure_built()
    if model_dir is None:
@@ -1069,5 +1093,6 @@ class _StandardLoss(object):
        shape = w.shape
      shape = tuple(-1 if x is None else x for x in shape)
      w = tf.reshape(w, shape + (1,) * (len(losses.shape) - len(w.shape)))

    loss = losses * w
    return tf.reduce_mean(loss) + sum(self.model.losses)
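Taken together, the model_dir arguments added to save_checkpoint(), get_checkpoints(), and restore() decouple checkpoints from the model's own directory, which is the transfer-learning workflow this branch targets: pretrain one model, then load its weights into a fresh model with the same architecture. A hedged usage sketch; MultitaskRegressor and the toy dataset are illustrative choices, not part of this diff:

import numpy as np
import deepchem as dc

# Toy dataset purely for illustration.
train = dc.data.NumpyDataset(
    np.random.random((10, 100)), np.random.random((10, 1)))

source = dc.models.MultitaskRegressor(n_tasks=1, n_features=100)
source.fit(train, nb_epoch=1)
source.save_checkpoint(model_dir='/tmp/pretrained')  # explicit directory

target = dc.models.MultitaskRegressor(n_tasks=1, n_features=100)
print(target.get_checkpoints(model_dir='/tmp/pretrained'))
target.restore(model_dir='/tmp/pretrained')  # load the pretrained weights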
+35 −9
@@ -52,6 +52,7 @@ def load_chembl25(featurizer="smiles2seq",
                  save_dir=None,
                  split_seed=None,
                  reload=True,
+                  transformer_type='minmax',
                  **kwargs):
  """Loads the ChEMBL25 dataset, featurizes it, and does a split.
  Parameters
@@ -68,6 +69,8 @@ def load_chembl25(featurizer="smiles2seq",
    Seed to be used for splitting the dataset
  reload: bool, default True
    Whether to reload saved dataset
+  transformer_type: str, default minmax:
+    Transformer to use
  """
  if data_dir is None:
    data_dir = DEFAULT_DIR
@@ -121,10 +124,17 @@ def load_chembl25(featurizer="smiles2seq",
      input_files=[dataset_file], shard_size=10000, data_dir=save_folder)

  if split is None:
-    transformer = [
+    if transformer_type == "minmax":
+      transformers = [
+          dc.trans.MinMaxTransformer(
+              transform_X=False, transform_y=True, dataset=dataset)
+      ]
+    else:
+      transformers = [
+          dc.trans.NormalizationTransformer(
+              transform_X=False, transform_y=True, dataset=dataset)
+      ]

    logger.info("Split is None, about to transform dataset.")
    for transformer in transformers:
      dataset = transformer.transform(dataset)
@@ -139,11 +149,27 @@ def load_chembl25(featurizer="smiles2seq",
  logger.info("About to split data with {} splitter.".format(split))
  splitter = splitters[split]

-  train, valid, test = splitter.train_valid_test_split(dataset, seed=split_seed)
+  frac_train = kwargs.get('frac_train', 4 / 6)
+  frac_valid = kwargs.get('frac_valid', 1 / 6)
+  frac_test = kwargs.get('frac_test', 1 / 6)
+
+  train, valid, test = splitter.train_valid_test_split(
+      dataset,
+      seed=split_seed,
+      frac_train=frac_train,
+      frac_test=frac_test,
+      frac_valid=frac_valid)
+  if transformer_type == "minmax":
+    transformers = [
+        dc.trans.MinMaxTransformer(
+            transform_X=False, transform_y=True, dataset=train)
+    ]
+  else:
+    transformers = [
+        dc.trans.NormalizationTransformer(
+            transform_X=False, transform_y=True, dataset=train)
+    ]

  for transformer in transformers:
    train = transformer.transform(train)
    valid = transformer.transform(valid)
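For reference, the extended loader can now be called along these lines. This assumes the function is exposed as dc.molnet.load_chembl25 and follows the usual MolNet return convention of (tasks, datasets, transformers), neither of which is shown in this hunk; any transformer_type other than 'minmax' falls through to NormalizationTransformer:

import deepchem as dc

tasks, (train, valid, test), transformers = dc.molnet.load_chembl25(
    featurizer='smiles2seq',
    split='random',
    transformer_type='normalization',
    frac_train=4 / 6,
    frac_valid=1 / 6,
    frac_test=1 / 6)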