Commit cc04280f authored by TaranSinghania's avatar TaranSinghania
Browse files

fixed formatting

parent 998d30da
Loading
Loading
Loading
Loading
+33 −37
Original line number Diff line number Diff line
@@ -40,8 +40,8 @@ class TestTransformers(unittest.TestCase):
  def test_y_log_transformer(self):
    """Tests logarithmic data transformer."""
    solubility_dataset = dc.data.tests.load_solubility_data()
    log_transformer = dc.trans.LogTransformer(transform_y=True,
                                              dataset=solubility_dataset)
    log_transformer = dc.trans.LogTransformer(
        transform_y=True, dataset=solubility_dataset)
    X, y, w, ids = (solubility_dataset.X, solubility_dataset.y,
                    solubility_dataset.w, solubility_dataset.ids)
    solubility_dataset = log_transformer.transform(solubility_dataset)
@@ -72,14 +72,14 @@ class TestTransformers(unittest.TestCase):
      dc.trans.transformers.Transformer(transform_w=True).transform(ul_dataset)

    # transforming X should be okay
    dc.trans.NormalizationTransformer(transform_X=True,
                                      dataset=ul_dataset).transform(ul_dataset)
    dc.trans.NormalizationTransformer(
        transform_X=True, dataset=ul_dataset).transform(ul_dataset)

  def test_X_log_transformer(self):
    """Tests logarithmic data transformer."""
    solubility_dataset = dc.data.tests.load_solubility_data()
    log_transformer = dc.trans.LogTransformer(transform_X=True,
                                              dataset=solubility_dataset)
    log_transformer = dc.trans.LogTransformer(
        transform_X=True, dataset=solubility_dataset)
    X, y, w, ids = (solubility_dataset.X, solubility_dataset.y,
                    solubility_dataset.w, solubility_dataset.ids)
    solubility_dataset = log_transformer.transform(solubility_dataset)
@@ -112,9 +112,8 @@ class TestTransformers(unittest.TestCase):
      tiid = dfe.columns.get_loc(task) - dfe.columns.get_loc(first_task)
      tid = np.concatenate((tid, np.array([tiid])))
    tasks = tid.astype(int)
    log_transformer = dc.trans.LogTransformer(transform_y=True,
                                              tasks=tasks,
                                              dataset=multitask_dataset)
    log_transformer = dc.trans.LogTransformer(
        transform_y=True, tasks=tasks, dataset=multitask_dataset)
    X, y, w, ids = (multitask_dataset.X, multitask_dataset.y,
                    multitask_dataset.w, multitask_dataset.ids)
    multitask_dataset = log_transformer.transform(multitask_dataset)
@@ -147,9 +146,8 @@ class TestTransformers(unittest.TestCase):
      fiid = dfe.columns.get_loc(feature) - dfe.columns.get_loc(first_feature)
      fid = np.concatenate((fid, np.array([fiid])))
    features = fid.astype(int)
    log_transformer = dc.trans.LogTransformer(transform_X=True,
                                              features=features,
                                              dataset=multitask_dataset)
    log_transformer = dc.trans.LogTransformer(
        transform_X=True, features=features, dataset=multitask_dataset)
    X, y, w, ids = (multitask_dataset.X, multitask_dataset.y,
                    multitask_dataset.w, multitask_dataset.ids)
    multitask_dataset = log_transformer.transform(multitask_dataset)
@@ -172,8 +170,8 @@ class TestTransformers(unittest.TestCase):
  def test_y_minmax_transformer(self):
    """Tests MinMax transformer. """
    solubility_dataset = dc.data.tests.load_solubility_data()
    minmax_transformer = dc.trans.MinMaxTransformer(transform_y=True,
                                                    dataset=solubility_dataset)
    minmax_transformer = dc.trans.MinMaxTransformer(
        transform_y=True, dataset=solubility_dataset)
    X, y, w, ids = (solubility_dataset.X, solubility_dataset.y,
                    solubility_dataset.w, solubility_dataset.ids)
    solubility_dataset = minmax_transformer.transform(solubility_dataset)
@@ -205,8 +203,8 @@ class TestTransformers(unittest.TestCase):
    y = np.random.randn(n_samples, n_tasks)
    dataset = dc.data.NumpyDataset(X, y)

    minmax_transformer = dc.trans.MinMaxTransformer(transform_y=True,
                                                    dataset=dataset)
    minmax_transformer = dc.trans.MinMaxTransformer(
        transform_y=True, dataset=dataset)
    w, ids = dataset.w, dataset.ids

    dataset = minmax_transformer.transform(dataset)
@@ -232,8 +230,8 @@ class TestTransformers(unittest.TestCase):

  def test_X_minmax_transformer(self):
    solubility_dataset = dc.data.tests.load_solubility_data()
    minmax_transformer = dc.trans.MinMaxTransformer(transform_X=True,
                                                    dataset=solubility_dataset)
    minmax_transformer = dc.trans.MinMaxTransformer(
        transform_X=True, dataset=solubility_dataset)
    X, y, w, ids = (solubility_dataset.X, solubility_dataset.y,
                    solubility_dataset.w, solubility_dataset.ids)
    solubility_dataset = minmax_transformer.transform(solubility_dataset)
@@ -319,9 +317,8 @@ class TestTransformers(unittest.TestCase):
    target = np.transpose(np.array(np.append([target], [target], axis=0)))
    gaussian_dataset = dc.data.tests.load_gaussian_cdf_data()
    bins = 1001
    cdf_transformer = dc.trans.CDFTransformer(transform_X=True,
                                              dataset=gaussian_dataset,
                                              bins=bins)
    cdf_transformer = dc.trans.CDFTransformer(
        transform_X=True, dataset=gaussian_dataset, bins=bins)
    X, y, w, ids = (gaussian_dataset.X, gaussian_dataset.y, gaussian_dataset.w,
                    gaussian_dataset.ids)
    gaussian_dataset = cdf_transformer.transform(gaussian_dataset, bins=bins)
@@ -345,9 +342,8 @@ class TestTransformers(unittest.TestCase):
    target = np.transpose(np.array(np.append([target], [target], axis=0)))
    gaussian_dataset = dc.data.tests.load_gaussian_cdf_data()
    bins = 1001
    cdf_transformer = dc.trans.CDFTransformer(transform_y=True,
                                              dataset=gaussian_dataset,
                                              bins=bins)
    cdf_transformer = dc.trans.CDFTransformer(
        transform_y=True, dataset=gaussian_dataset, bins=bins)
    X, y, w, ids = (gaussian_dataset.X, gaussian_dataset.y, gaussian_dataset.w,
                    gaussian_dataset.ids)
    gaussian_dataset = cdf_transformer.transform(gaussian_dataset, bins=bins)
@@ -424,8 +420,8 @@ class TestTransformers(unittest.TestCase):
    """Test Power transformer on Gaussian normal dataset."""
    gaussian_dataset = dc.data.tests.load_gaussian_cdf_data()
    powers = [1, 2, 0.5]
    power_transformer = dc.trans.PowerTransformer(transform_X=True,
                                                  powers=powers)
    power_transformer = dc.trans.PowerTransformer(
        transform_X=True, powers=powers)
    X, y, w, ids = (gaussian_dataset.X, gaussian_dataset.y, gaussian_dataset.w,
                    gaussian_dataset.ids)
    gaussian_dataset2 = power_transformer.transform(gaussian_dataset)
@@ -449,8 +445,8 @@ class TestTransformers(unittest.TestCase):
    """Test Power transformer on Gaussian normal dataset."""
    gaussian_dataset = dc.data.tests.load_gaussian_cdf_data()
    powers = [1, 2, 0.5]
    power_transformer = dc.trans.PowerTransformer(transform_y=True,
                                                  powers=powers)
    power_transformer = dc.trans.PowerTransformer(
        transform_y=True, powers=powers)
    X, y, w, ids = (gaussian_dataset.X, gaussian_dataset.y, gaussian_dataset.w,
                    gaussian_dataset.ids)
    gaussian_dataset2 = power_transformer.transform(gaussian_dataset)
@@ -501,8 +497,8 @@ class TestTransformers(unittest.TestCase):
      np.testing.assert_allclose(w_task[w_orig_task == 0],
                                 np.zeros_like(w_task[w_orig_task == 0]))
      # Check that sum of 0s equals sum of 1s in transformed for each task
      assert np.isclose(np.sum(w_task[y_task == 0]),
                        np.sum(w_task[y_task == 1]))
      assert np.isclose(
          np.sum(w_task[y_task == 0]), np.sum(w_task[y_task == 1]))

  def test_multitask_balancing_transformer(self):
    """Test balancing transformer on multitask dataset."""
@@ -529,8 +525,8 @@ class TestTransformers(unittest.TestCase):
      np.testing.assert_allclose(w_task[w_orig_task == 0],
                                 np.zeros_like(w_task[w_orig_task == 0]))
      # Check that sum of 0s equals sum of 1s in transformed for each task
      assert np.isclose(np.sum(w_task[y_task == 0]),
                        np.sum(w_task[y_task == 1]))
      assert np.isclose(
          np.sum(w_task[y_task == 0]), np.sum(w_task[y_task == 1]))

  def test_coulomb_fit_transformer(self):
    """Test coulomb fit transformer on singletask dataset."""
@@ -559,7 +555,8 @@ class TestTransformers(unittest.TestCase):
    y_test = np.zeros((test_samples, n_tasks))
    w_test = np.ones((test_samples, n_tasks))
    test_dataset = dc.data.NumpyDataset(X_test, y_test, w_test, ids=None)
    sims = np.sum(X_test[0, :] * X, axis=1, dtype=float) / np.sum(
    sims = np.sum(
        X_test[0, :] * X, axis=1, dtype=float) / np.sum(
            np.sign(X_test[0, :] + X), axis=1, dtype=float)
    sims = sorted(sims, reverse=True)
    IRV_transformer = dc.trans.IRVTransformer(10, n_tasks, dataset)
@@ -691,9 +688,8 @@ class TestTransformers(unittest.TestCase):
    tasks = ["outcome"]
    input_file = os.path.join(self.current_dir,
                              "../../models/tests/example_regression.csv")
    loader = dc.data.CSVLoader(tasks=tasks,
                               smiles_field="smiles",
                               featurizer=featurizer)
    loader = dc.data.CSVLoader(
        tasks=tasks, smiles_field="smiles", featurizer=featurizer)
    dataset = loader.featurize(input_file)
    transformer = dc.trans.DAGTransformer(max_atoms=50)
    dataset = transformer.transform(dataset)
+58 −62
Original line number Diff line number Diff line
@@ -154,7 +154,8 @@ class MinMaxTransformer(Transformer):
      if len(dataset.y.shape) > 1:
        assert len(self.y_min) == dataset.y.shape[1]

    super(MinMaxTransformer, self).__init__(transform_X=transform_X,
    super(MinMaxTransformer, self).__init__(
        transform_X=transform_X,
        transform_y=transform_y,
        transform_w=transform_w,
        dataset=dataset)
@@ -231,14 +232,15 @@ class NormalizationTransformer(Transformer):
      self.grad = np.reshape(true_grad, (true_grad.shape[0], -1, 3))
      self.ydely_means = ydely_means

    super(NormalizationTransformer, self).__init__(transform_X=transform_X,
    super(NormalizationTransformer, self).__init__(
        transform_X=transform_X,
        transform_y=transform_y,
        transform_w=transform_w,
        dataset=dataset)

  def transform(self, dataset, parallel=False):
    return super(NormalizationTransformer, self).transform(dataset,
                                                           parallel=parallel)
    return super(NormalizationTransformer, self).transform(
        dataset, parallel=parallel)

  def transform_array(self, X, y, w):
    """Transform the data in a set of (X, y, w) arrays."""
@@ -289,8 +291,8 @@ class NormalizationTransformer(Transformer):

      grad_means = self.y_means[1:]
      energy_var = self.y_stds[0]
      grad_var = 1 / energy_var * (self.ydely_means -
                                   self.y_means[0] * self.y_means[1:])
      grad_var = 1 / energy_var * (
          self.ydely_means - self.y_means[0] * self.y_means[1:])
      energy = tasks[:, 0]
      transformed_grad = []

@@ -348,7 +350,8 @@ class ClippingTransformer(Transformer):
      Maximum absolute value for y

    """
    super(ClippingTransformer, self).__init__(transform_X=transform_X,
    super(ClippingTransformer, self).__init__(
        transform_X=transform_X,
        transform_y=transform_y,
        transform_w=transform_w,
        dataset=dataset)
@@ -402,9 +405,8 @@ class LogTransformer(Transformer):
    self.features = features
    self.tasks = tasks
    """Initialize log  transformation."""
    super(LogTransformer, self).__init__(transform_X=transform_X,
                                         transform_y=transform_y,
                                         dataset=dataset)
    super(LogTransformer, self).__init__(
        transform_X=transform_X, transform_y=transform_y, dataset=dataset)

  def transform_array(self, X, y, w):
    """Transform the data in a set of (X, y, w) arrays."""
@@ -467,7 +469,8 @@ class BalancingTransformer(Transformer):
               transform_w=False,
               dataset=None,
               seed=None):
    super(BalancingTransformer, self).__init__(transform_X=transform_X,
    super(BalancingTransformer, self).__init__(
        transform_X=transform_X,
        transform_y=transform_y,
        transform_w=transform_w,
        dataset=dataset)
@@ -514,10 +517,7 @@ class CDFTransformer(Transformer):
  """Histograms the data and assigns values based on sorted list."""
  """Acts like a Cumulative Distribution Function (CDF)."""

  def __init__(self,
               transform_X=False,
               transform_y=False,
               dataset=None,
  def __init__(self, transform_X=False, transform_y=False, dataset=None,
               bins=2):
    self.transform_X = transform_X
    self.transform_y = transform_y
@@ -838,9 +838,9 @@ class IRVTransformer():
    n_features = X_target.shape[1]
    print('start similarity calculation')
    time1 = time.time()
    similarity = IRVTransformer.matrix_mul(X_target, np.transpose(self.X)) / (
        n_features -
        IRVTransformer.matrix_mul(1 - X_target, np.transpose(1 - self.X)))
    similarity = IRVTransformer.matrix_mul(X_target, np.transpose(
        self.X)) / (n_features - IRVTransformer.matrix_mul(
            1 - X_target, np.transpose(1 - self.X)))
    time2 = time.time()
    print('similarity calculation takes %i s' % (time2 - time1))
    for i in range(self.n_tasks):
@@ -867,8 +867,8 @@ class IRVTransformer():
        partial_result = np.matmul(
            X1[X1_id * shard_size:min((X1_id + 1) *
                                      shard_size, X1_shape[0]), :],
            X2[:,
               X2_id * shard_size:min((X2_id + 1) * shard_size, X2_shape[1])])
            X2[:, X2_id * shard_size:min((X2_id + 1) *
                                         shard_size, X2_shape[1])])
        # calculate matrix multiplication on slices
        if result.size == 1:
          result = partial_result
@@ -888,8 +888,8 @@ class IRVTransformer():
    X_trans = []
    for count in range(X_length // 5000 + 1):
      X_trans.append(
          self.X_transform(dataset.X[count * 5000:min((count + 1) *
                                                      5000, X_length), :]))
          self.X_transform(
              dataset.X[count * 5000:min((count + 1) * 5000, X_length), :]))
    X_trans = np.concatenate(X_trans, axis=0)
    return NumpyDataset(X_trans, dataset.y, dataset.w, ids=None)

@@ -1097,8 +1097,8 @@ class ANITransformer(Transformer):
      while True:
        end = min((start + 1) * batch_size, X.shape[0])
        X_batch = X[(start * batch_size):end]
        output = self.sess.run([self.outputs],
                               feed_dict={self.inputs: X_batch})[0]
        output = self.sess.run(
            [self.outputs], feed_dict={self.inputs: X_batch})[0]
        X_out.append(output)
        num_transformed = num_transformed + X_batch.shape[0]
        print('%i samples transformed' % num_transformed)
@@ -1132,7 +1132,8 @@ class ANITransformer(Transformer):
      radial_sym = self.radial_symmetry(d_radial_cutoff, d, atom_numbers)
      angular_sym = self.angular_symmetry(d_angular_cutoff, d, atom_numbers,
                                          coordinates)
      self.outputs = tf.concat([
      self.outputs = tf.concat(
          [
              tf.cast(tf.expand_dims(atom_numbers, 2), tf.float32), radial_sym,
              angular_sym
          ],
@@ -1178,8 +1179,8 @@ class ANITransformer(Transformer):
    if self.atomic_number_differentiated:
      out_tensors = []
      for atom_type in self.atom_cases:
        selected_atoms = tf.expand_dims(tf.expand_dims(
            atom_numbers_embedded[:, :, atom_type], axis=1),
        selected_atoms = tf.expand_dims(
            tf.expand_dims(atom_numbers_embedded[:, :, atom_type], axis=1),
            axis=3)
        out_tensors.append(tf.reduce_sum(out * selected_atoms, axis=2))
      return tf.concat(out_tensors, axis=2)
@@ -1233,9 +1234,8 @@ class ANITransformer(Transformer):
        for atom_type_k in self.atom_cases[id_j:]:
          selected_atoms = tf.stack([atom_numbers_embedded[:, :, atom_type_j]] * max_atoms, axis=2) * \
                           tf.stack([atom_numbers_embedded[:, :, atom_type_k]] * max_atoms, axis=1)
          selected_atoms = tf.expand_dims(tf.expand_dims(selected_atoms,
                                                         axis=1),
                                          axis=4)
          selected_atoms = tf.expand_dims(
              tf.expand_dims(selected_atoms, axis=1), axis=4)
          out_tensors.append(
              tf.reduce_sum(out_tensor * selected_atoms, axis=(2, 3)))
      return tf.concat(out_tensors, axis=2)
@@ -1263,7 +1263,8 @@ class FeaturizationTransformer(Transformer):
    self.featurizer = featurizer
    if not transform_X:
      raise ValueError("FeaturizingTransfomer can only be used on X")
    super(FeaturizationTransformer, self).__init__(transform_X=transform_X,
    super(FeaturizationTransformer, self).__init__(
        transform_X=transform_X,
        transform_y=transform_y,
        transform_w=transform_w,
        dataset=dataset)
@@ -1312,11 +1313,8 @@ class DataTransforms(Transformer):
             (‘constant’, ‘nearest’, ‘reflect’ or ‘wrap’). Default is ‘constant’.
              order - The order of the spline interpolation, default is 3. The order has to be in the range 0-5.
    """
    return scipy.ndimage.rotate(self.Image,
                                angle,
                                reshape=reshape,
                                mode=mode,
                                order=order)
    return scipy.ndimage.rotate(
        self.Image, angle, reshape=reshape, mode=mode, order=order)

  def gaussian_blur(self, sigma=0.2):
    """ Adds gaussian noise to the image
@@ -1352,13 +1350,11 @@ class DataTransforms(Transformer):
          order - The order of the spline interpolation, default is 3. The order has to be in the range 0-5.
          """
    if len(self.Image.shape) == 2:
      return scipy.ndimage.shift(self.Image, [height, width],
                                 order=order,
                                 mode=mode)
      return scipy.ndimage.shift(
          self.Image, [height, width], order=order, mode=mode)
    if len(self.Image.shape == 3):
      return scipy.ndimage.shift(self.Image, [height, width, 0],
                                 order=order,
                                 mode=mode)
      return scipy.ndimage.shift(
          self.Image, [height, width, 0], order=order, mode=mode)

  def gaussian_noise(self, mean=0, std=25.5):
    '''Adds gaussian noise to the image