Commit f36b2a72 authored by Franklin Lee
Browse files

Updated implementation of LogTransformer with base case

parent 92fb3508
Loading
Loading
Loading
Loading
+38 −26
Original line number Diff line number Diff line
@@ -188,8 +188,11 @@ class LogTransformer(Transformer):
    row = df.iloc[i]
    if self.transform_X:
      X = load_from_disk(os.path.join(data_dir, row['X-transformed']))
      end_feat=len(X[1,:])
      for j in xrange(end_feat):
      num_features=len(X[0])
      if self.features is None:
        X = np.log(X+1)
      else:
        for j in xrange(num_features):
          if j in self.features:
            X[:,j] = np.log(X[:,j]+1)
          else:
@@ -198,8 +201,11 @@ class LogTransformer(Transformer):

    if self.transform_y:
      y = load_from_disk(os.path.join(data_dir, row['y-transformed']))
      end_task=len(y[1,:])
      for j in xrange(end_task):
      num_tasks=len(y[0])
      if self.tasks is None:
        y = np.log(y+1)
      else:
        for j in xrange(num_tasks):
          if j in self.tasks:
            y[:,j] = np.log(y[:,j]+1)
          else:
@@ -211,16 +217,22 @@ class LogTransformer(Transformer):
    Undo transformation on provided data.
    """
    if self.transform_X:
      end_feat=len(z[1,:])
      for j in xrange(end_feat):
      num_features=len(z[0])
      if self.features is None:
        return np.exp(z)-1
      else:
        for j in xrange(num_features):
          if j in self.features:
            z[:,j] = np.exp(z[:,j])-1
          else:
            z[:,j] = z[:,j]
        return z
    elif self.transform_y:
      end_task=len(z[1,:])
      for j in xrange(end_task):
      num_tasks=len(z[0])
      if self.tasks is None:
        return np.exp(z)-1
      else:
        for j in xrange(num_tasks):
          if j in self.tasks:
            z[:,j] = np.exp(z[:,j])-1
          else:
+6 −9
Original line number Diff line number Diff line
@@ -20,9 +20,8 @@ from deepchem.datasets.tests import TestDatasetAPI
class TestTransformerAPI(TestDatasetAPI):
  """Test top-level API for transformer objects."""

  """
  def test_y_log_transformer(self):
    # Tests logarithmic data transformer.
    """Tests logarithmic data transformer."""
    solubility_dataset = self.load_solubility_data()
    log_transformer = LogTransformer(
        transform_y=True, dataset=solubility_dataset)
@@ -42,10 +41,9 @@ class TestTransformerAPI(TestDatasetAPI):

    # Check that untransform does the right thing.
    np.testing.assert_allclose(log_transformer.untransform(y_t), y)
  """
  """

  def test_X_log_transformer(self):
    # Tests logarithmic data transformer.
    """Tests logarithmic data transformer."""
    solubility_dataset = self.load_solubility_data()
    log_transformer = LogTransformer(
        transform_X=True, dataset=solubility_dataset)
@@ -65,9 +63,8 @@ class TestTransformerAPI(TestDatasetAPI):

    # Check that untransform does the right thing.
    np.testing.assert_allclose(log_transformer.untransform(X_t), X)
  """
 
  def test_y_log_transformer(self):
  def test_y_log_transformer_select(self):
    """Tests logarithmic data transformer with selection."""
    multitask_dataset = self.load_feat_multitask_data()
    dfe = pd.read_csv("../../models/tests/feat_multitask_example.csv")
@@ -98,7 +95,7 @@ class TestTransformerAPI(TestDatasetAPI):
    # Check that untransform does the right thing.
    np.testing.assert_allclose(log_transformer.untransform(y_t), y)

  def test_X_log_transformer(self):
  def test_X_log_transformer_select(self):
    #Tests logarithmic data transformer with selection.
    multitask_dataset = self.load_feat_multitask_data()
    dfe = pd.read_csv("../../models/tests/feat_multitask_example.csv")