Commit 62905451 authored by Joseph Gomes
Browse files

Update imports and script

parent 204d7d53
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -14,6 +14,7 @@ from deepchem.models.multitask import SingletaskToMultitask

from deepchem.models.tensorflow_models.fcnet import TensorflowMultiTaskRegressor
from deepchem.models.tensorflow_models.fcnet import TensorflowMultiTaskClassifier
from deepchem.models.tensorflow_models.fcnet import TensorflowMultiTaskFitTransformRegressor
from deepchem.models.tensorflow_models.robust_multitask import RobustMultitaskRegressor
from deepchem.models.tensorflow_models.robust_multitask import RobustMultitaskClassifier
from deepchem.models.tensorflow_models.lr import TensorflowLogisticRegression
+2 −0
Original line number Diff line number Diff line
@@ -13,3 +13,5 @@ from deepchem.trans.transformers import NormalizationTransformer
from deepchem.trans.transformers import BalancingTransformer
from deepchem.trans.transformers import CDFTransformer
from deepchem.trans.transformers import PowerTransformer
from deepchem.trans.transformers import CoulombRandomizationFitTransformer
from deepchem.trans.transformers import NormalizationFitTransformer
+64 −44
Original line number Diff line number Diff line
@@ -397,50 +397,6 @@ class BalancingTransformer(Transformer):
      w_balanced[one_indices, ind] = self.weights[ind][1]
    return (X, y, w_balanced)

class CoulombRandomizationFitTransformer():
  """Randomly reorders the atoms of one zero-padded Coulomb matrix.

  Only `X_transform` is usable; `transform` and `untransform` always
  raise NotImplementedError.
  """

  def __init__(self, seed=None):
    """Initialize coulomb matrix randomization transformation.

    Parameters
    ----------
    seed: int, optional
      Value handed to `np.random.RandomState` each time a matrix is
      randomized. With a fixed seed the same noise vector is drawn on
      every call.
    """
    self.seed = seed

  def unpad_randomize_and_flatten(self, cm):
    """
    1. Remove zero padding on Coulomb Matrix
    2. Randomly permute the rows and columns (same permutation for both)
    3. Re-pad to the original size and keep the upper triangular portion

    Parameters
    ----------
    cm: np.ndarray
      A single 2-D Coulomb matrix (it is indexed as cm[rows, cols]).

    Returns
    -------
    np.ndarray
      1-D array holding the upper-triangular entries of the randomized,
      re-padded matrix.
    """
    max_atom_number = len(cm)
    # The number of real atoms is taken to be the number of leading
    # non-zero entries in the first row, capped at the matrix size.
    atom_number = 0
    for entry in cm[0]:
      if atom_number == max_atom_number or entry == 0.:
        break
      atom_number += 1

    upcm = cm[0:atom_number, 0:atom_number]

    # Sort atoms by (row norm + gaussian noise): the noise is what makes
    # the resulting ordering random.
    row_norms = np.asarray(
        [np.linalg.norm(row) for row in upcm], dtype=float)
    rng = np.random.RandomState(self.seed)
    e = rng.normal(size=row_norms.size)
    p = np.argsort(row_norms + e)
    rcm = upcm[p][:, p]
    # pad_array is defined outside this block; presumably it pads the
    # matrix back out to len(cm) per side -- TODO confirm.
    rcm = pad_array(rcm, len(cm))
    rcm = rcm[np.triu_indices_from(rcm)]

    return rcm

  def X_transform(self, X):
    """Randomize and flatten the Coulomb matrix X.

    The helper is a method, so it must be reached through `self`; a bare
    call would raise NameError.
    """
    return self.unpad_randomize_and_flatten(X)

  def transform(self, dataset):
    """Not supported; always raises NotImplementedError."""
    raise NotImplementedError(
      "Cannot transform datasets with FitTransformer")

  def untransform(self, z):
    """Not supported; always raises NotImplementedError."""
    raise NotImplementedError(
      "Cannot untransform datasets with FitTransformer.")

class CoulombRandomizationTransformer(Transformer):

  def __init__(self, transform_X=False, transform_y=False,
@@ -667,3 +623,67 @@ class PowerTransformer(Transformer):
    z = z[:,:orig_len]
    z = np.power(z, 1/self.powers[0])
    return z

class CoulombRandomizationFitTransformer():
  """Randomly reorders the atoms of one zero-padded Coulomb matrix.

  Only `X_transform` is usable; `transform` and `untransform` always
  raise NotImplementedError.
  """

  def __init__(self, seed=None):
    """Initialize coulomb matrix randomization transformation.

    Parameters
    ----------
    seed: int, optional
      Value handed to `np.random.RandomState` each time a matrix is
      randomized. With a fixed seed the same noise vector is drawn on
      every call.
    """
    self.seed = seed

  def unpad_and_randomize(self, cm):
    """
    1. Remove zero padding on Coulomb Matrix
    2. Randomly permute the rows and columns (same permutation for both)
    3. Pad the permuted matrix back to the original size

    Parameters
    ----------
    cm: np.ndarray
      A single 2-D Coulomb matrix (it is indexed as cm[rows, cols]).

    Returns
    -------
    The randomized matrix as returned by `pad_array`; it is NOT
    flattened.
    """
    max_atom_number = len(cm)
    # The number of real atoms is taken to be the number of leading
    # non-zero entries in the first row, capped at the matrix size.
    atom_number = 0
    for entry in cm[0]:
      if atom_number == max_atom_number or entry == 0.:
        break
      atom_number += 1

    upcm = cm[0:atom_number, 0:atom_number]

    # Sort atoms by (row norm + gaussian noise): the noise is what makes
    # the resulting ordering random.
    row_norms = np.asarray(
        [np.linalg.norm(row) for row in upcm], dtype=float)
    rng = np.random.RandomState(self.seed)
    e = rng.normal(size=row_norms.size)
    p = np.argsort(row_norms + e)
    rcm = upcm[p][:, p]
    # pad_array is defined outside this block; presumably it pads the
    # matrix back out to len(cm) per side -- TODO confirm.
    rcm = pad_array(rcm, len(cm))

    return rcm

  def X_transform(self, X):
    """Return a randomly permuted copy of the Coulomb matrix X."""
    return self.unpad_and_randomize(X)

  def transform(self, dataset):
    """Not supported; always raises NotImplementedError."""
    raise NotImplementedError(
      "Cannot transform datasets with FitTransformer")

  def untransform(self, z):
    """Not supported; always raises NotImplementedError."""
    raise NotImplementedError(
      "Cannot untransform datasets with FitTransformer.")

class NormalizationFitTransformer():
  """Centers each row of a feature array and rescales by a global std.

  Only `X_transform` is usable; `transform` and `untransform` always
  raise NotImplementedError.
  """

  def normalize(self, X):
    """Normalize features.

    Each slice along axis 1 has its own mean subtracted, then the whole
    array is divided by the single standard deviation of the centered
    values.

    Parameters
    ----------
    X: np.ndarray
      Array with at least two dimensions.

    Returns
    -------
    np.ndarray
      Array of the same shape as X.

    Note: if every centered value is zero the standard deviation is 0
    and the division yields NaN, as numpy does not raise here.
    """
    # keepdims keeps the reduced axis as length 1 so the means broadcast
    # back against their own rows. Without it the (n,) result lines up
    # with the LAST axis: that fails for non-square input and subtracts
    # row i's mean from column i for square input.
    X_means = X.mean(axis=1, keepdims=True)
    X_centered = X - X_means
    X_stds = X_centered.std()
    return X_centered / X_stds

  def X_transform(self, X):
    """Return the normalized version of X (see `normalize`)."""
    return self.normalize(X)

  def transform(self, dataset):
    """Not supported; always raises NotImplementedError."""
    raise NotImplementedError(
      "Cannot transform datasets with FitTransformer")

  def untransform(self, z):
    """Not supported; always raises NotImplementedError."""
    raise NotImplementedError(
      "Cannot untransform datasets with FitTransformer.")
+1 −1
Original line number Diff line number Diff line
@@ -34,7 +34,7 @@ dataset = featurizer.featurize(input_file, data_dir)
# Randomly split the featurized dataset into train and test portions.
random_splitter = dc.splits.RandomSplitter()
train_dataset, test_dataset = random_splitter.train_test_split(
    dataset, train_dir, test_dir)
# Dataset-level transformers: the y-normalization is built from the
# training split only.
transformers = [
    dc.trans.NormalizationTransformer(
        transform_y=True, dataset=train_dataset)]
# Fit transformers only expose X_transform (their transform() raises), so
# they are kept in a separate list from the dataset-level transformers.
# Presumably the model applies them to features while fitting -- verify
# against the model that consumes this list.
fit_transformers = [
    dc.trans.CoulombRandomizationFitTransformer(),
    dc.trans.NormalizationFitTransformer()]

for transformer in transformers:
    train_dataset = transformer.transform(train_dataset)