Commit 4dc5e97c authored by Joseph Gomes's avatar Joseph Gomes
Browse files

Update Coulomb Matrix featurizer tests and examples

parent 1bba913f
Loading
Loading
Loading
Loading
+24 −2
Original line number Diff line number Diff line
@@ -26,15 +26,27 @@ class CoulombMatrix(Featurizer):
  max_atoms : int
      Maximum number of atoms for any molecule in the dataset. Used to
      pad the Coulomb matrix.
  remove_hydrogens : bool, optional (default True)
  remove_hydrogens : bool, optional (default False)
      Whether to remove hydrogens before constructing Coulomb matrix.
  randomize : bool, optional (default True)
  randomize : bool, optional (default False)
      Whether to randomize Coulomb matrices to remove dependence on atom
      index order.
  upper_tri : bool, optional (default False)
      Whether to return the upper triangular portion of the Coulomb matrix.
  n_samples : int, optional (default 1)
      Number of random Coulomb matrices to generate if randomize is True.
  seed : int, optional
      Random seed.

  Example:

  >>> featurizers = dc.feat.CoulombMatrix(max_atoms=23)
  >>> input_file = "input.sdf"
  >>> tasks = ["task0"]
  >>> featurizer = dc.data.SDFLoader(tasks, smiles_field="smiles", mol_field="mol",
  ...                                featurizer=featurizers)
  >>> dataset = featurizer.featurize(input_file)

  """
  conformers = True
  name = 'coulomb_matrix'
@@ -176,6 +188,16 @@ class CoulombMatrixEig(CoulombMatrix):
      Number of random Coulomb matrices to generate if randomize is True.
  seed : int, optional
      Random seed.

  Example:

  >>> featurizers = dc.feat.CoulombMatrixEig(max_atoms=23)
  >>> input_file = "input.sdf"
  >>> tasks = ["task0"]
  >>> featurizer = dc.data.SDFLoader(tasks, smiles_field="smiles", mol_field="mol",
  ...                                featurizer=featurizers)
  >>> dataset = featurizer.featurize(input_file)

  """

  conformers = True
+19 −2
Original line number Diff line number Diff line
@@ -28,14 +28,31 @@ class TestCoulombMatrix(unittest.TestCase):
        """
        Test CoulombMatrix.
        """
        f = cm.CoulombMatrix(self.mol.GetNumAtoms())
        rval = f([self.mol])
        assert rval.shape == (1, self.mol.GetNumConformers(), self.mol.GetNumAtoms(), self.mol.GetNumAtoms())

    def test_coulomb_matrix_padding(self):
        """
        Check the output shape of CoulombMatrix when max_atoms exceeds the
        molecule size.

        The featurizer is built with max_atoms set to twice the atom count of
        the test molecule, and the last two dimensions of the result are
        expected to equal that padded size.
        """
        padded_size = self.mol.GetNumAtoms() * 2
        featurizer = cm.CoulombMatrix(max_atoms=padded_size)
        features = featurizer([self.mol])
        # One molecule in, one matrix per conformer, each padded_size square.
        expected_shape = (1, self.mol.GetNumConformers(), padded_size,
                          padded_size)
        assert features.shape == expected_shape

    def test_upper_tri_coulomb_matrix(self):
        """
        Check the output shape of CoulombMatrix with upper_tri=True.

        With max_atoms equal to the atom count of the test molecule, the last
        dimension of the result is expected to match the number of entries
        returned by np.triu_indices for a matrix of that size.
        """
        featurizer = cm.CoulombMatrix(self.mol.GetNumAtoms(), upper_tri=True)
        features = featurizer([self.mol])
        # np.triu_indices returns (rows, cols); either array's length is the
        # number of upper-triangular entries, diagonal included.
        row_indices = np.triu_indices(self.mol.GetNumAtoms())[0]
        expected_shape = (1, self.mol.GetNumConformers(), row_indices.size)
        assert features.shape == expected_shape

    def test_coulomb_matrix_padding(self):
    def test_upper_tri_coulomb_matrix_padding(self):
        """
        Test CoulombMatrix with padding.
        Test upper triangular CoulombMatrix with padding.
        """
        f = cm.CoulombMatrix(max_atoms=self.mol.GetNumAtoms() * 2, upper_tri=True)
        rval = f([self.mol])