Unverified Commit ab5fa059 authored by Bharath Ramsundar, committed by GitHub
Browse files

Merge pull request #2442 from PascalIversen/master

Improve SmilesToImage Error Message for long molecules
parents 3b892827 f641cbe2
Loading
Loading
Loading
Loading
+13 −5
Original line number Diff line number Diff line
@@ -152,14 +152,22 @@ class SmilesToImage(MolecularFeaturizer):
        (line_coords[:, 0] + self.embed) / self.res).astype(int)
    bond_line_idys = np.ceil(
        (line_coords[:, 1] + self.embed) / self.res).astype(int)
    # Set the bond line coordinates to the bond property used.
    img[bond_line_idxs, bond_line_idys, 0] = bond_props[:, 0]

    # Turn atomic coordinates into image positions
    atom_idxs = np.round(
        (atom_coords[:, 0] + self.embed) / self.res).astype(int)
    atom_idys = np.round(
        (atom_coords[:, 1] + self.embed) / self.res).astype(int)

    try:
      # Set the bond line coordinates to the bond property used.
      img[bond_line_idxs, bond_line_idys, 0] = bond_props[:, 0]

      # Set the atom positions in image to different atomic properties in channels
      img[atom_idxs, atom_idys, :] = atom_props

    except IndexError:
      # With fixed res and img_size some molecules (e.g. long chains) may not fit.
      raise IndexError(
          "The molecule does not fit into the image. Consider increasing img_size or res of the SmilesToImage featurizer."
      )
    return img
+10 −0
Original line number Diff line number Diff line
@@ -43,6 +43,9 @@ class TestSmilesToImage(unittest.TestCase):
  def setUp(self):
    """Create the SMILES fixtures shared by the tests in this class."""
    # Two SMILES strings stored together as the regular fixture list.
    first_smiles = "Cn1c(=O)c2c(ncn2C)n(C)c1=O"
    second_smiles = "CC(=O)N1CN(C(C)=O)C(O)C1O"
    self.smiles = [first_smiles, second_smiles]

    # One long SMILES string, kept in its own single-element list so the
    # long-molecule test can featurize it separately.
    long_smiles = "CCCCCCCCCCCCCCCCCCCC(=O)OCCCNC(=O)c1ccccc1SSc1ccccc1C(=O)NCCCOC(=O)CCCCCCCCCCCCCCCCCCC"
    self.long_molecule_smiles = [long_smiles]

  def test_smiles_to_image(self):
    """Test default SmilesToImage"""
@@ -82,3 +85,10 @@ class TestSmilesToImage(unittest.TestCase):
    features = featurizer.featurize(self.smiles)
    assert features.shape == (2, 80, 80, 4)
    assert not np.allclose(base_features, features)

  def test_smiles_to_image_long_molecule(self):
    """Check the featurizer output shape for a molecule that does not fit the image."""
    # Same keyword arguments as before, gathered in one place for readability.
    featurizer_kwargs = {
        "img_size": 80,
        "res": 0.5,
        "max_len": 250,
        "img_spec": "std",
    }
    image_featurizer = SmilesToImage(**featurizer_kwargs)

    result = image_featurizer.featurize(self.long_molecule_smiles)

    # The single oversized molecule is expected to produce an empty feature
    # row. NOTE(review): presumably featurize() handles the failure itself
    # rather than propagating the IndexError - confirm against the featurizer.
    expected_shape = (1, 0)
    assert result.shape == expected_shape