Commit b0f03c9b authored by Pasc-Iv
Browse files

Let SmilesToImage featurizer fail gracefully for molecules which do not fit the image

parent c68f7bab
Loading
Loading
Loading
Loading
+13 −8
Original line number Diff line number Diff line
@@ -7,7 +7,6 @@ import numpy as np
from deepchem.utils.typing import RDKitMol
from deepchem.feat.base_classes import MolecularFeaturizer


class SmilesToImage(MolecularFeaturizer):
  """Convert SMILES string to an image.

@@ -152,14 +151,20 @@ class SmilesToImage(MolecularFeaturizer):
        (line_coords[:, 0] + self.embed) / self.res).astype(int)
    bond_line_idys = np.ceil(
        (line_coords[:, 1] + self.embed) / self.res).astype(int)
    # Set the bond line coordinates to the bond property used.
    img[bond_line_idxs, bond_line_idys, 0] = bond_props[:, 0]

    # Turn atomic coordinates into image positions
    atom_idxs = np.round(
      (atom_coords[:, 0] + self.embed) / self.res).astype(int)
    atom_idys = np.round(
      (atom_coords[:, 1] + self.embed) / self.res).astype(int)

    try:
      # Set the bond line coordinates to the bond property used.
      img[bond_line_idxs, bond_line_idys, 0] = bond_props[:, 0]

      # Set the atom positions in image to different atomic properties in channels
      img[atom_idxs, atom_idys, :] = atom_props

    except IndexError:
      # With fixed res and img_size some molecules (e.g. long chains) may not fit.
      raise IndexError("The molecule does not fit into the image. Consider increasing img_size or res of the SmilesToImage featurizer.")
    return img
+17 −0
Original line number Diff line number Diff line
@@ -43,6 +43,13 @@ class TestSmilesToImage(unittest.TestCase):
  def setUp(self):
    """Create the SMILES fixtures used by the test methods.

    Sets ``self.smiles`` to a list of two SMILES strings and
    ``self.long_molecule_smiles`` to a list holding one long SMILES string.
    """
    first_smiles = "Cn1c(=O)c2c(ncn2C)n(C)c1=O"
    second_smiles = "CC(=O)N1CN(C(C)=O)C(O)C1O"
    # Single long molecule, kept in its own list so it can be featurized
    # separately from the two shorter ones.
    long_smiles = "CCCCCCCCCCCCCCCCCCCC(=O)OCCCNC(=O)c1ccccc1SSc1ccccc1C(=O)NCCCOC(=O)CCCCCCCCCCCCCCCCCCC"

    self.long_molecule_smiles = [long_smiles]
    self.smiles = [first_smiles, second_smiles]

  def test_smiles_to_image(self):
    """Featurize the fixture SMILES with a default-constructed SmilesToImage.

    Checks only the shape of the returned array.
    """
    expected_shape = (2, 80, 80, 1)
    images = SmilesToImage().featurize(self.smiles)
    assert images.shape == expected_shape

  def test_smiles_to_image(self):
    """Test default SmilesToImage"""
@@ -82,3 +89,13 @@ class TestSmilesToImage(unittest.TestCase):
    features = featurizer.featurize(self.smiles)
    assert features.shape == (2, 80, 80, 4)
    assert not np.allclose(base_features, features)

  def test_smiles_to_image_long_molecule(self):
    """Test SmilesToImage on a molecule that does not fit into the image.

    The featurizer is built with explicit settings and run on the long
    molecule fixture; the result is expected to have shape ``(1, 0)``.
    """
    # NOTE(review): the (1, 0) shape presumably comes from the base
    # featurizer handling the per-molecule failure - confirm against
    # MolecularFeaturizer.featurize.
    settings = dict(img_size=80, res=0.5, max_len=250, img_spec="std")
    long_chain_featurizer = SmilesToImage(**settings)
    result = long_chain_featurizer.featurize(self.long_molecule_smiles)
    assert result.shape == (1, 0)