Commit 204aa45a authored by seyonechithrananda
Browse files

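Remove RDKit Mol requirement: RobertaFeaturizer._featurize now takes a SMILES string directly instead of converting an RDKit Mol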

parent c6a4761d
Loading
Loading
Loading
Loading
+0 −2
Original line number Diff line number Diff line
@@ -27,8 +27,6 @@ class Featurizer(object):
                log_every_n: int = 1000) -> np.ndarray:
    """Calculate features for datapoints.

    `**kwargs` will get passed directly to `Featurizer._featurize`

    Parameters
    ----------
    datapoints: Iterable[Any]
+10 −15
Original line number Diff line number Diff line
@@ -43,25 +43,20 @@ class RobertaFeaturizer(RobertaTokenizerFast, MolecularFeaturizer):
    self.attention_mask = attention_mask
    return

  def _featurize(self, mol: RDKitMol) -> List[List[int]]:
  def _featurize(self, smiles_string: str) -> List[List[int]]:
    """Calculate encoding using HuggingFace's RobertaTokenizerFast

    Parameters
    ----------
        mol: rdkit.Chem.rdchem.Mol
          RDKit Mol object
    smiles_string: str
      String containing SMILES sequence.

    Returns
    -------
    encoding: List
      List containing two lists; the `input_ids` and the `attention_mask`

    """
    try:
      from rdkit import Chem
    except ModuleNotFoundError:
      raise ImportError("This class requires RDKit to be installed.")
    smiles_string = Chem.MolToSmiles(mol)

    # the encoding is natively a dictionary with keys 'input_ids' and 'attention_mask'
    # -> make this a list of two lists to allow np to handle it
    # encoding = list(self(smiles_string, **kwargs).values())