Commit 86e19cbc authored by alat-rights's avatar alat-rights
Browse files

Modified comment on one_hot according to Peter's suggestion

parent 5943f0ab
Loading
Loading
Loading
Loading
+5 −2
Original line number Diff line number Diff line
@@ -18,13 +18,16 @@ ZINC_CHARSET = [


class OneHotFeaturizer(Featurizer):
-  """Encodes SMILES or any arbitrary string as a one-hot array.
+  """Encodes any arbitrary string or molecule as a one-hot array.

-  This featurizer encodes either a SMILES string or any arbitrary string as a one-hot array.
+  This featurizer encodes the characters within any given string as a one-hot
+  array. It also works with RDKit molecules: it can convert RDKit molecules to
+  SMILES strings and then one-hot encode the characters in said strings.

  Note
  ----
  This class needs RDKit to be installed in order to work with RDKit molecules.
  It does not need RDKit to be installed to work with arbitrary strings.
  """

  def __init__(self, charset: List[str] = ZINC_CHARSET, max_length: int = 100):