Commit f541cea3 authored by seyonechithrananda's avatar seyonechithrananda
Browse files

Merge branch 'master' of github.com:deepchem/deepchem into st_fix

parents 4cd8e6cb b010928f
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -10,6 +10,7 @@ from deepchem.feat.base_classes import MaterialStructureFeaturizer
from deepchem.feat.base_classes import MaterialCompositionFeaturizer
from deepchem.feat.base_classes import ComplexFeaturizer
from deepchem.feat.base_classes import UserDefinedFeaturizer
from deepchem.feat.base_classes import DummyFeaturizer

from deepchem.feat.graph_features import ConvMolFeaturizer
from deepchem.feat.graph_features import WeaveFeaturizer
+37 −0
Original line number Diff line number Diff line
@@ -421,3 +421,40 @@ class UserDefinedFeaturizer(Featurizer):
  def __init__(self, feature_fields):
    """Creates user-defined-featurizer.

    Parameters
    ----------
    feature_fields
      Stored unchanged on the instance as `self.feature_fields`.
      NOTE(review): presumably the names of the dataset fields that already
      hold user-computed features -- confirm against the callers.
    """
    self.feature_fields = feature_fields


class DummyFeaturizer(Featurizer):
  """Class that implements a no-op featurization.

  This is useful when the raw dataset has to be used without featurizing the
  examples. The Molnet loader requires a featurizer input and such datasets
  can be used in their original form by passing the raw featurizer.

  Examples
  --------
  >>> import deepchem as dc
  >>> smi_map = [["N#C[S-].O=C(CBr)c1ccc(C(F)(F)F)cc1>CCO.[K+]", "N#CSCC(=O)c1ccc(C(F)(F)F)cc1"], ["C1COCCN1.FCC(Br)c1cccc(Br)n1>CCN(C(C)C)C(C)C.CN(C)C=O.O", "FCC(c1cccc(Br)n1)N1CCOCC1"]]
  >>> featurizer = dc.feat.DummyFeaturizer()
  >>> smi_feat = featurizer.featurize(smi_map)
  >>> smi_feat
  array([['N#C[S-].O=C(CBr)c1ccc(C(F)(F)F)cc1>CCO.[K+]',
          'N#CSCC(=O)c1ccc(C(F)(F)F)cc1'],
         ['C1COCCN1.FCC(Br)c1cccc(Br)n1>CCN(C(C)C)C(C)C.CN(C)C=O.O',
          'FCC(c1cccc(Br)n1)N1CCOCC1']], dtype='<U55')
  """

  def featurize(self, datapoints: Iterable[Any],
                log_every_n: int = 1000) -> np.ndarray:
    """Passes the datapoints through unchanged, returned as a numpy array.

    Parameters
    ----------
    datapoints: Iterable[Any]
      The objects to pass through. Sequences and arrays are converted
      directly; one-shot iterators (e.g. generators) are consumed first so
      that their elements, not the iterator object, end up in the result.
    log_every_n: int, default 1000
      Not used by this featurizer. NOTE(review): presumably kept so the
      signature stays compatible with the base `Featurizer.featurize` --
      confirm against the base class.

    Returns
    -------
    datapoints: np.ndarray
      A numpy array holding the datapoints as they were passed in.
    """
    # Imported locally so this method does not rely on a module-level import
    # that may not exist in the file.
    from collections.abc import Iterator

    # np.asarray does not iterate over generators/iterators: it would wrap the
    # iterator object itself in a 0-d object array. Materialize it first.
    if isinstance(datapoints, Iterator):
      datapoints = list(datapoints)
    return np.asarray(datapoints)
+25 −0
Original line number Diff line number Diff line
import unittest
import deepchem as dc
import numpy as np


class TestDummyFeaturizer(unittest.TestCase):
  """
  Test for DummyFeaturizer.
  """

  def test_featurize(self):
    """
    Test that featurize returns an array of inputs with the same shape and
    the same values as were passed in.
    """
    input_array = np.array([[
        "N#C[S-].O=C(CBr)c1ccc(C(F)(F)F)cc1>CCO.[K+]",
        "N#CSCC(=O)c1ccc(C(F)(F)F)cc1"
    ], [
        "C1COCCN1.FCC(Br)c1cccc(Br)n1>CCN(C(C)C)C(C)C.CN(C)C=O.O",
        "FCC(c1cccc(Br)n1)N1CCOCC1"
    ]])
    featurizer = dc.feat.DummyFeaturizer()
    out = featurizer.featurize(input_array)
    assert isinstance(out, np.ndarray)
    assert out.shape == input_array.shape
    # A no-op featurizer must preserve the values, not only the shape.
    assert np.array_equal(out, input_array)
+7 −0
Original line number Diff line number Diff line
@@ -403,6 +403,13 @@ UserDefinedFeaturizer
  :members:
  :inherited-members:

DummyFeaturizer
^^^^^^^^^^^^^^^^^^^^^

.. autoclass:: deepchem.feat.DummyFeaturizer
  :members:
  :inherited-members:

Base Featurizers (for develop)
------------------------------