Unverified Commit 457b25e0 authored by Bharath Ramsundar's avatar Bharath Ramsundar Committed by GitHub
Browse files

Merge pull request #916 from lilleswing/singleton_csv

Singleton csv works
parents 4deb33c9 06166a9c
Loading
Loading
Loading
Loading
+2 −3
Original line number Diff line number Diff line
@@ -77,7 +77,7 @@ def featurize_smiles_df(df, featurizer, field, log_every_N=1000, verbose=True):
  valid_inds = np.array(
      [1 if elt.size > 0 else 0 for elt in features], dtype=bool)
  features = [elt for (is_valid, elt) in zip(valid_inds, features) if is_valid]
  return np.squeeze(np.array(features)), valid_inds
  return np.squeeze(np.array(features), axis=1), valid_inds


def featurize_smiles_np(arr, featurizer, log_every_N=1000, verbose=True):
@@ -101,8 +101,7 @@ def featurize_smiles_np(arr, featurizer, log_every_N=1000, verbose=True):
      [1 if elt.size > 0 else 0 for elt in features], dtype=bool)
  features = [elt for (is_valid, elt) in zip(valid_inds, features) if is_valid]
  features = np.squeeze(np.array(features))
  return features.reshape(
      -1,)
  return features.reshape(-1,)


def get_user_specified_features(df, featurizer, verbose=True):
+25 −0
Original line number Diff line number Diff line
import os
from unittest import TestCase
from io import StringIO
import tempfile

import shutil

import deepchem as dc


class TestCSVLoader(TestCase):
  """Tests for loading and featurizing CSV files with `dc.data.CSVLoader`."""

  def test_load_singleton_csv(self):
    """A CSV with a header and exactly one data row featurizes to one sample.

    Writes a one-molecule CSV to a temporary file, featurizes it with a
    1024-bit circular fingerprint, and checks that the result has length 1.
    """
    # delete=False keeps the file on disk after the handle is closed, so the
    # loader can be given its path; removal is done explicitly below.
    with tempfile.NamedTemporaryFile(mode='w', delete=False) as fin:
      fin.write("smiles,endpoint\nc1ccccc1,1")

    try:
      featurizer = dc.feat.CircularFingerprint(size=1024)
      tasks = ["endpoint"]
      loader = dc.data.CSVLoader(
          tasks=tasks, smiles_field="smiles", featurizer=featurizer)

      X = loader.featurize(fin.name)
      self.assertEqual(1, len(X))
    finally:
      # Always clean up, even when featurization raises or the assertion
      # fails; otherwise the temporary file would be left behind.
      os.remove(fin.name)