Commit 2246d0a7 authored by Bharath Ramsundar's avatar Bharath Ramsundar
Browse files

Fixes to multitask data loading.

parent b9b21c60
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
@@ -72,6 +72,14 @@ class TestReload(TestAPI):
    # TODO(rbharath): Transformers don't play nice with reload! Namely,
    # reloading will cause the transform to be reapplied. This is undesirable in
    # almost all cases. Need to understand a method to fix this.
    ##################################### DEBUG
    
    print("_run_muv_experiment()")
    print("train_dataset.get_labels()")
    print(train_dataset.get_labels())
    print("train_dataset.get_weights()")
    print(train_dataset.get_weights())
    ##################################### DEBUG
    transformers = [
        BalancingTransformer(transform_w=True, dataset=train_dataset)]
    print("Transforming datasets")
+3 −1
Original line number Diff line number Diff line
@@ -71,7 +71,9 @@ def _load_sdf_file(input_file):
def _load_csv_file(filename, shard_size=None):
  """Load a CSV file as a stream of pandas dataframes.

  The first line of the CSV must be a header row. Missing cells (which
  pandas parses as NaN) are replaced with empty strings in every frame
  that is yielded.

  Args:
    filename: Path of the CSV file to read.
    shard_size: Number of rows per yielded dataframe. If None, the whole
      file is yielded as one dataframe.

  Yields:
    pandas.DataFrame holding at most `shard_size` rows (or every row when
    `shard_size` is None), with NaN entries replaced by "".
  """
  # First line of user-specified CSV *must* be header.
  shards = pd.read_csv(filename, header=0, chunksize=shard_size)
  if isinstance(shards, pd.DataFrame):
    # Without a chunksize, read_csv hands back a single DataFrame instead of
    # an iterator of chunks; iterating over it directly would walk the column
    # names rather than the data, so wrap it as a one-shard sequence.
    shards = [shards]
  for df in shards:
    yield df.fillna("")

def _get_input_type(input_file):
  """Get type of input file. Must be csv/pkl.gz/sdf file."""