Commit 12cd0759 authored by Bharath Ramsundar's avatar Bharath Ramsundar
Browse files

Fixed generalization tests

parent ce4a5ca7
Loading
Loading
Loading
Loading
+23 −111
Original line number Diff line number Diff line
@@ -34,31 +34,20 @@ class TestGeneralization(TestAPI):
  def test_sklearn_regression(self):
    """Test that sklearn models can learn on simple regression datasets."""
    np.random.seed(123)

    dataset = sklearn.datasets.load_diabetes()
    X, y = dataset.data, dataset.target

    frac_train = .7
    n_samples = len(X)
    
    X_train, y_train = X[:frac_train*n_samples], y[:frac_train*n_samples]
    X_test, y_test = X[frac_train*n_samples:], y[frac_train*n_samples:]

    train_dataset = Dataset.from_numpy(self.train_dir, X_train, y_train)
    test_dataset = Dataset.from_numpy(self.test_dir, X_test, y_test)

    tasks = train_dataset.get_task_names()
    task_types = {task: "regression" for task in tasks}

    model_params = {
      "batch_size": None,
      "data_shape": train_dataset.get_data_shape()
    }

    verbosity = "high"
    regression_metric = Metric(metrics.r2_score, verbosity=verbosity)
    model = SklearnModel(tasks, task_types, model_params, self.model_dir,
                         mode="regression",
                         model_instance=LinearRegression())
    sklearn_model = LinearRegression()
    model = SklearnModel(sklearn_model, self.model_dir)

    # Fit trained model
    model.fit(train_dataset)
@@ -68,15 +57,11 @@ class TestGeneralization(TestAPI):
    transformers = []
    train_evaluator = Evaluator(model, train_dataset, transformers, verbosity=verbosity)
    train_scores = train_evaluator.compute_model_performance([regression_metric])
    print("train_scores")
    print(train_scores)

    # Eval model on test
    transformers = []
    evaluator = Evaluator(model, test_dataset, transformers, verbosity=verbosity)
    scores = evaluator.compute_model_performance([regression_metric])
    print("scores")
    print(scores)

    assert scores[regression_metric.name] > .5

@@ -88,39 +73,24 @@ class TestGeneralization(TestAPI):

    frac_train = .7
    n_samples = len(X)
    
    X_train, y_train = X[:frac_train*n_samples], y[:frac_train*n_samples]
    X_test, y_test = X[frac_train*n_samples:], y[frac_train*n_samples:]

    train_dataset = Dataset.from_numpy(self.train_dir, X_train, y_train)
    test_dataset = Dataset.from_numpy(self.test_dir, X_test, y_test)


    # Eval model on train
    input_transformers = [
    transformers = [
        NormalizationTransformer(transform_X=True, dataset=train_dataset),
        ClippingTransformer(transform_X=True, dataset=train_dataset)]
    output_transformers = [
        ClippingTransformer(transform_X=True, dataset=train_dataset),
        NormalizationTransformer(transform_y=True, dataset=train_dataset)]
    transformers = input_transformers + output_transformers
    for transformer in transformers:
        transformer.transform(train_dataset)
    for data in [train_dataset, test_dataset]:
      for transformer in transformers:
        transformer.transform(test_dataset)

    tasks = train_dataset.get_task_names()
    task_types = {task: "regression" for task in tasks}

    model_params = {
      "batch_size": None,
      "data_shape": train_dataset.get_data_shape()
    }
          transformer.transform(data)

    verbosity = "high"
    regression_metric = Metric(metrics.r2_score, verbosity=verbosity)
    model = SklearnModel(tasks, task_types, model_params, self.model_dir,
                         mode="regression",
                         model_instance=LinearRegression())
    sklearn_model = LinearRegression()
    model = SklearnModel(sklearn_model, self.model_dir)

    # Fit trained model
    model.fit(train_dataset)
@@ -128,24 +98,18 @@ class TestGeneralization(TestAPI):

    train_evaluator = Evaluator(model, train_dataset, transformers, verbosity=verbosity)
    train_scores = train_evaluator.compute_model_performance([regression_metric])
    print("train_scores")
    print(train_scores)

    assert train_scores[regression_metric.name] > .5

    # Eval model on test
    transformers = []
    evaluator = Evaluator(model, test_dataset, transformers, verbosity=verbosity)
    scores = evaluator.compute_model_performance([regression_metric])
    print("scores")
    print(scores)

    assert scores[regression_metric.name] > .5

  def test_sklearn_multitask_regression(self):
    """Test that sklearn models can learn on simple multitask regression."""
    np.random.seed(123)
    n_tasks = 4
    tasks = range(n_tasks)
    dataset = sklearn.datasets.load_diabetes()
    X, y = dataset.data, dataset.target
    y = np.reshape(y, (len(y), 1))
@@ -153,30 +117,17 @@ class TestGeneralization(TestAPI):
    
    frac_train = .7
    n_samples = len(X)
    
    X_train, y_train = X[:frac_train*n_samples], y[:frac_train*n_samples]
    X_test, y_test = X[frac_train*n_samples:], y[frac_train*n_samples:]

    train_dataset = Dataset.from_numpy(self.train_dir, X_train, y_train)
    test_dataset = Dataset.from_numpy(self.test_dir, X_test, y_test)

    tasks = train_dataset.get_task_names()
    task_types = {task: "regression" for task in tasks}

    model_params = {
      "batch_size": None,
      "data_shape": train_dataset.get_data_shape()
    }

    verbosity = "high"
    regression_metric = Metric(metrics.r2_score, verbosity=verbosity)
    def model_builder(tasks, task_types, model_params, model_dir, verbosity=None):
      return SklearnModel(tasks, task_types, model_params, model_dir,
                          mode="regression",
                          model_instance=LinearRegression(),
                          verbosity=verbosity)
    model = SingletaskToMultitask(tasks, task_types, model_params, self.model_dir,
                                  model_builder, verbosity=verbosity)
    def model_builder(model_dir):
      sklearn_model = LinearRegression()
      return SklearnModel(sklearn_model, model_dir)
    model = SingletaskToMultitask(tasks, model_builder, self.model_dir)

    # Fit trained model
    model.fit(train_dataset)
@@ -186,15 +137,11 @@ class TestGeneralization(TestAPI):
    transformers = []
    train_evaluator = Evaluator(model, train_dataset, transformers, verbosity=verbosity)
    train_scores = train_evaluator.compute_model_performance([regression_metric])
    print("train_scores")
    print(train_scores)

    # Eval model on test
    transformers = []
    evaluator = Evaluator(model, test_dataset, transformers, verbosity=verbosity)
    scores = evaluator.compute_model_performance([regression_metric])
    print("scores")
    print(scores)

    for score in scores[regression_metric.name]:
      assert score > .5
@@ -207,29 +154,15 @@ class TestGeneralization(TestAPI):

    frac_train = .7
    n_samples = len(X)
    
    X_train, y_train = X[:frac_train*n_samples], y[:frac_train*n_samples]
    X_test, y_test = X[frac_train*n_samples:], y[frac_train*n_samples:]

    print("X_train.shape, y_train.shape, X_test.shape, y_test.shape")
    print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

    train_dataset = Dataset.from_numpy(self.train_dir, X_train, y_train)
    test_dataset = Dataset.from_numpy(self.test_dir, X_test, y_test)

    tasks = train_dataset.get_task_names()
    task_types = {task: "classification" for task in tasks}

    model_params = {
      "batch_size": None,
      "data_shape": train_dataset.get_data_shape()
    }

    verbosity = "high"
    classification_metric = Metric(metrics.roc_auc_score, verbosity=verbosity)
    model = SklearnModel(tasks, task_types, model_params, self.model_dir,
                         mode="classification",
                         model_instance=LogisticRegression())
    sklearn_model = LogisticRegression()
    model = SklearnModel(sklearn_model, self.model_dir)

    # Fit trained model
    model.fit(train_dataset)
@@ -239,22 +172,18 @@ class TestGeneralization(TestAPI):
    transformers = []
    train_evaluator = Evaluator(model, train_dataset, transformers, verbosity=verbosity)
    train_scores = train_evaluator.compute_model_performance([classification_metric])
    print("train_scores")
    print(train_scores)

    # Eval model on test
    transformers = []
    evaluator = Evaluator(model, test_dataset, transformers, verbosity=verbosity)
    scores = evaluator.compute_model_performance([classification_metric])
    print("scores")
    print(scores)

    assert scores[classification_metric.name] > .5

  def test_sklearn_multitask_classification(self):
    """Test that sklearn models can learn on simple multitask classification."""
    np.random.seed(123)
    n_tasks = 4
    tasks = range(n_tasks)
    dataset = sklearn.datasets.load_digits(n_class=2)
    X, y = dataset.data, dataset.target
    y = np.reshape(y, (len(y), 1))
@@ -262,30 +191,17 @@ class TestGeneralization(TestAPI):
    
    frac_train = .7
    n_samples = len(X)
    
    X_train, y_train = X[:frac_train*n_samples], y[:frac_train*n_samples]
    X_test, y_test = X[frac_train*n_samples:], y[frac_train*n_samples:]

    train_dataset = Dataset.from_numpy(self.train_dir, X_train, y_train)
    test_dataset = Dataset.from_numpy(self.test_dir, X_test, y_test)

    tasks = train_dataset.get_task_names()
    task_types = {task: "classification" for task in tasks}

    model_params = {
      "batch_size": None,
      "data_shape": train_dataset.get_data_shape()
    }

    verbosity = "high"
    classification_metric = Metric(metrics.roc_auc_score, verbosity=verbosity)
    def model_builder(tasks, task_types, model_params, model_dir, verbosity=None):
      return SklearnModel(tasks, task_types, model_params, model_dir,
                          mode="classification",
                          model_instance=LogisticRegression(),
                          verbosity=verbosity)
    model = SingletaskToMultitask(tasks, task_types, model_params, self.model_dir,
                                  model_builder, verbosity=verbosity)
    def model_builder(model_dir):
      sklearn_model = LogisticRegression()
      return SklearnModel(sklearn_model, model_dir)
    model = SingletaskToMultitask(tasks, model_builder, self.model_dir)

    # Fit trained model
    model.fit(train_dataset)
@@ -295,15 +211,11 @@ class TestGeneralization(TestAPI):
    transformers = []
    train_evaluator = Evaluator(model, train_dataset, transformers, verbosity=verbosity)
    train_scores = train_evaluator.compute_model_performance([classification_metric])
    print("train_scores")
    print(train_scores)

    # Eval model on test
    transformers = []
    evaluator = Evaluator(model, test_dataset, transformers, verbosity=verbosity)
    scores = evaluator.compute_model_performance([classification_metric])
    print("scores")
    print(scores)

    for score in scores[classification_metric.name]:
      assert score > .5