Commit 24e01c43 authored by nd-02110114's avatar nd-02110114
Browse files

💚 fix ci

parent e3e36f18
Loading
Loading
Loading
Loading
+60 −61
Original line number Diff line number Diff line
@@ -5,10 +5,7 @@ Tests to make sure deepchem models can fit models on easy datasets.
import sklearn
import sklearn.datasets
import numpy as np
import unittest
import tempfile
import deepchem as dc
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression

@@ -122,63 +119,65 @@ def test_sklearn_multitask_regression():
  assert score > .5


#def test_sklearn_classification():
#  """Test that sklearn models can learn on simple classification datasets."""
#  np.random.seed(123)
#  dataset = sklearn.datasets.load_digits(n_class=2)
#  X, y = dataset.data, dataset.target

#  frac_train = .7
#  n_samples = len(X)
#  n_train = int(frac_train*n_samples)
#  X_train, y_train = X[:n_train], y[:n_train]
#  X_test, y_test = X[n_train:], y[n_train:]
#  train_dataset = dc.data.NumpyDataset(X_train, y_train)
#  test_dataset = dc.data.NumpyDataset(X_test, y_test)

#  classification_metric = dc.metrics.Metric(dc.metrics.roc_auc_score)
#  sklearn_model = LogisticRegression()
#  model = dc.models.SklearnModel(sklearn_model)

#  # Fit trained model
#  model.fit(train_dataset)
#  model.save()

#  # Eval model on test
#  scores = model.evaluate(test_dataset, [classification_metric])
#  assert scores[classification_metric.name] > .5

#def test_sklearn_multitask_classification():
#  """Test that sklearn models can learn on simple multitask classification."""
#  np.random.seed(123)
#  n_tasks = 4
#  tasks = range(n_tasks)
#  dataset = sklearn.datasets.load_digits(n_class=2)
#  X, y = dataset.data, dataset.target
#  y = np.reshape(y, (len(y), 1))
#  y = np.hstack([y] * n_tasks)
#
#  frac_train = .7
#  n_samples = len(X)
#  n_train = int(frac_train*n_samples)
#  X_train, y_train = X[:n_train], y[:n_train]
#  X_test, y_test = X[n_train:], y[n_train:]
#  train_dataset = dc.data.DiskDataset.from_numpy(X_train, y_train)
#  test_dataset = dc.data.DiskDataset.from_numpy(X_test, y_test)

#  classification_metric = dc.metrics.Metric(dc.metrics.roc_auc_score)
#  def model_builder(model_dir):
#    sklearn_model = LogisticRegression()
#    return dc.models.SklearnModel(sklearn_model, model_dir)
#  model = dc.models.SingletaskToMultitask(tasks, model_builder)

#  # Fit trained model
#  model.fit(train_dataset)
#  model.save()
#  # Eval model on test
#  scores = model.evaluate(test_dataset, [classification_metric])
#  for score in scores[classification_metric.name]:
#    assert score > .5
def test_sklearn_classification():
  """Check that a wrapped sklearn classifier learns two-class digits.

  A LogisticRegression inside dc.models.SklearnModel is fit on the first 70%
  of the samples and must reach a ROC-AUC above 0.5 on the remainder.
  """
  np.random.seed(123)
  digits = sklearn.datasets.load_digits(n_class=2)
  features, labels = digits.data, digits.target

  # Sequential (unshuffled) 70/30 split into train and test portions.
  split = int(.7 * len(features))
  train_dataset = dc.data.NumpyDataset(features[:split], labels[:split])
  test_dataset = dc.data.NumpyDataset(features[split:], labels[split:])

  roc_auc = dc.metrics.Metric(dc.metrics.roc_auc_score)
  model = dc.models.SklearnModel(LogisticRegression())

  # Train, then persist the fitted model.
  model.fit(train_dataset)
  model.save()

  # Score on the held-out samples.
  results = model.evaluate(test_dataset, [roc_auc])
  assert results[roc_auc.name] > .5


def test_sklearn_multitask_classification():
  """Check that sklearn classifiers learn a replicated multitask problem.

  The two-class digits labels are copied into four identical task columns.
  A builder that returns LogisticRegression-backed SklearnModels is handed
  to dc.models.SingletaskToMultitask, which is fit on the first 70% of the
  samples; the ROC-AUC reported on the remainder must exceed 0.5.
  """
  np.random.seed(123)
  n_tasks = 4
  tasks = range(n_tasks)
  digits = sklearn.datasets.load_digits(n_class=2)
  features = digits.data
  # Turn the label vector into a column, then repeat it once per task.
  label_column = np.reshape(digits.target, (len(digits.target), 1))
  labels = np.hstack([label_column] * n_tasks)

  # Sequential (unshuffled) 70/30 split into train and test portions.
  split = int(.7 * len(features))
  train_dataset = dc.data.DiskDataset.from_numpy(features[:split],
                                                 labels[:split])
  test_dataset = dc.data.DiskDataset.from_numpy(features[split:],
                                                labels[split:])

  roc_auc = dc.metrics.Metric(dc.metrics.roc_auc_score)

  def build_task_model(model_dir):
    # Each call wraps a fresh LogisticRegression stored under model_dir.
    return dc.models.SklearnModel(LogisticRegression(), model_dir)

  model = dc.models.SingletaskToMultitask(tasks, build_task_model)

  # Train, then persist the fitted model.
  model.fit(train_dataset)
  model.save()

  # Score on the held-out samples.
  results = model.evaluate(test_dataset, [roc_auc])
  assert results[roc_auc.name] > .5


def test_xgboost_regression():
@@ -245,7 +244,7 @@ def test_xgboost_multitask_regression():
  # Eval model on test
  scores = model.evaluate(test_dataset, [regression_metric])
  score = scores[regression_metric.name]
  assert score < 50
  assert score < 55


def test_xgboost_classification():