Commit ae13294c authored by Bharath Ramsundar
Browse files

yapf

parent 03ca10f2
Loading
Loading
Loading
Loading
+8 −6
Original line number Diff line number Diff line
@@ -13,13 +13,15 @@ import deepchem as dc

logger = logging.getLogger(__name__)


def load_sweet(featurizer='ECFP', split='index', reload=True, frac_train=.8):
  """Load sweet datasets."""
  # Load Sweetlead dataset
  logger.info("About to load Sweetlead dataset.")
  data_dir = dc.utils.get_data_dir()
  if reload:
    save_dir = os.path.join(data_dir, "sweetlead/" + featurizer + "/" + str(split))
    save_dir = os.path.join(data_dir,
                            "sweetlead/" + featurizer + "/" + str(split))

  dataset_file = os.path.join(data_dir, "sweet.csv.gz")
  if not os.path.exists(dataset_file):
@@ -39,10 +41,10 @@ def load_sweet(featurizer='ECFP', split='index', reload=True, frac_train=.8):
      tasks=SWEET_tasks, smiles_field="smiles", featurizer=featurizer)
  dataset = loader.featurize(dataset_file)


  # Initialize transformers
  transformers = [
      dc.trans.BalancingTransformer(transform_w=True, dataset=dataset)]
      dc.trans.BalancingTransformer(transform_w=True, dataset=dataset)
  ]
  print("About to transform data")
  for transformer in transformers:
    dataset = transformer.transform(dataset)
+12 −9
Original line number Diff line number Diff line
@@ -19,17 +19,19 @@ from deepchem import metrics
from deepchem.metrics import Metric
from deepchem.models.sklearn_models import SklearnModel

tox_tasks, (tox_train, tox_valid, tox_test), tox_transformers = dc.molnet.load_tox21()
tox_tasks, (tox_train, tox_valid,
            tox_test), tox_transformers = dc.molnet.load_tox21()

classification_metric = Metric(
    metrics.roc_auc_score, np.mean, mode="classification")

classification_metric = Metric(metrics.roc_auc_score, np.mean, mode="classification")

def model_builder(model_dir):
  sklearn_model = RandomForestClassifier(
                          class_weight="balanced",
                          n_estimators=500,
                          n_jobs=-1)
      class_weight="balanced", n_estimators=500, n_jobs=-1)
  return dc.models.SklearnModel(sklearn_model, model_dir)


print(tox_train.get_task_names())
print(tox_tasks)
tox_model = SingletaskToMultitask(tox_tasks, model_builder)
@@ -37,7 +39,9 @@ tox_model.fit(tox_train)

# Load sider models now

sider_tasks, (sider_train, sider_valid, sider_test), sider_transformers = dc.molnet.load_sider(split="random")
sider_tasks, (
    sider_train, sider_valid,
    sider_test), sider_transformers = dc.molnet.load_sider(split="random")

sider_model = SingletaskToMultitask(sider_tasks, model_builder)
sider_model.fit(sider_train)
@@ -65,4 +69,3 @@ for i in range(tox_predictions.shape[0]):
df = pd.DataFrame(confusion_matrix)

df.to_csv("./tox_sider_matrix.csv")