Commit 2350e182 authored by Bharath Ramsundar

Updates for final draft

parent a0d83754
+16 −11
@@ -11,10 +11,14 @@ import tempfile
import shutil
import deepchem as dc
import pandas as pd
import matplotlib
# Force matplotlib to not use any Xwindows backend.
matplotlib.use('Agg')
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
from FACTORS_datasets import load_factors

###Load data###
np.random.seed(123)
shard_size = 2000
print("About to load FACTORS data.")
FACTORS_tasks, datasets, transformers = load_factors(shard_size=shard_size)
@@ -26,18 +30,19 @@ n_tasks = y_train.shape[1]
all_results = []
for task in range(n_tasks):
  y_task = y_train[:, task]
  task_results = []
  for other_task in range(n_tasks):
    if task == other_task:
      task_results.append(1.)
      continue
    y_other = y_train[:, other_task]
    r2 = dc.metrics.pearson_r2_score(y_task, y_other)
    #print("r2 for %s-%s is %f" % (task, other_task, r2))
    task_results.append(r2)
  print("Task %d" % task)
  print(task_results)
  all_results.append(task_results)
print("Writing results to factors_corr.csv")
df = pd.DataFrame(all_results)
df.to_csv("factors_corr.csv")
    print("r2 for %s-%s is %f" % (task, other_task, r2))
    all_results.append(r2)
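As a hedged aside (not part of this commit): since pandas is already imported above and dc.metrics.pearson_r2_score is the squared Pearson correlation, the same task-by-task correlation matrix could be computed in one step, roughly along these lines:

# Illustrative sketch only: squared Pearson correlation between task columns of y_train.
corr_matrix = pd.DataFrame(y_train).corr() ** 2
corr_matrix.to_csv("factors_corr.csv")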

# the histogram of the data
n, bins, patches = plt.hist(np.array(all_results), 50, normed=True, stacked=True,
                            facecolor='green', alpha=0.75)
plt.xlabel('Cross-task Correlations')
plt.ylabel('Probability Density')
plt.title('Histogram of Factors Intertask Correlations')
plt.grid(True)
plt.savefig("Factors_correlations.png")
+0 −1
@@ -14,7 +14,6 @@ from sklearn.ensemble import RandomForestRegressor
from FACTORS_datasets import load_factors

###Load data###
np.random.seed(123)
shard_size = 2000
num_trials = 5
print("About to load FACTORS data.")
+2 −5
@@ -13,12 +13,9 @@ import numpy as np
import deepchem as dc
from FACTORS_datasets import load_factors

# Set numpy seed
np.random.seed(123)

###Load data###
shard_size = 2000
num_trials = 5
num_trials = 2
print("About to load FACTORS data.")
FACTORS_tasks, datasets, transformers = load_factors(shard_size=shard_size)
train_dataset, valid_dataset, test_dataset = datasets
@@ -41,7 +38,7 @@ for trial in range(num_trials):
      weight_init_stddevs=[.02]*n_layers,
      bias_init_consts=[1.]*n_layers, learning_rate=.0003,
      penalty=.0001, penalty_type="l2", optimizer="adam", batch_size=100,
      seed=123)
      logdir="FACTORS_tf_model" )

  #Use R2 classification metric
  metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, task_averager=np.mean)
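For context, this metric is typically passed to DeepChem's Model.evaluate; a minimal, hedged sketch of scoring the fitted model (variable names taken from this script) might look like:

# Score the fitted model on train and validation splits with the
# mean-over-tasks Pearson R^2 metric defined above.
train_scores = model.evaluate(train_dataset, [metric], transformers)
valid_scores = model.evaluate(valid_dataset, [metric], transformers)
print("Train scores: %s" % str(train_scores))
print("Valid scores: %s" % str(valid_scores))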
+2 −5
@@ -13,12 +13,9 @@ import numpy as np
import deepchem as dc
from FACTORS_datasets import load_factors

# Set numpy seed
np.random.seed(123)

###Load data###
shard_size = 2000
num_trials = 5
num_trials = 2
print("About to load FACTORS data.")
FACTORS_tasks, datasets, transformers = load_factors(shard_size=shard_size)
train_dataset, valid_dataset, test_dataset = datasets
@@ -41,7 +38,7 @@ for trial in range(num_trials):
      alpha_init_stddevs=[.02]*n_layers, weight_init_stddevs=[.02]*n_layers,
      bias_init_consts=[1.]*n_layers, learning_rate=.0003,
      penalty=.0001, penalty_type="l2", optimizer="adam", batch_size=100,
      seed=123)
      logdir="FACTORS_tf_progressive")

  #Use R2 classification metric
  metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, task_averager=np.mean)
+2 −5
@@ -13,12 +13,9 @@ import shutil
import deepchem as dc
from FACTORS_datasets import load_factors

# Set numpy seed
np.random.seed(123)

###Load data###
shard_size = 2000
num_trials = 5
num_trials = 2
print("About to load FACTORS data.")
FACTORS_tasks, datasets, transformers = load_factors(shard_size=shard_size)
train_dataset, valid_dataset, test_dataset = datasets
@@ -47,7 +44,7 @@ for trial in range(num_trials):
      bypass_weight_init_stddevs=[.02]*n_bypass_layers,
      bypass_bias_init_consts=[1.]*n_bypass_layers,
      learning_rate=.0003, penalty=.0001, penalty_type="l2",
      optimizer="adam", batch_size=100, seed=123)
      optimizer="adam", batch_size=100, logdir="FACTORS_tf_bypass")

  print("Fitting Model")
  model.fit(train_dataset, nb_epoch=nb_epoch)
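Since each of these scripts repeats the fit over num_trials, the per-trial validation scores could be collected and summarized roughly as follows; this is an illustrative sketch, and the all_scores list and the "mean-pearson_r2_score" dictionary key are assumptions, not part of the commit:

# Sketch: accumulate the mean-over-tasks Pearson R^2 from each trial and
# report the mean and standard deviation across trials.
all_scores = []
for trial in range(num_trials):
  # ... construct and fit the model exactly as above ...
  scores = model.evaluate(valid_dataset, [metric], transformers)
  all_scores.append(scores["mean-pearson_r2_score"])  # key name is an assumption
print("Validation R^2 over %d trials: %f +/- %f"
      % (num_trials, np.mean(all_scores), np.std(all_scores)))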