Commit 879ddaa9 authored by Bharath Ramsundar's avatar Bharath Ramsundar Committed by GitHub
Browse files

Merge pull request #438 from lilleswing/full-benchmark-cr

Full benchmark Updates
parents 03c26773 255d43a7
Loading
Loading
Loading
Loading
+41 −26
Original line number Diff line number Diff line
@@ -8,7 +8,9 @@ BENCHMARK_TO_DESIRED_KEY_MAP = {
    "logreg": "logistic regression",
    "tf": "Multitask network",
    "tf_robust": "robust MT-NN",
    "tf_regression": "NN regression",
    "graphconv": "graph convolution",
    "graphconvreg": "graphconv regression",
}
DESIRED_RESULTS_CSV = "devtools/jenkins/desired_results.csv"
TEST_RESULTS_CSV = "examples/results.csv"
@@ -53,12 +55,18 @@ def find_desired_result(result, desired_results):


def is_good_result(my_result, desired_result):
  """Check one benchmark result against its desired scores.

  A score passes when it is at least the desired score scaled down by
  CUSHION_PERCENT. Both 'train_score' and 'test_score' are always checked,
  so every shortfall is reported rather than only the first one.

  Args:
    my_result: mapping holding 'train_score' and 'test_score', plus the
      'data_set', 'model' and 'split' values used to label a failure.
    desired_result: mapping holding the reference 'train_score' and
      'test_score'.

  Returns:
    A (passed, messages) tuple. passed is True when no score fell short;
    messages holds one "data_set,model,split,key,actual,desired" string per
    score that did.
  """
  retval = True
  message = []
  for key in ['train_score', 'test_score']:
    # Higher is Better
    desired_value = desired_result[key] * (1.0 - CUSHION_PERCENT)
    if my_result[key] < desired_value:
      message_part = "%s,%s,%s,%s,%s,%s" % (
          my_result['data_set'], my_result['model'], my_result['split'], key,
          my_result[key], desired_result[key])
      message.append(message_part)
      retval = False
  return retval, message


def test_compare_results():
@@ -66,15 +74,22 @@ def test_compare_results():
  desired_results = parse_desired_results(desired_results)
  test_results = open(TEST_RESULTS_CSV).readlines()
  test_results = parse_test_results(test_results)
  failures = []
  exceptions = []
  for test_result in test_results:
    try:
      desired_result = find_desired_result(test_result, desired_results)
    if not is_good_result(test_result, desired_result):
      exceptions.append(({"test_result": test_result}, {"desired_result": desired_result}))
  if len(exceptions) > 0:
      passes, message = is_good_result(test_result, desired_result)
      if not passes:
        failures.extend(message)
    except Exception as e:
      exceptions.append("Unable to find desired result for %s" % test_result)
  for exception in exceptions:
    print(exception)
    assert_true(len(exceptions) == 0, "Some performance benchmarks not passed")
  for failure in failures:
    print(failure)
  assert_true(len(exceptions) == 0, "Error parsing performance results")
  assert_true(len(failures) == 0, "Some performance benchmarks not passed")

  if __name__ == "__main__":
    test_compare_results()
+79 −0
Original line number Diff line number Diff line
split,dataset,model,Train score/ROC-AUC,Valid score/ROC-AUC
Index splitting,tox21,logistic regression,0.903,0.705
Index splitting,tox21,Random Forest,0.999,0.733
Index splitting,tox21,IRV,0.811,0.767
Index splitting,tox21,Multitask network,0.856,0.763
Index splitting,tox21,robust MT-NN,0.857,0.767
Index splitting,tox21,graph convolution,0.872,0.798
@@ -12,6 +14,8 @@ Index splitting,pcba,Multitask network,0.826,0.802
Index splitting,pcba,robust MT-NN,0.809,0.783
Index splitting,pcba,graph convolution,0.876,0.852
Index splitting,sider,logistic regression,0.933,0.620
Index splitting,sider,Random Forest,0.999,0.670
Index splitting,sider,IRV,0.649,0.642
Index splitting,sider,Multitask network,0.775,0.634
Index splitting,sider,robust MT-NN,0.803,0.632
Index splitting,sider,graph convolution,0.708,0.594
@@ -20,10 +24,20 @@ Index splitting,toxcast,Multitask network,0.830,0.678
Index splitting,toxcast,robust MT-NN,0.825,0.680
Index splitting,toxcast,graph convolution,0.821,0.720
Index splitting,clintox,logistic regression,0.967,0.676
Index splitting,clintox,Random Forest,0.995,0.776
Index splitting,clintox,IRV,0.763,0.814
Index splitting,clintox,Multitask network,0.934,0.830
Index splitting,clintox,robust MT-NN,0.949,0.827
Index splitting,clintox,graph convolution,0.946,0.860
Index splitting,hiv,logistic regression,0.864,0.739
Index splitting,hiv,Random Forest,0.999,0.720
Index splitting,hiv,IRV,0.841,0.724
Index splitting,hiv,Multitask network,0.761,0.652
Index splitting,hiv,robust MT-NN,0.780,0.708
Index splitting,hiv,graph convolution,0.876,0.779
Random splitting,tox21,logistic regression,0.902,0.715
Random splitting,tox21,Random Forest,0.999,0.764
Random splitting,tox21,IRV,0.808,0.767
Random splitting,tox21,Multitask network,0.844,0.795
Random splitting,tox21,robust MT-NN,0.855,0.773
Random splitting,tox21,graph convolution,0.865,0.827
@@ -36,6 +50,8 @@ Random splitting,pcba,Multitask network,0.811,0.778
Random splitting,pcba,robust MT-NN,0.811,0.771
Random splitting,pcba,graph convolution,0.872,0.844
Random splitting,sider,logistic regression,0.929,0.656
Random splitting,sider,Random Forest,0.999,0.665
Random splitting,sider,IRV,0.648,0.596
Random splitting,sider,Multitask network,0.777,0.655
Random splitting,sider,robust MT-NN,0.804,0.630
Random splitting,sider,graph convolution,0.705,0.618
@@ -44,10 +60,20 @@ Random splitting,toxcast,Multitask network,0.836,0.684
Random splitting,toxcast,robust MT-NN,0.822,0.681
Random splitting,toxcast,graph convolution,0.820,0.717
Random splitting,clintox,logistic regression,0.972,0.725
Random splitting,clintox,Random Forest,0.997,0.670
Random splitting,clintox,IRV,0.809,0.846
Random splitting,clintox,Multitask network,0.951,0.834
Random splitting,clintox,robust MT-NN,0.959,0.830
Random splitting,clintox,graph convolution,0.975,0.876
Random splitting,hiv,logistic regression,0.860,0.806
Random splitting,hiv,Random Forest,0.999,0.850
Random splitting,hiv,IRV,0.839,0.809
Random splitting,hiv,Multitask network,0.742,0.715
Random splitting,hiv,robust MT-NN,0.753,0.727
Random splitting,hiv,graph convolution,0.847,0.803
Scaffold splitting,tox21,logistic regression,0.900,0.650
Scaffold splitting,tox21,Random Forest,0.999,0.629
Scaffold splitting,tox21,IRV,0.823,0.708
Scaffold splitting,tox21,Multitask network,0.863,0.703
Scaffold splitting,tox21,robust MT-NN,0.861,0.710
Scaffold splitting,tox21,graph convolution,0.885,0.732
@@ -60,6 +86,8 @@ Scaffold splitting,pcba,Multitask network,0.814,0.760
Scaffold splitting,pcba,robust MT-NN,0.812,0.756
Scaffold splitting,pcba,graph convolution,0.874,0.817
Scaffold splitting,sider,logistic regression,0.926,0.592
Scaffold splitting,sider,Random Forest,0.999,0.619
Scaffold splitting,sider,IRV,0.639,0.599
Scaffold splitting,sider,Multitask network,0.776,0.557
Scaffold splitting,sider,robust MT-NN,0.797,0.560
Scaffold splitting,sider,graph convolution,0.722,0.583
@@ -68,6 +96,57 @@ Scaffold splitting,toxcast,Multitask network,0.828,0.617
Scaffold splitting,toxcast,robust MT-NN,0.830,0.614
Scaffold splitting,toxcast,graph convolution,0.832,0.638
Scaffold splitting,clintox,logistic regression,0.960,0.803
Scaffold splitting,clintox,Random Forest,0.993,0.735
Scaffold splitting,clintox,IRV,0.793,0.718
Scaffold splitting,clintox,Multitask network,0.947,0.862
Scaffold splitting,clintox,robust MT-NN,0.953,0.890
Scaffold splitting,clintox,graph convolution,0.957,0.823
Scaffold splitting,hiv,logistic regression,0.858,0.798
Scaffold splitting,hiv,Random Forest,0.946,0.562
Scaffold splitting,hiv,IRV,0.847,0.811
Scaffold splitting,hiv,Multitask network,0.775,0.765
Scaffold splitting,hiv,robust MT-NN,0.785,0.748
Scaffold splitting,hiv,graph convolution,0.867,0.769
Index splitting,delaney,Random Forest,0.953,0.626
Index splitting,delaney,NN regression,0.868,0.578
Index splitting,delaney,graphconv regression,0.967,0.790
Random splitting,delaney,Random Forest,0.951,0.684
Random splitting,delaney,NN regression,0.865,0.574
Random splitting,delaney,graphconv regression,0.964,0.782
Scaffold splitting,delaney,Random Forest,0.953,0.284
Scaffold splitting,delaney,NN regression,0.866,0.342
Scaffold splitting,delaney,graphconv regression,0.967,0.606
Index splitting,sampl,Random Forest,0.968,0.736
Index splitting,sampl,NN regression,0.917,0.764
Index splitting,sampl,graphconv regression,0.982,0.864
Random splitting,sampl,Random Forest,0.967,0.752
Random splitting,sampl,NN regression,0.908,0.830
Random splitting,sampl,graphconv regression,0.987,0.868
Scaffold splitting,sampl,Random Forest,0.966,0.473
Scaffold splitting,sampl,NN regression,0.891,0.217
Scaffold splitting,sampl,graphconv regression,0.985,0.666
Index splitting,nci,NN regression,0.171,0.062
Index splitting,nci,graphconv regression,0.123,0.048
Random splitting,nci,NN regression,0.168,0.085
Random splitting,nci,graphconv regression,0.117,0.076
Scaffold splitting,nci,NN regression,0.180,0.052
Scaffold splitting,nci,graphconv regression,0.131,0.046
Random splitting,pdbbind(core),Random Forest,0.969,0.445
Random splitting,pdbbind(core),NN regression,0.973,0.494
Random splitting,pdbbind(refined),Random Forest,0.963,0.511
Random splitting,pdbbind(refined),NN regression,0.987,0.503
Random splitting,pdbbind(full),Random Forest,0.965,0.493
Random splitting,pdbbind(full),NN regression,0.983,0.528
Index splitting,chembl,NN regression,0.443,0.427
Random splitting,chembl,NN regression,0.464,0.434
Scaffold splitting,chembl,NN regression,0.484,0.361
Index splitting,qm7,NN regression,0.997,0.986
Random splitting,qm7,NN regression,0.999,0.999
Stratified splitting,qm7,NN regression,0.999,0.999
Index splitting,qm7b,NN regression,0.931,0.803
Random splitting,qm7b,NN regression,0.923,0.884
Stratified splitting,qm7b,NN regression,0.934,0.884
Index splitting,qm9,NN regression,0.733,0.791
Random splitting,qm9,NN regression,0.811,0.823
Stratified splitting,qm9,NN regression,0.843,0.818
User-defined splitting,kaggle,NN regression,0.748,0.452
+6 −1
Original line number Diff line number Diff line
@@ -8,8 +8,13 @@ python setup.py install
rm examples/results.csv || true
cd examples
python benchmark.py -d tox21
export retval1=$?

cd ..
# Capture the test status without aborting the script. A trailing `|| true`
# would reset $? to 0, so retval2 could never report a failed comparison.
export retval2=0
nosetests -v devtools/jenkins/compare_results.py --with-xunit || retval2=$?

source deactivate
conda remove --name $envname --all
export retval=$(($retval1 + $retval2))
# NOTE(review): `return` outside a function only works when this file is
# sourced; confirm how Jenkins invokes it (use `exit` if it is executed).
return ${retval}
 No newline at end of file
+87 −3
Original line number Diff line number Diff line
@@ -3,6 +3,8 @@ Index splitting
|Dataset    |Model               |Train score/ROC-AUC|Valid score/ROC-AUC|
|-----------|--------------------|-------------------|-------------------|
|tox21      |logistic regression |0.903              |0.705              |
|           |Random Forest       |0.999              |0.733              |
|           |IRV                 |0.811              |0.767              |
|           |Multitask network   |0.856              |0.763              |
|           |robust MT-NN        |0.857              |0.767              |
|           |graph convolution   |0.872              |0.798              |
@@ -15,6 +17,8 @@ Index splitting
|           |robust MT-NN        |0.809              |0.783              |
|           |graph convolution   |0.876              |0.852              |
|sider      |logistic regression |0.933              |0.620              |
|           |Random Forest       |0.999              |0.670              |
|           |IRV                 |0.649              |0.642              |
|           |Multitask network   |0.775              |0.634              |
|           |robust MT-NN        |0.803              |0.632              |
|           |graph convolution   |0.708              |0.594              |
@@ -23,16 +27,26 @@ Index splitting
|           |robust MT-NN        |0.825              |0.680              |
|           |graph convolution   |0.821              |0.720              |
|clintox    |logistic regression |0.967              |0.676              |
|           |Random Forest       |0.995              |0.776              |
|           |IRV                 |0.763              |0.814              |
|           |Multitask network   |0.934              |0.830              |
|           |robust MT-NN        |0.949              |0.827              |
|           |graph convolution   |0.946              |0.860              |
|hiv        |logistic regression |0.864              |0.739              |
|           |Random Forest       |0.999              |0.720              |
|           |IRV                 |0.841              |0.724              |
|           |Multitask network   |0.761              |0.652              |
|           |robust MT-NN        |0.780              |0.708              |
|           |graph convolution   |0.876              |0.779              |

Random splitting

|Dataset    |Model               |Train score/ROC-AUC|Valid score/ROC-AUC|
|-----------|--------------------|-------------------|-------------------|
|tox21      |logistic regression |0.903              |0.735              |
|           |Multitask network   |0.856              |0.783              |
|tox21      |logistic regression |0.902              |0.715              |
|           |Random Forest       |0.999              |0.764              |
|           |IRV                 |0.808              |0.767              |
|           |Multitask network   |0.844              |0.795              |
|           |robust MT-NN        |0.855              |0.773              |
|           |graph convolution   |0.865              |0.827              |
|muv        |logistic regression |0.957              |0.719              |
@@ -44,6 +58,8 @@ Random splitting
|           |robust MT-NN        |0.811              |0.771              |
|           |graph convolution   |0.872              |0.844              |
|sider      |logistic regression |0.929              |0.656              |
|           |Random Forest       |0.999              |0.665              |
|           |IRV                 |0.648              |0.596              |
|           |Multitask network   |0.777              |0.655              |
|           |robust MT-NN        |0.804              |0.630              |
|           |graph convolution   |0.705              |0.618              |
@@ -52,15 +68,25 @@ Random splitting
|           |robust MT-NN        |0.822              |0.681              |
|           |graph convolution   |0.820              |0.717              |
|clintox    |logistic regression |0.972              |0.725              |
|           |Random Forest       |0.997              |0.670              |
|           |IRV                 |0.809              |0.846              |
|           |Multitask network   |0.951              |0.834              |
|           |robust MT-NN        |0.959              |0.830              |
|           |graph convolution   |0.975              |0.876              |
|hiv        |logistic regression |0.860              |0.806              |
|           |Random Forest       |0.999              |0.850              |
|           |IRV                 |0.839              |0.809              |
|           |Multitask network   |0.742              |0.715              |
|           |robust MT-NN        |0.753              |0.727              |
|           |graph convolution   |0.847              |0.803              |

Scaffold splitting

|Dataset    |Model               |Train score/ROC-AUC|Valid score/ROC-AUC|
|-----------|--------------------|-------------------|-------------------|
|tox21      |logistic regression |0.900              |0.650              |
|           |Random Forest       |0.999              |0.629              |
|           |IRV                 |0.823              |0.708              |
|           |Multitask network   |0.863              |0.703              |
|           |robust MT-NN        |0.861              |0.710              |
|           |graph convolution   |0.885              |0.732              |
@@ -73,6 +99,8 @@ Scaffold splitting
|           |robust MT-NN        |0.812              |0.756              |
|           |graph convolution   |0.874              |0.817              |
|sider      |logistic regression |0.926              |0.592              |
|           |Random Forest       |0.999              |0.619              |
|           |IRV                 |0.639              |0.599              |
|           |Multitask network   |0.776              |0.557              |
|           |robust MT-NN        |0.797              |0.560              |
|           |graph convolution   |0.722              |0.583              |
@@ -81,6 +109,62 @@ Scaffold splitting
|           |robust MT-NN        |0.830              |0.614              |
|           |graph convolution   |0.832              |0.638              |
|clintox    |logistic regression |0.960              |0.803              |
|           |Random Forest       |0.993              |0.735              |
|           |IRV                 |0.793              |0.718              |
|           |Multitask network   |0.947              |0.862              |
|           |robust MT-NN        |0.953              |0.890              |
|           |graph convolution   |0.957              |0.823              |
|hiv        |logistic regression |0.858              |0.798              |
|           |Random Forest       |0.946              |0.562              |
|           |IRV                 |0.847              |0.811              |
|           |Multitask network   |0.775              |0.765              |
|           |robust MT-NN        |0.785              |0.748              |
|           |graph convolution   |0.867              |0.769              |

* Regression

|Dataset         |Model               |Splitting   |Train score/R2|Valid score/R2|
|----------------|--------------------|------------|--------------|--------------|
|delaney         |Random Forest       |Index       |0.953         |0.626         |
|                |NN regression       |Index       |0.868         |0.578         |
|                |graphconv regression|Index       |0.967         |0.790         |
|                |Random Forest       |Random      |0.951         |0.684         |
|                |NN regression       |Random      |0.865         |0.574         |
|                |graphconv regression|Random      |0.964         |0.782         |
|                |Random Forest       |Scaffold    |0.953         |0.284         |
|                |NN regression       |Scaffold    |0.866         |0.342         |
|                |graphconv regression|Scaffold    |0.967         |0.606         |
|sampl           |Random Forest       |Index       |0.968         |0.736         |
|                |NN regression       |Index       |0.917         |0.764         |
|                |graphconv regression|Index       |0.982         |0.864         |
|                |Random Forest       |Random      |0.967         |0.752         |
|                |NN regression       |Random      |0.908         |0.830         |
|                |graphconv regression|Random      |0.987         |0.868         |
|                |Random Forest       |Scaffold    |0.966         |0.473         |
|                |NN regression       |Scaffold    |0.891         |0.217         |
|                |graphconv regression|Scaffold    |0.985         |0.666         |
|nci             |NN regression       |Index       |0.171         |0.062         |
|                |graphconv regression|Index       |0.123         |0.048         |
|                |NN regression       |Random      |0.168         |0.085         |
|                |graphconv regression|Random      |0.117         |0.076         |
|                |NN regression       |Scaffold    |0.180         |0.052         |
|                |graphconv regression|Scaffold    |0.131         |0.046         |
|pdbbind(core)   |Random Forest       |Random      |0.969         |0.445         |
|                |NN regression       |Random      |0.973         |0.494         |
|pdbbind(refined)|Random Forest       |Random      |0.963         |0.511         |
|                |NN regression       |Random      |0.987         |0.503         |
|pdbbind(full)   |Random Forest       |Random      |0.965         |0.493         |
|                |NN regression       |Random      |0.983         |0.528         |
|chembl          |MT-NN regression    |Index       |0.443         |0.427         |
|                |MT-NN regression    |Random      |0.464         |0.434         |
|                |MT-NN regression    |Scaffold    |0.484         |0.361         |
|qm7             |NN regression       |Index       |0.997         |0.986         |
|                |NN regression       |Random      |0.999         |0.999         |
|                |NN regression       |Stratified  |0.999         |0.999         |
|qm7b            |MT-NN regression    |Index       |0.931         |0.803         |
|                |MT-NN regression    |Random      |0.923         |0.884         |
|                |MT-NN regression    |Stratified  |0.934         |0.884         |
|qm9             |MT-NN regression    |Index       |0.733         |0.791         |
|                |MT-NN regression    |Random      |0.811         |0.823         |
|                |MT-NN regression    |Stratified  |0.843         |0.818         |
|kaggle          |MT-NN regression    |User-defined|0.748         |0.452         |
+44 −6
Original line number Diff line number Diff line
@@ -4,9 +4,11 @@ Utility script to convert the benchmark markdown table into a CSV
import sys


def table_to_csv(lines):
def classification_table_to_csv(lines):
  output = []
  headers = ["split", "dataset", "model", "Train score/ROC-AUC", "Valid score/ROC-AUC"]
  headers = [
      "split", "dataset", "model", "Train score/ROC-AUC", "Valid score/ROC-AUC"
  ]
  output.append(",".join(headers))
  for line in lines:
    vars = [x.strip() for x in line.split('|')]
@@ -29,10 +31,46 @@ def table_to_csv(lines):
    print(l)


def table_to_json(f):
  lines = [x.strip() for x in open(f).readlines()]
  table_to_csv(lines)
def regression_table_to_csv(lines):
  """Print the regression markdown table as CSV rows.

  Every data row of the form ``|dataset|model|split|train|valid|`` is
  printed as ``<split> splitting,<dataset>,<model>,<train>,<valid>``. A
  blank dataset cell inherits the dataset of the closest preceding row that
  named one, and the model name "MT-NN regression" is written out as
  "NN regression". Lines without a '|', the header row and the dashed
  separator row are ignored. Nothing is printed unless every row parses.

  Args:
    lines: iterable of already-stripped lines from the regression section
      of the markdown file.

  Raises:
    ValueError: if a table row does not have exactly five cells, or if a
      row leaves the dataset cell blank before any dataset has been named.
  """
  output = []
  dataset = None

  for line in lines:
    cells = [x.strip() for x in line.split('|')]
    # str.split always yields at least one item; exactly one means the line
    # holds no '|' at all (blank line or section heading), so it is not a row.
    if len(cells) == 1:
      continue
    if cells[1] == "Dataset":
      continue
    if cells[1].startswith("-----"):
      continue
    # Drop the empty strings produced by the leading and trailing '|'.
    my_dataset, model, split, train, test = cells[1:-1]
    if my_dataset != "":
      dataset = my_dataset
    if dataset is None:
      raise ValueError(
          "Table row has a blank dataset and no earlier row to inherit "
          "from: %r" % line)
    if model == "MT-NN regression":
      model = "NN regression"
    split = "%s splitting" % split
    output.append(",".join([split, dataset, model, train, test]))
  for l in output:
    print(l)


def split_classification_regression(lines):
  """Split the markdown lines at the "* Regression" heading.

  Args:
    lines: list of lines from the benchmark markdown file.

  Returns:
    A (classification, regression) pair of lists. The first line starting
    with "* Regression" begins the regression list; everything before it is
    the classification list. When no such line exists, every line is
    returned as classification and the regression list is empty.
  """
  # Default to "no regression section" so a file without the heading does
  # not leave split_index unbound.
  split_index = len(lines)
  for i, line in enumerate(lines):
    if line.startswith("* Regression"):
      split_index = i
      break
  return lines[:split_index], lines[split_index:]


def create_csv(f1):
  """Print the benchmark markdown file at path f1 as CSV on stdout.

  The file is read once, each line is stripped, and the lines are divided
  into the classification and regression sections, which are then converted
  and printed in that order.

  Args:
    f1: path of the markdown file holding the benchmark tables.
  """
  # Use a context manager so the file handle is closed as soon as it is read.
  with open(f1) as table_file:
    lines = [x.strip() for x in table_file.readlines()]
  classification, regression = split_classification_regression(lines)
  classification_table_to_csv(classification)
  regression_table_to_csv(regression)


if __name__ == "__main__":
  table_to_json(sys.argv[1])
  create_csv(sys.argv[1])
Loading