Commit e5a1f49d authored by Bharath Ramsundar's avatar Bharath Ramsundar Committed by GitHub
Browse files

Merge pull request #395 from lilleswing/jenkins

Jenkins Configuration And Run Scripts
parents 3a7e7e7f 7acf69a2
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -221,8 +221,8 @@ Random splitting

|Dataset    |Model               |Train score/ROC-AUC|Valid score/ROC-AUC|
|-----------|--------------------|-------------------|-------------------|
|tox21      |logistic regression |0.903              |0.735              |
|           |Multitask network   |0.856              |0.783              |
|tox21      |logistic regression |0.902              |0.715              |
|           |Multitask network   |0.844              |0.795              |
|           |robust MT-NN        |0.855              |0.773              |
|           |graph convolution   |0.865              |0.827              |
|muv        |logistic regression |0.957              |0.719              |
+2 −1
Original line number Diff line number Diff line
@@ -388,8 +388,9 @@ class ScaffoldSplitter(Splitter):
      else:
        scaffolds[scaffold].append(ind)
    # Sort from largest to smallest scaffold sets
    scaffolds = {key: sorted(value) for key, value in scaffolds.items()}
    scaffold_sets = [scaffold_set for (scaffold, scaffold_set) in
                     sorted(scaffolds.items(), key=lambda x: -len(x[1]))]
                     sorted(scaffolds.items(), key=lambda x: (len(x[1]), x[1][0]), reverse=True)]
    train_cutoff = frac_train * len(dataset)
    valid_cutoff = (frac_train + frac_valid) * len(dataset)
    train_inds, valid_inds, test_inds = [], [], []
+54 −0
Original line number Diff line number Diff line
# Usage Instructions
On Stanford Internal Network http://171.65.103.54:8080/

username: deepAdmin

password: check sticky-note on tensorbro 

# Moving Instructions
All the configuration necessary to run Jenkins is in /var/jenkins

Simply rsync /var/jenkins to the new server.

# Install Instructions
#### Update the System
``` bash
sudo apt-get update
sudo apt-get upgrade
```
#### Install Build Dependency Packages
``` bash
sudo apt-get install -y build-essential git python-pip libfreetype6-dev libxft-dev libncurses-dev libopenblas-dev gfortran python-matplotlib libblas-dev liblapack-dev libatlas-base-dev python-dev python-pydot linux-headers-generic linux-image-extra-virtual unzip python-numpy swig python-pandas python-sklearn unzip wget pkg-config zip g++ zlib1g-dev libcurl3-dev
```

#### Install CUDA 8.0 and cuDNN 5.1
``` bash 
wget https://developer.nvidia.com/compute/cuda/8.0/Prod2/local_installers/cuda-repo-ubuntu1604-8-0-local-ga2_8.0.61-1_amd64-deb
sudo dpkg -i cuda-repo-ubuntu1604-8-0-local-ga2_8.0.61-1_amd64-deb
rm cuda-repo-ubuntu1604-8-0-local-ga2_8.0.61-1_amd64-deb
sudo apt-get update
sudo apt-get install -y cuda
sudo dpkg -i libcudnn5_5.1.5-1+cuda8.0_amd64.deb
sudo dpkg -i libcudnn5-dev_5.1.5-1+cuda8.0_amd64.deb
```

#### Install Jenkins
``` bash
wget -q -O - https://pkg.jenkins.io/debian/jenkins-ci.org.key | sudo apt-key add -
sudo sh -c 'echo deb http://pkg.jenkins.io/debian-stable binary/ > /etc/apt/sources.list.d/jenkins.list'
sudo apt-get update
sudo apt-get install jenkins
``` 

#### Install Conda For the Jenkins User
``` bash
sudo su - jenkins
bash Anaconda3-4.3.0-Linux-x86_64.sh
```

#### Configure Through Web-UI
``` bash
GOTO http://server:8080
Install Suggested Plugins Through Web-UI
Create First Admin User
```
+80 −0
Original line number Diff line number Diff line
from nose.tools import assert_true, nottest

# Fractional slack applied in is_good_result: a score passes when it is at
# least desired * (1.0 - CUSHION_PERCENT). Note the value is a fraction
# (0.01 == 1%), not a percentage.
CUSHION_PERCENT = 0.01

# Translates the short split/model identifiers read by parse_test_results
# into the display names used in the desired-results CSV.
BENCHMARK_TO_DESIRED_KEY_MAP = dict(
  # Splitter identifiers
  index="Index splitting",
  random="Random splitting",
  scaffold="Scaffold splitting",
  # Model identifiers
  logreg="logistic regression",
  tf="Multitask network",
  tf_robust="robust MT-NN",
  graphconv="graph convolution",
)

# Both paths are passed to open() as-is, so they are resolved against the
# current working directory.
DESIRED_RESULTS_CSV = "devtools/jenkins/desired_results.csv"
TEST_RESULTS_CSV = "examples/results.csv"


def parse_desired_results(desired_results):
  """Parse data rows of the desired-results CSV into dictionaries.

  Args:
    desired_results: iterable of comma-separated lines (header already
      removed by the caller). The first five fields are read as
      split, data set, model, train score and validation score.

  Returns:
    A list with one dict per non-blank line, keyed by "split", "data_set",
    "model", "train_score" and "test_score". The fifth column is stored
    under "test_score" so the rows can be compared key-for-key with the
    output of parse_test_results.

  Raises:
    IndexError: if a non-blank line has fewer than five fields.
    ValueError: if a score field cannot be converted to float.
  """
  retval = []
  for line in desired_results:
    # Blank lines (e.g. a trailing newline at the end of the file) carry no
    # data; skip them instead of failing with an IndexError.
    if not line.strip():
      continue
    # Strip each field so stray whitespace or the line terminator does not
    # leak into the keys used for matching.
    fields = [field.strip() for field in line.split(',')]
    retval.append({
      "split": fields[0],
      "data_set": fields[1],
      "model": fields[2],
      "train_score": float(fields[3]),
      "test_score": float(fields[4])
    })
  return retval


@nottest
def parse_test_results(test_results):
  """Parse rows of the benchmark results CSV into dictionaries.

  Decorated with @nottest so the test runner does not collect this helper
  as a test case despite its name.

  Args:
    test_results: iterable of comma-separated lines. Only columns 1
      (data set), 2 (split identifier), 5 (model identifier), 6 (train
      score) and 9 (stored as "test_score") are read; the meaning of the
      remaining columns is not visible here.

  Returns:
    A list with one dict per non-blank line, keyed by "split", "data_set",
    "model", "train_score" and "test_score". Split and model identifiers
    are translated through BENCHMARK_TO_DESIRED_KEY_MAP so they match the
    names used in the desired-results CSV.

  Raises:
    IndexError: if a non-blank line has fewer than ten fields.
    KeyError: if a split or model identifier is not in
      BENCHMARK_TO_DESIRED_KEY_MAP.
    ValueError: if a score field cannot be converted to float.
  """
  retval = []
  for line in test_results:
    # Blank lines carry no data; skip them instead of failing with an
    # IndexError.
    if not line.strip():
      continue
    # Strip each field so stray whitespace does not break the key lookups.
    fields = [field.strip() for field in line.split(',')]
    retval.append({
      "split": BENCHMARK_TO_DESIRED_KEY_MAP[fields[2]],
      "data_set": fields[1],
      "model": BENCHMARK_TO_DESIRED_KEY_MAP[fields[5]],
      "train_score": float(fields[6]),
      "test_score": float(fields[9])
    })
  return retval


def find_desired_result(result, desired_results):
  """Return the desired-result entry that corresponds to `result`.

  Two entries correspond when their 'data_set', 'split' and 'model' values
  are all equal. The first matching entry in `desired_results` is returned.

  Raises:
    Exception: if no entry in `desired_results` matches.
  """
  identifying_keys = ('data_set', 'split', 'model')
  for candidate in desired_results:
    if all(result[name] == candidate[name] for name in identifying_keys):
      return candidate
  raise Exception("Unable to find desired result \n%s" % result)


def is_good_result(my_result, desired_result):
  """Tell whether `my_result` is acceptably close to `desired_result`.

  Higher scores are better. Each of 'train_score' and 'test_score' may fall
  short of the desired value by at most the fraction CUSHION_PERCENT;
  exceeding the desired value is always acceptable.

  Returns:
    True if both scores reach their thresholds, False otherwise.
  """
  keep_fraction = 1.0 - CUSHION_PERCENT
  return not any(
      desired_result[metric] * keep_fraction > my_result[metric]
      for metric in ('train_score', 'test_score'))


def test_compare_results():
  """Check benchmark results against the recorded desired results.

  Reads DESIRED_RESULTS_CSV (skipping its header line) and TEST_RESULTS_CSV,
  pairs every test result with its desired result, and prints each pair
  whose scores fall below the allowed threshold.

  Raises:
    AssertionError: if at least one benchmark result is not good enough.
    Exception: propagated from find_desired_result when a test result has
      no desired counterpart.
  """
  # Context managers guarantee both files are closed even if parsing fails.
  with open(DESIRED_RESULTS_CSV) as desired_file:
    # The first line of the desired-results file is the column header.
    desired_lines = desired_file.readlines()[1:]
  desired_results = parse_desired_results(desired_lines)

  with open(TEST_RESULTS_CSV) as results_file:
    test_lines = results_file.readlines()
  test_results = parse_test_results(test_lines)

  failures = []
  for test_result in test_results:
    desired_result = find_desired_result(test_result, desired_results)
    if not is_good_result(test_result, desired_result):
      failures.append(({"test_result": test_result}, {"desired_result": desired_result}))

  # Print every failing pair before asserting so the log shows all
  # regressions, not just the first one.
  for failure in failures:
    print(failure)
  assert_true(len(failures) == 0, "Some performance benchmarks not passed")


# The guard must live at module level: nested inside the function it never
# ran when the file was executed as a script.
if __name__ == "__main__":
  test_compare_results()
+73 −0
Original line number Diff line number Diff line
split,dataset,model,Train score/ROC-AUC,Valid score/ROC-AUC
Index splitting,tox21,logistic regression,0.903,0.705
Index splitting,tox21,Multitask network,0.856,0.763
Index splitting,tox21,robust MT-NN,0.857,0.767
Index splitting,tox21,graph convolution,0.872,0.798
Index splitting,muv,logistic regression,0.963,0.766
Index splitting,muv,Multitask network,0.904,0.764
Index splitting,muv,robust MT-NN,0.934,0.781
Index splitting,muv,graph convolution,0.840,0.823
Index splitting,pcba,logistic regression,0.809,0.776
Index splitting,pcba,Multitask network,0.826,0.802
Index splitting,pcba,robust MT-NN,0.809,0.783
Index splitting,pcba,graph convolution,0.876,0.852
Index splitting,sider,logistic regression,0.933,0.620
Index splitting,sider,Multitask network,0.775,0.634
Index splitting,sider,robust MT-NN,0.803,0.632
Index splitting,sider,graph convolution,0.708,0.594
Index splitting,toxcast,logistic regression,0.721,0.575
Index splitting,toxcast,Multitask network,0.830,0.678
Index splitting,toxcast,robust MT-NN,0.825,0.680
Index splitting,toxcast,graph convolution,0.821,0.720
Index splitting,clintox,logistic regression,0.967,0.676
Index splitting,clintox,Multitask network,0.934,0.830
Index splitting,clintox,robust MT-NN,0.949,0.827
Index splitting,clintox,graph convolution,0.946,0.860
Random splitting,tox21,logistic regression,0.902,0.715
Random splitting,tox21,Multitask network,0.844,0.795
Random splitting,tox21,robust MT-NN,0.855,0.773
Random splitting,tox21,graph convolution,0.865,0.827
Random splitting,muv,logistic regression,0.957,0.719
Random splitting,muv,Multitask network,0.902,0.734
Random splitting,muv,robust MT-NN,0.933,0.732
Random splitting,muv,graph convolution,0.860,0.730
Random splitting,pcba,logistic regression,0.808,0.776
Random splitting,pcba,Multitask network,0.811,0.778
Random splitting,pcba,robust MT-NN,0.811,0.771
Random splitting,pcba,graph convolution,0.872,0.844
Random splitting,sider,logistic regression,0.929,0.656
Random splitting,sider,Multitask network,0.777,0.655
Random splitting,sider,robust MT-NN,0.804,0.630
Random splitting,sider,graph convolution,0.705,0.618
Random splitting,toxcast,logistic regression,0.725,0.586
Random splitting,toxcast,Multitask network,0.836,0.684
Random splitting,toxcast,robust MT-NN,0.822,0.681
Random splitting,toxcast,graph convolution,0.820,0.717
Random splitting,clintox,logistic regression,0.972,0.725
Random splitting,clintox,Multitask network,0.951,0.834
Random splitting,clintox,robust MT-NN,0.959,0.830
Random splitting,clintox,graph convolution,0.975,0.876
Scaffold splitting,tox21,logistic regression,0.900,0.650
Scaffold splitting,tox21,Multitask network,0.863,0.703
Scaffold splitting,tox21,robust MT-NN,0.861,0.710
Scaffold splitting,tox21,graph convolution,0.885,0.732
Scaffold splitting,muv,logistic regression,0.947,0.767
Scaffold splitting,muv,Multitask network,0.899,0.762
Scaffold splitting,muv,robust MT-NN,0.944,0.726
Scaffold splitting,muv,graph convolution,0.872,0.795
Scaffold splitting,pcba,logistic regression,0.810,0.742
Scaffold splitting,pcba,Multitask network,0.814,0.760
Scaffold splitting,pcba,robust MT-NN,0.812,0.756
Scaffold splitting,pcba,graph convolution,0.874,0.817
Scaffold splitting,sider,logistic regression,0.926,0.592
Scaffold splitting,sider,Multitask network,0.776,0.557
Scaffold splitting,sider,robust MT-NN,0.797,0.560
Scaffold splitting,sider,graph convolution,0.722,0.583
Scaffold splitting,toxcast,logistic regression,0.716,0.492
Scaffold splitting,toxcast,Multitask network,0.828,0.617
Scaffold splitting,toxcast,robust MT-NN,0.830,0.614
Scaffold splitting,toxcast,graph convolution,0.832,0.638
Scaffold splitting,clintox,logistic regression,0.960,0.803
Scaffold splitting,clintox,Multitask network,0.947,0.862
Scaffold splitting,clintox,robust MT-NN,0.953,0.890
Scaffold splitting,clintox,graph convolution,0.957,0.823
Loading