Merge pull request #395 from lilleswing/jenkins (e5a1f49d) · Commits · 钟慕尧 / deepchem

README.md

+2 −2

Original line number	Diff line number	Diff line
		@@ -221,8 +221,8 @@ Random splitting

		\|Dataset \|Model \|Train score/ROC-AUC\|Valid score/ROC-AUC\|
		\|-----------\|--------------------\|-------------------\|-------------------\|
		\|tox21 \|logistic regression \|0.903 \|0.735 \|
		\| \|Multitask network \|0.856 \|0.783 \|
		\|tox21 \|logistic regression \|0.902 \|0.715 \|
		\| \|Multitask network \|0.844 \|0.795 \|
		\| \|robust MT-NN \|0.855 \|0.773 \|
		\| \|graph convolution \|0.865 \|0.827 \|
		\|muv \|logistic regression \|0.957 \|0.719 \|

deepchem/splits/splitters.py

+2 −1

Original line number	Diff line number	Diff line
		@@ -388,8 +388,9 @@ class ScaffoldSplitter(Splitter):
		else:
		scaffolds[scaffold].append(ind)
		# Sort from largest to smallest scaffold sets
		scaffolds = {key: sorted(value) for key, value in scaffolds.items()}
		scaffold_sets = [scaffold_set for (scaffold, scaffold_set) in
		sorted(scaffolds.items(), key=lambda x: -len(x[1]))]
		sorted(scaffolds.items(), key=lambda x: (len(x[1]), x[1][0]), reverse=True)]
		train_cutoff = frac_train * len(dataset)
		valid_cutoff = (frac_train + frac_valid) * len(dataset)
		train_inds, valid_inds, test_inds = [], [], []

devtools/jenkins/Readme.md

0 → 100644

+54 −0

Original line number	Diff line number	Diff line
		# Usage Instructions
		On Stanford Internal Network http://171.65.103.54:8080/

		username: deepAdmin

		password: check sticky-note on tensorbro

		# Moving Instructions
		All the configuration necessary to run Jenkins is in /var/jenkins

		Simply rsync /var/jenkins to the new server.

		# Install Instructions
		#### Update the System
		``` bash
		sudo apt-get update
		sudo apt-get upgrade
		```
		#### Install Build Dependency Packages
		``` bash
		sudo apt-get install -y build-essential git python-pip libfreetype6-dev libxft-dev libncurses-dev libopenblas-dev gfortran python-matplotlib libblas-dev liblapack-dev libatlas-base-dev python-dev python-pydot linux-headers-generic linux-image-extra-virtual unzip python-numpy swig python-pandas python-sklearn unzip wget pkg-config zip g++ zlib1g-dev libcurl3-dev
		```

		#### Install latest Cuda and cudnn
		``` bash
		wget https://developer.nvidia.com/compute/cuda/8.0/Prod2/local_installers/cuda-repo-ubuntu1604-8-0-local-ga2_8.0.61-1_amd64-deb
		sudo dpkg -i cuda-repo-ubuntu1604-8-0-local-ga2_8.0.61-1_amd64-deb
		rm cuda-repo-ubuntu1604-8-0-local-ga2_8.0.61-1_amd64-deb
		sudo apt-get update
		sudo apt-get install -y cuda
		sudo dpkg -i libcudnn5_5.1.5-1+cuda8.0_amd64.deb
		sudo dpkg -i libcudnn5-dev_5.1.5-1+cuda8.0_amd64.deb
		```

		#### Install Jenkins
		``` bash
		wget -q -O - https://pkg.jenkins.io/debian/jenkins-ci.org.key | sudo apt-key add -
		sudo sh -c 'echo deb http://pkg.jenkins.io/debian-stable binary/ > /etc/apt/sources.list.d/jenkins.list'
		sudo apt-get update
		sudo apt-get install jenkins
		```

		#### Install Conda For the Jenkins User
		``` bash
		sudo su - jenkins
		bash Anaconda3-4.3.0-Linux-x86_64.sh
		```

		#### Configure Through Web-UI
		``` bash
		GOTO http://server:8080
		Install Suggested Plugins Through Web-UI
		Create First Admin User
		```

devtools/jenkins/compare_results.py

0 → 100644

+80 −0

Original line number	Diff line number	Diff line
		from nose.tools import assert_true, nottest

		# Fractional slack applied to each desired score: is_good_result accepts
		# a score down to desired * (1.0 - CUSHION_PERCENT), i.e. 1% below it.
		CUSHION_PERCENT = 0.01
		# Translates the short split/model keys read from the benchmark results
		# file into the long names used in the desired-results CSV.
		BENCHMARK_TO_DESIRED_KEY_MAP = {
		"index": "Index splitting",
		"random": "Random splitting",
		"scaffold": "Scaffold splitting",
		"logreg": "logistic regression",
		"tf": "Multitask network",
		"tf_robust": "robust MT-NN",
		"graphconv": "graph convolution",
		}
		# Reference scores that new benchmark runs are compared against.
		DESIRED_RESULTS_CSV = "devtools/jenkins/desired_results.csv"
		# Scores to check (presumably written by the benchmark run; confirm).
		TEST_RESULTS_CSV = "examples/results.csv"


		def parse_desired_results(desired_results):
		    """Parse data rows of the desired-results CSV into dictionaries.

		    Args:
		        desired_results: Iterable of comma-separated text lines, one per
		            benchmark, with the header row already removed. The columns
		            are split, dataset, model, train score and validation score,
		            in that order.

		    Returns:
		        A list with one dict per non-blank line, holding the keys
		        "split", "data_set", "model", "train_score" and "test_score".
		        The two scores are converted to float; the validation-score
		        column is stored under "test_score".

		    Raises:
		        IndexError: If a non-blank line has fewer than five fields.
		        ValueError: If a score field cannot be converted to float.
		    """
		    parsed = []
		    for line in desired_results:
		        # Blank lines (e.g. stray newlines at the end of the file) carry
		        # no data; skip them rather than crash on the missing fields.
		        if not line.strip():
		            continue
		        fields = line.split(',')
		        parsed.append({
		            "split": fields[0],
		            "data_set": fields[1],
		            "model": fields[2],
		            # float() tolerates the trailing newline on the last field.
		            "train_score": float(fields[3]),
		            "test_score": float(fields[4]),
		        })
		    return parsed


		@nottest
		def parse_test_results(test_results):
		    """Parse lines of the benchmark results file into dictionaries.

		    Only five comma-separated columns are read, by position: column 1
		    (dataset name), column 2 (split key), column 5 (model key),
		    column 6 (train score) and column 9 (test score). Split and model
		    keys are translated through BENCHMARK_TO_DESIRED_KEY_MAP so they can
		    be compared with the desired-results entries.

		    Marked ``@nottest`` so the test runner does not collect this helper
		    as a test case despite the "test" in its name.

		    Args:
		        test_results: Iterable of comma-separated text lines.

		    Returns:
		        A list with one dict per non-blank line, holding the keys
		        "split", "data_set", "model", "train_score" and "test_score".

		    Raises:
		        IndexError: If a non-blank line has fewer than ten fields.
		        KeyError: If a split or model key is missing from
		            BENCHMARK_TO_DESIRED_KEY_MAP.
		        ValueError: If a score field cannot be converted to float.
		    """
		    parsed = []
		    for line in test_results:
		        # Blank lines carry no data; skip them rather than crash on the
		        # missing fields.
		        if not line.strip():
		            continue
		        fields = line.split(',')
		        parsed.append({
		            "split": BENCHMARK_TO_DESIRED_KEY_MAP[fields[2]],
		            "data_set": fields[1],
		            "model": BENCHMARK_TO_DESIRED_KEY_MAP[fields[5]],
		            "train_score": float(fields[6]),
		            "test_score": float(fields[9]),
		        })
		    return parsed


		def find_desired_result(result, desired_results):
		    """Return the desired result describing the same benchmark as ``result``.

		    Two entries describe the same benchmark when their "data_set",
		    "split" and "model" values are all equal. The first matching entry
		    of ``desired_results`` is returned.

		    Args:
		        result: Dict for one benchmark run.
		        desired_results: Iterable of dicts to search.

		    Returns:
		        The first element of ``desired_results`` that matches.

		    Raises:
		        Exception: If no entry matches.
		    """
		    identity_keys = ('data_set', 'split', 'model')
		    for candidate in desired_results:
		        if all(result[field] == candidate[field] for field in identity_keys):
		            return candidate
		    raise Exception("Unable to find desired result \n%s" % result)


		def is_good_result(my_result, desired_result):
		    """Tell whether a benchmark result is acceptably close to the desired one.

		    Higher scores are better. A result passes when neither its
		    "train_score" nor its "test_score" falls below the corresponding
		    desired score reduced by the CUSHION_PERCENT fraction.

		    Args:
		        my_result: Dict with "train_score" and "test_score" to check.
		        desired_result: Dict with the reference "train_score" and
		            "test_score".

		    Returns:
		        False if any checked score is below its threshold, else True.
		    """
		    tolerance = 1.0 - CUSHION_PERCENT
		    # Lazily pair each metric with the lowest score still accepted for it.
		    thresholds = ((metric, desired_result[metric] * tolerance)
		                  for metric in ('train_score', 'test_score'))
		    return not any(my_result[metric] < floor for metric, floor in thresholds)


		def test_compare_results():
		    """Check every benchmark result against its desired reference score.

		    Reads the reference scores from DESIRED_RESULTS_CSV (skipping its
		    header row) and the results to check from TEST_RESULTS_CSV, looks up
		    the matching reference for each result, and collects every result
		    that is_good_result rejects. Rejected pairs are printed before the
		    final assertion so the failing benchmarks show up in the test output.

		    Raises:
		        AssertionError: If at least one result is below its threshold.
		        Exception: If a result has no matching desired entry
		            (raised by find_desired_result).
		    """
		    # Context managers close each file even if reading or parsing fails;
		    # the original left both file handles open.
		    with open(DESIRED_RESULTS_CSV) as desired_file:
		        # The first line of the desired-results CSV is the header row.
		        desired_results = parse_desired_results(desired_file.readlines()[1:])
		    with open(TEST_RESULTS_CSV) as results_file:
		        test_results = parse_test_results(results_file.readlines())

		    failures = []
		    for test_result in test_results:
		        desired_result = find_desired_result(test_result, desired_results)
		        if not is_good_result(test_result, desired_result):
		            failures.append(({"test_result": test_result},
		                             {"desired_result": desired_result}))

		    for failure in failures:
		        print(failure)
		    assert_true(len(failures) == 0, "Some performance benchmarks not passed")

		# Allow running the benchmark comparison directly as a script, without a test runner.
		if __name__ == "__main__":
		test_compare_results()

devtools/jenkins/desired_results.csv

0 → 100644

+73 −0

Original line number	Diff line number	Diff line
		split,dataset,model,Train score/ROC-AUC,Valid score/ROC-AUC
		Index splitting,tox21,logistic regression,0.903,0.705
		Index splitting,tox21,Multitask network,0.856,0.763
		Index splitting,tox21,robust MT-NN,0.857,0.767
		Index splitting,tox21,graph convolution,0.872,0.798
		Index splitting,muv,logistic regression,0.963,0.766
		Index splitting,muv,Multitask network,0.904,0.764
		Index splitting,muv,robust MT-NN,0.934,0.781
		Index splitting,muv,graph convolution,0.840,0.823
		Index splitting,pcba,logistic regression,0.809,0.776
		Index splitting,pcba,Multitask network,0.826,0.802
		Index splitting,pcba,robust MT-NN,0.809,0.783
		Index splitting,pcba,graph convolution,0.876,0.852
		Index splitting,sider,logistic regression,0.933,0.620
		Index splitting,sider,Multitask network,0.775,0.634
		Index splitting,sider,robust MT-NN,0.803,0.632
		Index splitting,sider,graph convolution,0.708,0.594
		Index splitting,toxcast,logistic regression,0.721,0.575
		Index splitting,toxcast,Multitask network,0.830,0.678
		Index splitting,toxcast,robust MT-NN,0.825,0.680
		Index splitting,toxcast,graph convolution,0.821,0.720
		Index splitting,clintox,logistic regression,0.967,0.676
		Index splitting,clintox,Multitask network,0.934,0.830
		Index splitting,clintox,robust MT-NN,0.949,0.827
		Index splitting,clintox,graph convolution,0.946,0.860
		Random splitting,tox21,logistic regression,0.902,0.715
		Random splitting,tox21,Multitask network,0.844,0.795
		Random splitting,tox21,robust MT-NN,0.855,0.773
		Random splitting,tox21,graph convolution,0.865,0.827
		Random splitting,muv,logistic regression,0.957,0.719
		Random splitting,muv,Multitask network,0.902,0.734
		Random splitting,muv,robust MT-NN,0.933,0.732
		Random splitting,muv,graph convolution,0.860,0.730
		Random splitting,pcba,logistic regression,0.808,0.776
		Random splitting,pcba,Multitask network,0.811,0.778
		Random splitting,pcba,robust MT-NN,0.811,0.771
		Random splitting,pcba,graph convolution,0.872,0.844
		Random splitting,sider,logistic regression,0.929,0.656
		Random splitting,sider,Multitask network,0.777,0.655
		Random splitting,sider,robust MT-NN,0.804,0.630
		Random splitting,sider,graph convolution,0.705,0.618
		Random splitting,toxcast,logistic regression,0.725,0.586
		Random splitting,toxcast,Multitask network,0.836,0.684
		Random splitting,toxcast,robust MT-NN,0.822,0.681
		Random splitting,toxcast,graph convolution,0.820,0.717
		Random splitting,clintox,logistic regression,0.972,0.725
		Random splitting,clintox,Multitask network,0.951,0.834
		Random splitting,clintox,robust MT-NN,0.959,0.830
		Random splitting,clintox,graph convolution,0.975,0.876
		Scaffold splitting,tox21,logistic regression,0.900,0.650
		Scaffold splitting,tox21,Multitask network,0.863,0.703
		Scaffold splitting,tox21,robust MT-NN,0.861,0.710
		Scaffold splitting,tox21,graph convolution,0.885,0.732
		Scaffold splitting,muv,logistic regression,0.947,0.767
		Scaffold splitting,muv,Multitask network,0.899,0.762
		Scaffold splitting,muv,robust MT-NN,0.944,0.726
		Scaffold splitting,muv,graph convolution,0.872,0.795
		Scaffold splitting,pcba,logistic regression,0.810,0.742
		Scaffold splitting,pcba,Multitask network,0.814,0.760
		Scaffold splitting,pcba,robust MT-NN,0.812,0.756
		Scaffold splitting,pcba,graph convolution,0.874,0.817
		Scaffold splitting,sider,logistic regression,0.926,0.592
		Scaffold splitting,sider,Multitask network,0.776,0.557
		Scaffold splitting,sider,robust MT-NN,0.797,0.560
		Scaffold splitting,sider,graph convolution,0.722,0.583
		Scaffold splitting,toxcast,logistic regression,0.716,0.492
		Scaffold splitting,toxcast,Multitask network,0.828,0.617
		Scaffold splitting,toxcast,robust MT-NN,0.830,0.614
		Scaffold splitting,toxcast,graph convolution,0.832,0.638
		Scaffold splitting,clintox,logistic regression,0.960,0.803
		Scaffold splitting,clintox,Multitask network,0.947,0.862
		Scaffold splitting,clintox,robust MT-NN,0.953,0.890
		Scaffold splitting,clintox,graph convolution,0.957,0.823

Original line number	Diff line number	Diff line
		@@ -221,8 +221,8 @@ Random splitting

		\|Dataset \|Model \|Train score/ROC-AUC\|Valid score/ROC-AUC\|
		\|-----------\|--------------------\|-------------------\|-------------------\|
		\|tox21 \|logistic regression \|0.903 \|0.735 \|
		\| \|Multitask network \|0.856 \|0.783 \|
		\|tox21 \|logistic regression \|0.902 \|0.715 \|
		\| \|Multitask network \|0.844 \|0.795 \|
		\| \|robust MT-NN \|0.855 \|0.773 \|
		\| \|graph convolution \|0.865 \|0.827 \|
		\|muv \|logistic regression \|0.957 \|0.719 \|

Admin message