move to examples (e149a44f) · Commits · 钟慕尧 / deepchem

22_Transfer_Learning_With_HuggingFace_tox21.ipynb

deleted100644 → 0

+0 −7928

File deleted.

Preview size limit exceeded, changes collapsed.

examples/tutorials/22_Transfer_Learning_With_HuggingFace_tox21.ipynb

+29 −29

Original line number	Diff line number	Diff line
		@@ -7,7 +7,7 @@
		"provenance": [],
		"collapsed_sections": [],
		"mount_file_id": "1pD0fsKpYujJgNAttRn9vkdBYGpwCeVC0",
		"authorship_tag": "ABX9TyPyKWYOalt7P45/PzaAkzRP",
		"authorship_tag": "ABX9TyOqfnobS4p9ovUKCyQSOUah",
		"include_colab_link": true
		},
		"kernelspec": {
		@@ -5173,7 +5173,7 @@
		"colab_type": "text"
		},
		"source": [
		"<a href=\"https://colab.research.google.com/github/seyonechithrananda/bert-loves-chemistry/blob/master/22_Transfer_Learning_With_HuggingFace_tox21.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
		"<a href=\"https://colab.research.google.com/github/seyonechithrananda/deepchem/blob/chemberta-tutorial/22_Transfer_Learning_With_HuggingFace_tox21.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
		]
		},
		{
		@@ -5226,7 +5226,7 @@
		"source": [
		"!pip install transformers\n"
		],
		"execution_count": 1,
		"execution_count": null,
		"outputs": [
		{
		"output_type": "stream",
		@@ -5290,7 +5290,7 @@
		" sys.path += ['bertviz_repo']\n",
		"!pip install regex"
		],
		"execution_count": 2,
		"execution_count": null,
		"outputs": [
		{
		"output_type": "stream",
		@@ -5353,7 +5353,7 @@
		"# Test if NVIDIA apex training tool works\n",
		"from apex import amp"
		],
		"execution_count": 4,
		"execution_count": null,
		"outputs": []
		},
		{
		@@ -5421,12 +5421,12 @@
		"from transformers import AutoModelWithLMHead, AutoTokenizer, pipeline, RobertaModel, RobertaTokenizer\n",
		"from bertviz import head_view\n",
		"\n",
		"model = AutoModelWithLMHead.from_pretrained(\"seyonec/ChemBERTa-zinc-base-v1\")\n",
		"tokenizer = AutoTokenizer.from_pretrained(\"seyonec/ChemBERTa-zinc-base-v1\")\n",
		"model = AutoModelWithLMHead.from_pretrained(\"seyonec/ChemBERTa_zinc250k_v2_40k\")\n",
		"tokenizer = AutoTokenizer.from_pretrained(\"seyonec/ChemBERTa_zinc250k_v2_40k\")\n",
		"\n",
		"fill_mask = pipeline('fill-mask', model=model, tokenizer=tokenizer)\n"
		],
		"execution_count": 5,
		"execution_count": null,
		"outputs": [
		{
		"output_type": "display_data",
		@@ -5598,7 +5598,7 @@
		"for smi in masked_smi:\n",
		" print(smi)"
		],
		"execution_count": 6,
		"execution_count": null,
		"outputs": [
		{
		"output_type": "stream",
		@@ -5686,7 +5686,7 @@
		" img = MolToImage(mol, size=(400, 400),fitImage=True)\n",
		" return img"
		],
		"execution_count": 8,
		"execution_count": null,
		"outputs": []
		},
		{
		@@ -5727,7 +5727,7 @@
		" img.format=\"PNG\" \n",
		" image_list.append(img)"
		],
		"execution_count": 9,
		"execution_count": null,
		"outputs": [
		{
		"output_type": "stream",
		@@ -5759,7 +5759,7 @@
		"for img in image_list:\n",
		" display(img)"
		],
		"execution_count": 10,
		"execution_count": null,
		"outputs": [
		{
		"output_type": "display_data",
		@@ -5835,7 +5835,7 @@
		" }\n",
		"});"
		],
		"execution_count": 11,
		"execution_count": null,
		"outputs": [
		{
		"output_type": "display_data",
		@@ -5881,7 +5881,7 @@
		" </script>\n",
		" '''))"
		],
		"execution_count": 12,
		"execution_count": null,
		"outputs": []
		},
		{
		@@ -5926,7 +5926,7 @@
		"m = Chem.MolFromSmiles('CCCCC[C@@H](Br)CC')\n",
		"fig = Draw.MolToMPL(m, size=(200, 200))"
		],
		"execution_count": 13,
		"execution_count": null,
		"outputs": [
		{
		"output_type": "display_data",
		@@ -5968,7 +5968,7 @@
		"m = Chem.MolFromSmiles('CCCCC[C@H](Br)CC')\n",
		"fig = Draw.MolToMPL(m, size=(200,200))"
		],
		"execution_count": 14,
		"execution_count": null,
		"outputs": [
		{
		"output_type": "display_data",
		@@ -6090,7 +6090,7 @@
		"\n",
		"head_view(attention, tokens)"
		],
		"execution_count": 15,
		"execution_count": null,
		"outputs": [
		{
		"output_type": "display_data",
		@@ -6735,7 +6735,7 @@
		"source": [
		"!wget https://t.co/zrC7F8DcRs?amp=1"
		],
		"execution_count": 16,
		"execution_count": null,
		"outputs": [
		{
		"output_type": "stream",
		@@ -6831,7 +6831,7 @@
		"df.rename(columns={0:'smiles',1:'labels'}, inplace=True)\n",
		"df.head()"
		],
		"execution_count": 18,
		"execution_count": null,
		"outputs": [
		{
		"output_type": "execute_result",
		@@ -6930,7 +6930,7 @@
		"transformers_logger = logging.getLogger(\"transformers\")\n",
		"transformers_logger.setLevel(logging.WARNING)"
		],
		"execution_count": 19,
		"execution_count": null,
		"outputs": []
		},
		{
		@@ -6957,7 +6957,7 @@
		"source": [
		"model = ClassificationModel('roberta', 'seyonec/ChemBERTa-zinc-base-v1', args={'num_train_epochs': 3, 'auto_weights': True}) # You can set class weights by using the optional weight argument\n"
		],
		"execution_count": 20,
		"execution_count": null,
		"outputs": [
		{
		"output_type": "stream",
		@@ -6983,7 +6983,7 @@
		"train_dataset=df.sample(frac=train_size,random_state=200).reset_index(drop=True)\n",
		"test_dataset=df.drop(train_dataset.index).reset_index(drop=True)"
		],
		"execution_count": 21,
		"execution_count": null,
		"outputs": []
		},
		{
		@@ -7004,7 +7004,7 @@
		"print(\"TRAIN Dataset: {}\".format(train_dataset.shape))\n",
		"print(\"TEST Dataset: {}\".format(test_dataset.shape))"
		],
		"execution_count": 22,
		"execution_count": null,
		"outputs": [
		{
		"output_type": "stream",
		@@ -7043,7 +7043,7 @@
		"source": [
		"!wandb login"
		],
		"execution_count": 23,
		"execution_count": null,
		"outputs": [
		{
		"output_type": "stream",
		@@ -7118,7 +7118,7 @@
		"# Train the model\n",
		"model.train_model(train_dataset, output_dir='/content/chemberta_tox21', num_labels=2, use_cuda=True, args={'wandb_project': 'project-name'})\n"
		],
		"execution_count": 24,
		"execution_count": null,
		"outputs": [
		{
		"output_type": "stream",
		@@ -7630,7 +7630,7 @@
		"source": [
		"!pip install -U scikit-learn"
		],
		"execution_count": 25,
		"execution_count": null,
		"outputs": [
		{
		"output_type": "stream",
		@@ -7702,7 +7702,7 @@
		"import sklearn\n",
		"result, model_outputs, wrong_predictions = model.eval_model(test_dataset, acc=sklearn.metrics.accuracy_score)\n"
		],
		"execution_count": 26,
		"execution_count": null,
		"outputs": [
		{
		"output_type": "stream",
		@@ -7813,7 +7813,7 @@
		"# Lets input a molecule with a SR-p53 value of 0\n",
		"predictions, raw_outputs = model.predict(['CCCCOc1cc(C(=O)OCCN(CC)CC)ccc1N'])\n"
		],
		"execution_count": 27,
		"execution_count": null,
		"outputs": [
		{
		"output_type": "stream",
		@@ -7885,7 +7885,7 @@
		"print(predictions)\n",
		"print(raw_outputs)"
		],
		"execution_count": 28,
		"execution_count": null,
		"outputs": [
		{
		"output_type": "stream",

Admin message