Commit e149a44f authored by seyonechithrananda's avatar seyonechithrananda
Browse files

move to examples

parent 5a5de538
Loading
Loading
Loading
Loading
+0 −7928

File deleted.

Preview size limit exceeded, changes collapsed.

+29 −29
Original line number Diff line number Diff line
@@ -7,7 +7,7 @@
      "provenance": [],
      "collapsed_sections": [],
      "mount_file_id": "1pD0fsKpYujJgNAttRn9vkdBYGpwCeVC0",
      "authorship_tag": "ABX9TyPyKWYOalt7P45/PzaAkzRP",
      "authorship_tag": "ABX9TyOqfnobS4p9ovUKCyQSOUah",
      "include_colab_link": true
    },
    "kernelspec": {
@@ -5173,7 +5173,7 @@
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/seyonechithrananda/bert-loves-chemistry/blob/master/22_Transfer_Learning_With_HuggingFace_tox21.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
        "<a href=\"https://colab.research.google.com/github/seyonechithrananda/deepchem/blob/chemberta-tutorial/22_Transfer_Learning_With_HuggingFace_tox21.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
@@ -5226,7 +5226,7 @@
      "source": [
        "!pip install transformers\n"
      ],
      "execution_count": 1,
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
@@ -5290,7 +5290,7 @@
        "  sys.path += ['bertviz_repo']\n",
        "!pip install regex"
      ],
      "execution_count": 2,
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
@@ -5353,7 +5353,7 @@
        "# Test if NVIDIA apex training tool works\n",
        "from apex import amp"
      ],
      "execution_count": 4,
      "execution_count": null,
      "outputs": []
    },
    {
@@ -5421,12 +5421,12 @@
        "from transformers import AutoModelWithLMHead, AutoTokenizer, pipeline, RobertaModel, RobertaTokenizer\n",
        "from bertviz import head_view\n",
        "\n",
        "model = AutoModelWithLMHead.from_pretrained(\"seyonec/ChemBERTa-zinc-base-v1\")\n",
        "tokenizer = AutoTokenizer.from_pretrained(\"seyonec/ChemBERTa-zinc-base-v1\")\n",
        "model = AutoModelWithLMHead.from_pretrained(\"seyonec/ChemBERTa_zinc250k_v2_40k\")\n",
        "tokenizer = AutoTokenizer.from_pretrained(\"seyonec/ChemBERTa_zinc250k_v2_40k\")\n",
        "\n",
        "fill_mask = pipeline('fill-mask', model=model, tokenizer=tokenizer)\n"
      ],
      "execution_count": 5,
      "execution_count": null,
      "outputs": [
        {
          "output_type": "display_data",
@@ -5598,7 +5598,7 @@
        "for smi in masked_smi:\n",
        "  print(smi)"
      ],
      "execution_count": 6,
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
@@ -5686,7 +5686,7 @@
        "        img = MolToImage(mol, size=(400, 400),fitImage=True)\n",
        "    return img"
      ],
      "execution_count": 8,
      "execution_count": null,
      "outputs": []
    },
    {
@@ -5727,7 +5727,7 @@
        "  img.format=\"PNG\" \n",
        "  image_list.append(img)"
      ],
      "execution_count": 9,
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
@@ -5759,7 +5759,7 @@
        "for img in image_list:\n",
        "  display(img)"
      ],
      "execution_count": 10,
      "execution_count": null,
      "outputs": [
        {
          "output_type": "display_data",
@@ -5835,7 +5835,7 @@
        "  }\n",
        "});"
      ],
      "execution_count": 11,
      "execution_count": null,
      "outputs": [
        {
          "output_type": "display_data",
@@ -5881,7 +5881,7 @@
        "        </script>\n",
        "        '''))"
      ],
      "execution_count": 12,
      "execution_count": null,
      "outputs": []
    },
    {
@@ -5926,7 +5926,7 @@
        "m = Chem.MolFromSmiles('CCCCC[C@@H](Br)CC')\n",
        "fig = Draw.MolToMPL(m, size=(200, 200))"
      ],
      "execution_count": 13,
      "execution_count": null,
      "outputs": [
        {
          "output_type": "display_data",
@@ -5968,7 +5968,7 @@
        "m = Chem.MolFromSmiles('CCCCC[C@H](Br)CC')\n",
        "fig = Draw.MolToMPL(m, size=(200,200))"
      ],
      "execution_count": 14,
      "execution_count": null,
      "outputs": [
        {
          "output_type": "display_data",
@@ -6090,7 +6090,7 @@
        "\n",
        "head_view(attention, tokens)"
      ],
      "execution_count": 15,
      "execution_count": null,
      "outputs": [
        {
          "output_type": "display_data",
@@ -6735,7 +6735,7 @@
      "source": [
        "!wget https://t.co/zrC7F8DcRs?amp=1"
      ],
      "execution_count": 16,
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
@@ -6831,7 +6831,7 @@
        "df.rename(columns={0:'smiles',1:'labels'}, inplace=True)\n",
        "df.head()"
      ],
      "execution_count": 18,
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
@@ -6930,7 +6930,7 @@
        "transformers_logger = logging.getLogger(\"transformers\")\n",
        "transformers_logger.setLevel(logging.WARNING)"
      ],
      "execution_count": 19,
      "execution_count": null,
      "outputs": []
    },
    {
@@ -6957,7 +6957,7 @@
      "source": [
        "model = ClassificationModel('roberta', 'seyonec/ChemBERTa-zinc-base-v1', args={'num_train_epochs': 3, 'auto_weights': True}) # You can set class weights by using the optional weight argument\n"
      ],
      "execution_count": 20,
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
@@ -6983,7 +6983,7 @@
        "train_dataset=df.sample(frac=train_size,random_state=200).reset_index(drop=True)\n",
        "test_dataset=df.drop(train_dataset.index).reset_index(drop=True)"
      ],
      "execution_count": 21,
      "execution_count": null,
      "outputs": []
    },
    {
@@ -7004,7 +7004,7 @@
        "print(\"TRAIN Dataset: {}\".format(train_dataset.shape))\n",
        "print(\"TEST Dataset: {}\".format(test_dataset.shape))"
      ],
      "execution_count": 22,
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
@@ -7043,7 +7043,7 @@
      "source": [
        "!wandb login"
      ],
      "execution_count": 23,
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
@@ -7118,7 +7118,7 @@
        "# Train the model\n",
        "model.train_model(train_dataset, output_dir='/content/chemberta_tox21', num_labels=2, use_cuda=True, args={'wandb_project': 'project-name'})\n"
      ],
      "execution_count": 24,
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
@@ -7630,7 +7630,7 @@
      "source": [
        "!pip install -U scikit-learn"
      ],
      "execution_count": 25,
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
@@ -7702,7 +7702,7 @@
        "import sklearn\n",
        "result, model_outputs, wrong_predictions = model.eval_model(test_dataset, acc=sklearn.metrics.accuracy_score)\n"
      ],
      "execution_count": 26,
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
@@ -7813,7 +7813,7 @@
        "# Lets input a molecule with a SR-p53 value of 0\n",
        "predictions, raw_outputs = model.predict(['CCCCOc1cc(C(=O)OCCN(CC)CC)ccc1N'])\n"
      ],
      "execution_count": 27,
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
@@ -7885,7 +7885,7 @@
        "print(predictions)\n",
        "print(raw_outputs)"
      ],
      "execution_count": 28,
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",