Unverified Commit 91a76eae authored by Ashwin Murali's avatar Ashwin Murali Committed by GitHub
Browse files

Merge branch 'deepchem:master' into flake8fix

parents 0f2a6f11 eab6ee63
Loading
Loading
Loading
Loading
+137 −0
Original line number Diff line number Diff line
name: Test for DeepChem Jax
on:
  push: # ci work when pushing master branch
    branches:
      - master
  pull_request: # ci work when creating a PR to master branch
    branches:
      - master
jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        python-version: [3.7]
    steps:
    - uses: actions/checkout@v2
    - name: Cache pip modules for Linux
      uses: actions/cache@v2
      with:
        path: ~/.cache/pip
        key: ${{ runner.os }}-pip-${{ hashFiles('env.*.yml') }}
        restore-keys: |
          ${{ runner.os }}-pip-
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v2
      with:
        python-version: ${{ matrix.python-version }}
    - name: Build DeepChem
      run: |
        python -m pip install --upgrade pip
        pip install tensorflow==2.4
        pip install -e '.[jax]'
    - name: Import checking
      run: python -c "import deepchem; import jax;"

  test:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, windows-latest]
        python-version: [3.7]
        include:
          - os: ubuntu-latest
            python-version: 3.6
    env:
      OS: ${{ matrix.os }}
      PYTHON_VERSION: ${{ matrix.python-version }}
    steps:
    - uses: actions/checkout@v2
      with:
        fetch-depth: 0
    # https://github.com/galaxyproject/tools-iuc/blob/master/.github/workflows/pr.yaml
    # The range of commits to check for changes is:
    # - for events on the master branch we compare against the sha before the event
    #   (note that this does not work for feature branch events since we want all
    #   commits on the feature branch and not just the commits of the last event)
    # - for pull requests we compare against the 1st ancestor, given the current
    #   HEAD is the merge between the PR branch and the base branch
    - name: Set commit range (push to the master branch, e.g. merge)
      if: github.ref == 'refs/heads/master' && github.event_name == 'push'
      run: echo "COMMIT_RANGE=${{ github.event.before }}.." >> $GITHUB_ENV
    - name: Set commit range (pull request)
      if: github.event_name == 'pull_request'
      run: |
        git fetch origin master
        echo "COMMIT_RANGE=origin/master..." >> $GITHUB_ENV
    - name: Cache pip packages for Linux
      if: runner.os == 'Linux'
      uses: actions/cache@v2
      with:
        path: ~/.cache/pip
        key: ${{ runner.os }}-pip-${{ hashFiles('env.*.yml') }}
        restore-keys: |
          ${{ runner.os }}-pip-
    - name: Cache pip packages for MacOS
      if: runner.os == 'macOS'
      uses: actions/cache@v2
      with:
        path: ~/Library/Caches/pip
        key: ${{ matrix.os }}-pip-${{ hashFiles('env.*.yml') }}
        restore-keys: |
          ${{ runner.os }}-pip-
    - name: Cache pip packages for Windows
      if: runner.os == 'Windows'
      uses: actions/cache@v2
      with:
        path: ~\AppData\Local\pip\Cache
        key: ${{ matrix.os }}-pip-${{ hashFiles('env.*.yml') }}
        restore-keys: |
          ${{ runner.os }}-pip-
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v2
      with:
        python-version: ${{ matrix.python-version }}
    - name: Create env.yml
      shell: bash
      run: |
        python -m pip install --upgrade pip;
        pip install conda-merge;
        if [ "$(uname)" == 'Darwin' ]; then
          conda-merge env_jax.yml env.test.yml > env.yml
        else
          conda-merge env_jax.yml env.test.yml > env.yml
        fi;
    - name: Install all dependencies
      uses: conda-incubator/setup-miniconda@v2
      with:
        miniconda-version: "latest"
        auto-update-conda: true
        activate-environment: deepchem
        channels: omnia,conda-forge,defaults
        python-version: ${{ matrix.python-version }}
        environment-file: env.yml
    - name: Install DeepChem
      id: install
      shell: bash -l {0}
      run: pip install -e '.[jax]'
    - name: Yapf (version 0.22.0)
      id: yapf
      # on Windows, yapf raise the strange error..., so ignore
      if: runner.os == 'Linux' || runner.os == 'macOS'
      shell: bash -l {0}
      run: |
        CHANGED_FILES=`git diff --name-only $COMMIT_RANGE | grep .py$ | grep -v contrib/ || true`
        if [ -n "$CHANGED_FILES" ]; then
          yapf -d $CHANGED_FILES
        fi
    - name: Flake8
      if: ${{ (success() || failure()) && (steps.install.outcome == 'failure' || steps.install.outcome == 'success') }}
      shell: bash -l {0}
      run: source scripts/flake8_for_ci.sh
    - name: PyTest
      if: ${{ (success() || failure()) && (steps.install.outcome == 'failure' || steps.install.outcome == 'success') }}
      shell: bash -l {0}
      run: pytest -v -m jax deepchem
+1 −1
Original line number Diff line number Diff line
@@ -145,7 +145,7 @@ jobs:
    - name: PyTest
      if: ${{ (success() || failure()) && (steps.install.outcome == 'failure' || steps.install.outcome == 'success') }}
      shell: bash -l {0}
      run: pytest -v -m "not slow" --cov=deepchem --cov-report=xml deepchem
      run: pytest -v -m "not slow and not jax" --cov=deepchem --cov-report=xml deepchem
    - name: Upload coverage to Codecov
      if: ${{ (success() || failure()) && (steps.install.outcome == 'failure' || steps.install.outcome == 'success') }}
      uses: codecov/codecov-action@v1
+1 −0
Original line number Diff line number Diff line
@@ -33,6 +33,7 @@ from deepchem.feat.molecule_featurizers import RawFeaturizer
from deepchem.feat.molecule_featurizers import RDKitDescriptors
from deepchem.feat.molecule_featurizers import SmilesToImage
from deepchem.feat.molecule_featurizers import SmilesToSeq, create_char_to_idx
from deepchem.feat.molecule_featurizers import MATFeaturizer

# complex featurizers
from deepchem.feat.complex_featurizers import RdkitGridFeaturizer
+1 −0
Original line number Diff line number Diff line
@@ -17,3 +17,4 @@ from deepchem.feat.molecule_featurizers.smiles_to_seq import SmilesToSeq, create
from deepchem.feat.molecule_featurizers.mol_graph_conv_featurizer import MolGraphConvFeaturizer
from deepchem.feat.molecule_featurizers.mol_graph_conv_featurizer import PagtnMolGraphFeaturizer
from deepchem.feat.molecule_featurizers.molgan_featurizer import MolGanFeaturizer
from deepchem.feat.molecule_featurizers.mat_featurizer import MATFeaturizer
+103 −0
Original line number Diff line number Diff line
from deepchem.feat.base_classes import MolecularFeaturizer
from deepchem.utils.molecule_feature_utils import one_hot_encode
from deepchem.utils.typing import RDKitMol, RDKitAtom
import numpy as np


class MATFeaturizer(MolecularFeaturizer):
  """
  This class is a featurizer for the Molecule Attention Transformer [1]_.
  The featurizer accepts an RDKit Molecule, and a boolean (one_hot_formal_charge) as arguments.
  The returned value is a numpy array which consists of molecular graph descriptions:
    - Node Features
    - Adjacency Matrix
    - Distance Matrix

  References
  ---------
  .. [1] Lukasz Maziarka et al. "Molecule Attention Transformer`<https://arxiv.org/abs/2002.08264>`"

  Examples
  --------
  >>> import deepchem as dc
  >>> feat = dc.feat.MATFeaturizer()
  >>> out = feat.featurize("CCC")

  Note
  ----
  This class requires RDKit to be installed.
  """

  def __init__(
      self,
      one_hot_formal_charge: bool = True,
  ):
    """
    Parameters
    ----------
    one_hot_formal_charge: bool, default True
      If True, formal charges on atoms are one-hot encoded.
    """

    self.one_hot_formal_charge = one_hot_formal_charge

  def atom_features(self, atom: RDKitAtom) -> np.ndarray:
    """
    Deepchem already contains an atom_features function, however we are defining a new one here due to the need to handle features specific to MAT.
    Since we need new features like Atom GetNeighbors and IsInRing, and the number of features required for MAT is a fraction of what the Deepchem atom_features function computes, we can speed up computation by defining a custom function.

    Parameters
    ----------
    atom: RDKitAtom
      RDKit Atom object.

    Returns
    ----------
    Atom_features: ndarray
      Numpy array containing atom features.

    """
    attrib = []
    attrib += one_hot_encode(atom.GetAtomicNum(),
                             [5, 6, 7, 8, 9, 15, 16, 17, 35, 53, 999])
    attrib += one_hot_encode(len(atom.GetNeighbors()), [0, 1, 2, 3, 4, 5])
    attrib += one_hot_encode(atom.GetTotalNumHs(), [0, 1, 2, 3, 4])

    if self.one_hot_formal_charge:
      attrib += one_hot_encode(atom.GetFormalCharge(), [-1, 0, 1])
    else:
      attrib.append(atom.GetFormalCharge())

    attrib.append(atom.IsInRing())
    attrib.append(atom.GetIsAromatic())

    return np.array(attrib, dtype=np.float32)

  def _featurize(self, mol: RDKitMol) -> np.ndarray:
    """
    Featurize the molecule.

    Parameters
    ----------
    mol: RDKitMol
      RDKit mol object.

    Returns
    -------
    np.ndarray: A concatenated matrix consisting of node_features, adjacency_matrix and distance_matrix.
    """

    try:
      from rdkit import Chem
    except:
      raise ImportError("This class requires RDKit to be installed.")

    node_features = np.array(
        [self.atom_features(atom) for atom in mol.GetAtoms()])
    adjacency_matrix = Chem.rdmolops.GetAdjacencyMatrix(mol)
    distance_matrix = Chem.rdmolops.GetDistanceMatrix(mol)

    result = np.concatenate(
        [node_features, adjacency_matrix, distance_matrix], axis=1)

    return result
Loading