Commit 6e3b05d9 authored by peastman's avatar peastman
Browse files

Added documentation on type annotations

parent d65c8b2a
Loading
Loading
Loading
Loading
+4 −4
Original line number Diff line number Diff line
@@ -16,7 +16,7 @@ from deepchem.metrics import to_one_hot
from tensorflow.keras.layers import Input, Dense, Reshape, Softmax, Dropout, Activation, Lambda

from typing import Any, Callable, Iterable, List, Optional, Sequence, Tuple, Union
from deepchem.utils.typing import ActivationFn, LossFunction, OneOrMany
from deepchem.utils.typing import KerasActivationFn, KerasLossFn, OneOrMany

logger = logging.getLogger(__name__)

@@ -45,7 +45,7 @@ class MultitaskClassifier(KerasModel):
               weight_decay_penalty: float = 0.0,
               weight_decay_penalty_type: str = "l2",
               dropouts: OneOrMany[float] = 0.5,
               activation_fns: OneOrMany[ActivationFn] = tf.nn.relu,
               activation_fns: OneOrMany[KerasActivationFn] = tf.nn.relu,
               n_classes: int = 2,
               residual: bool = False,
               **kwargs) -> None:
@@ -195,7 +195,7 @@ class MultitaskRegressor(KerasModel):
               weight_decay_penalty: float = 0.0,
               weight_decay_penalty_type: str = "l2",
               dropouts: OneOrMany[float] = 0.5,
               activation_fns: OneOrMany[ActivationFn] = tf.nn.relu,
               activation_fns: OneOrMany[KerasActivationFn] = tf.nn.relu,
               uncertainty: bool = False,
               residual: bool = False,
               **kwargs) -> None:
@@ -300,7 +300,7 @@ class MultitaskRegressor(KerasModel):
            stddev=weight_init_stddevs[-1]),
        bias_initializer=tf.constant_initializer(
            value=bias_init_consts[-1]))(prev_layer))
    loss: Union[dc.models.losses.Loss, LossFunction]
    loss: Union[dc.models.losses.Loss, KerasLossFn]
    if uncertainty:
      log_var = Reshape((n_tasks, 1))(Dense(
          n_tasks,
+6 −6
Original line number Diff line number Diff line
@@ -19,7 +19,7 @@ from deepchem.trans import Transformer, undo_transforms
from deepchem.utils.evaluate import GeneratorEvaluator

from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Tuple, Union
from deepchem.utils.typing import LossFunction, OneOrMany
from deepchem.utils.typing import KerasLossFn, OneOrMany

try:
  import wandb
@@ -118,7 +118,7 @@ class KerasModel(Model):

  def __init__(self,
               model: tf.keras.Model,
               loss: Union[Loss, LossFunction],
               loss: Union[Loss, KerasLossFn],
               output_types: Optional[List[str]] = None,
               batch_size: int = 100,
               model_dir: Optional[str] = None,
@@ -166,7 +166,7 @@ class KerasModel(Model):
        model_instance=model, model_dir=model_dir, **kwargs)
    self.model = model
    if isinstance(loss, Loss):
      self._loss_fn: LossFunction = _StandardLoss(model, loss)
      self._loss_fn: KerasLossFn = _StandardLoss(model, loss)
    else:
      self._loss_fn = loss
    self.batch_size = batch_size
@@ -271,7 +271,7 @@ class KerasModel(Model):
          deterministic: bool = False,
          restore: bool = False,
          variables: Optional[List[tf.Variable]] = None,
          loss: Optional[LossFunction] = None,
          loss: Optional[KerasLossFn] = None,
          callbacks: Union[Callable, List[Callable]] = []) -> float:
    """Train this model on a dataset.

@@ -319,7 +319,7 @@ class KerasModel(Model):
                    checkpoint_interval: int = 1000,
                    restore: bool = False,
                    variables: Optional[List[tf.Variable]] = None,
                    loss: Optional[LossFunction] = None,
                    loss: Optional[KerasLossFn] = None,
                    callbacks: Union[Callable, List[Callable]] = []) -> float:
    """Train this model on data from a generator.

@@ -461,7 +461,7 @@ class KerasModel(Model):
                   y: Sequence,
                   w: Sequence,
                   variables: Optional[List[tf.Variable]] = None,
                   loss: Optional[LossFunction] = None,
                   loss: Optional[KerasLossFn] = None,
                   callbacks: Union[Callable, List[Callable]] = [],
                   checkpoint: bool = True,
                   max_checkpoints_to_keep: int = 5) -> float:
+10 −2
Original line number Diff line number Diff line
@@ -3,7 +3,15 @@
from typing import Callable, List, Sequence, Tuple, TypeVar, Union

T = TypeVar("T")
ActivationFn = Union[Callable, str]
LossFunction = Callable[[List, List, List], float]

# An activation function for a Keras layer: either a TensorFlow function or the name of a standard activation
KerasActivationFn = Union[Callable, str]

# A loss function for use with KerasModel: f(outputs, labels, weights)
KerasLossFn = Callable[[List, List, List], float]

# A single value of some type, or multiple values of that type
OneOrMany = Union[T, Sequence[T]]

# The shape of a NumPy array
Shape = Tuple[int, ...]

docs/coding.rst

0 → 100644
+100 −0
Original line number Diff line number Diff line
Coding Conventions
==================

Code Formatting
---------------

.. _`yapf`: https://github.com/google/yapf

We use `yapf`_ to format all of the code in DeepChem.  Although it sometimes
produces slightly awkward formatting, it does have two major benefits.  First,
it ensures complete consistency throughout the entire codebase.  And second, it
avoids disagreements about how a piece of code should be formatted.

Whenever you modify a file, run :code:`yapf` on it to reformat it before
checking it in.

.. code-block:: bash

  yapf -i <modified file>

Yapf is run on every pull request to make sure the formatting is correct, so if
you forget to do this the continuous integration system will remind you.


Docstrings
----------

All classes and functions should include docstrings describing their purpose and
intended usage.  When in doubt about how much information to include, always err
on the side of including more rather than less.  Explain what problem a class is
intended to solve, what algorithms it uses, and how to use it correctly.  When
appropriate, cite the relevant publications.

.. _`numpy`: https://numpydoc.readthedocs.io/en/latest/format.html#docstring-standard

All docstrings should follow the `numpy`_ docstring formatting conventions.


Unit Tests
----------

Having an extensive collection of test cases is essential to ensure the code
works correctly.  If you haven't written tests for a feature, that means the
feature isn't finished yet.  Untested code is code that probably doesn't work.

Complex numerical code is sometimes challenging to test fully.  When an
algorithm produces a result, it is not always obvious how to tell whether that
result is correct.  Whenever possible, try to find simple examples for which
the correct answer is exactly known.  Sometimes we rely on stochastic
tests which will *probably* pass if the code is correct and *probably* fail if
the code is broken.  This means these tests are expected to fail a small
fraction of the time.  Such tests can be marked with the :code:`@flaky`
annotation.  If they fail during continuous integration, they will be run a
second time and an error only reported if they fail again.

If possible, each test should run in no more than a few seconds.  Occasionally
this is not possible.  In that case, mark the test with the :code:`@pytest.mark.slow`
annotation.  Slow tests are skipped during continuous integration, so changes
that break them may sometimes slip through and get merged into the repository.
We still try to run them regularly, so hopefully the problem will be discovered
fairly soon.


Type Annotations
----------------

Type annotations are an important tool for avoiding bugs.  All new code should
provide type annotations for function arguments and return types.  When you make
significant changes to existing code that does not have type annotations, please
consider adding them at the same time.

.. _`mypy`: http://mypy-lang.org/

We use the `mypy`_ static type checker to verify code correctness.  It is
automatically run on every pull request.  If you want to run it locally to make
sure you are using types correctly before checking in your code, :code:`cd` to
the top level directory of the repository and execute the command

.. code-block:: bash

  mypy -p deepchem --ignore-missing-imports

Because Python is such a dynamic language, it is not always obvious what type
to specify.  A good rule of thumb is to be permissive about input types and
strict about output types.  For example, many functions are documented as taking
a list as an argument, but actually work just as well with a tuple.  In those
cases, it is best to specify the input type as :code:`Sequence` to accept either
one.  But if a function returns a list, specify the type as :code:`List` because
we can guarantee the return value will always have that exact type.

Another important case is NumPy arrays.  Many functions are documented as taking
an array, but actually can accept any array-like object: a list of numbers, a
list of lists of numbers, a list of arrays, etc.  In that case, specify the type
as :code:`Sequence` to accept any of these.  On the other hand, if the function
truly requires an array and will fail with any other input, specify it as
:code:`np.ndarray`.

The :code:`deepchem.utils.typing` module contains definitions of some types that
appear frequently in the DeepChem API.  You may find them useful when annotating
code.
+6 −5
Original line number Diff line number Diff line
@@ -140,3 +140,4 @@ discussions about research, development or any general questions. If you'd like
   Reinforcement Learning <rl>
   Docking <docking>
   Utilities <utils>
   Coding Conventions <coding>