Commit 6e3b05d9 authored by peastman's avatar peastman
Browse files

Added documentation on type annotations

parent d65c8b2a
Loading
Loading
Loading
Loading
+4 −4
Original line number Diff line number Diff line
@@ -16,7 +16,7 @@ from deepchem.metrics import to_one_hot
from tensorflow.keras.layers import Input, Dense, Reshape, Softmax, Dropout, Activation, Lambda

from typing import Any, Callable, Iterable, List, Optional, Sequence, Tuple, Union
from deepchem.utils.typing import ActivationFn, LossFunction, OneOrMany
from deepchem.utils.typing import KerasActivationFn, KerasLossFn, OneOrMany

logger = logging.getLogger(__name__)

@@ -45,7 +45,7 @@ class MultitaskClassifier(KerasModel):
               weight_decay_penalty: float = 0.0,
               weight_decay_penalty_type: str = "l2",
               dropouts: OneOrMany[float] = 0.5,
               activation_fns: OneOrMany[ActivationFn] = tf.nn.relu,
               activation_fns: OneOrMany[KerasActivationFn] = tf.nn.relu,
               n_classes: int = 2,
               residual: bool = False,
               **kwargs) -> None:
@@ -195,7 +195,7 @@ class MultitaskRegressor(KerasModel):
               weight_decay_penalty: float = 0.0,
               weight_decay_penalty_type: str = "l2",
               dropouts: OneOrMany[float] = 0.5,
               activation_fns: OneOrMany[ActivationFn] = tf.nn.relu,
               activation_fns: OneOrMany[KerasActivationFn] = tf.nn.relu,
               uncertainty: bool = False,
               residual: bool = False,
               **kwargs) -> None:
@@ -300,7 +300,7 @@ class MultitaskRegressor(KerasModel):
            stddev=weight_init_stddevs[-1]),
        bias_initializer=tf.constant_initializer(
            value=bias_init_consts[-1]))(prev_layer))
    loss: Union[dc.models.losses.Loss, LossFunction]
    loss: Union[dc.models.losses.Loss, KerasLossFn]
    if uncertainty:
      log_var = Reshape((n_tasks, 1))(Dense(
          n_tasks,
+6 −6
Original line number Diff line number Diff line
@@ -19,7 +19,7 @@ from deepchem.trans import Transformer, undo_transforms
from deepchem.utils.evaluate import GeneratorEvaluator

from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Tuple, Union
from deepchem.utils.typing import LossFunction, OneOrMany
from deepchem.utils.typing import KerasLossFn, OneOrMany

try:
  import wandb
@@ -118,7 +118,7 @@ class KerasModel(Model):

  def __init__(self,
               model: tf.keras.Model,
               loss: Union[Loss, LossFunction],
               loss: Union[Loss, KerasLossFn],
               output_types: Optional[List[str]] = None,
               batch_size: int = 100,
               model_dir: Optional[str] = None,
@@ -166,7 +166,7 @@ class KerasModel(Model):
        model_instance=model, model_dir=model_dir, **kwargs)
    self.model = model
    if isinstance(loss, Loss):
      self._loss_fn: LossFunction = _StandardLoss(model, loss)
      self._loss_fn: KerasLossFn = _StandardLoss(model, loss)
    else:
      self._loss_fn = loss
    self.batch_size = batch_size
@@ -271,7 +271,7 @@ class KerasModel(Model):
          deterministic: bool = False,
          restore: bool = False,
          variables: Optional[List[tf.Variable]] = None,
          loss: Optional[LossFunction] = None,
          loss: Optional[KerasLossFn] = None,
          callbacks: Union[Callable, List[Callable]] = []) -> float:
    """Train this model on a dataset.

@@ -319,7 +319,7 @@ class KerasModel(Model):
                    checkpoint_interval: int = 1000,
                    restore: bool = False,
                    variables: Optional[List[tf.Variable]] = None,
                    loss: Optional[LossFunction] = None,
                    loss: Optional[KerasLossFn] = None,
                    callbacks: Union[Callable, List[Callable]] = []) -> float:
    """Train this model on data from a generator.

@@ -461,7 +461,7 @@ class KerasModel(Model):
                   y: Sequence,
                   w: Sequence,
                   variables: Optional[List[tf.Variable]] = None,
                   loss: Optional[LossFunction] = None,
                   loss: Optional[KerasLossFn] = None,
                   callbacks: Union[Callable, List[Callable]] = [],
                   checkpoint: bool = True,
                   max_checkpoints_to_keep: int = 5) -> float:
+10 −2
Original line number Diff line number Diff line
@@ -3,7 +3,15 @@
from typing import Callable, List, Sequence, Tuple, TypeVar, Union

T = TypeVar("T")
ActivationFn = Union[Callable, str]
LossFunction = Callable[[List, List, List], float]

# An activation function for a Keras layer: either a TensorFlow function or the name of a standard activation
KerasActivationFn = Union[Callable, str]

# A loss function for use with KerasModel: f(outputs, labels, weights)
KerasLossFn = Callable[[List, List, List], float]

# A single value of some type, or multiple values of that type
OneOrMany = Union[T, Sequence[T]]

# The shape of a NumPy array
Shape = Tuple[int, ...]

docs/coding.rst

0 → 100644
+100 −0
Original line number Diff line number Diff line
Coding Conventions
==================

Code Formatting
---------------

.. _`yapf`: https://github.com/google/yapf

We use `yapf`_ to format all of the code in DeepChem.  Although it sometimes
produces slightly awkward formatting, it does have two major benefits.  First,
it ensures complete consistency throughout the entire codebase.  And second, it
avoids disagreements about how a piece of code should be formatted.

Whenever you modify a file, run :code:`yapf` on it to reformat it before
checking it in.

.. code-block:: bash

  yapf -i <modified file>

Yapf is run on every pull request to make sure the formatting is correct, so if
you forget to do this the continuous integration system will remind you.


Docstrings
----------

All classes and functions should include docstrings describing their purpose and
intended usage.  When in doubt about how much information to include, always err
on the side of including more rather than less.  Explain what problem a class is
intended to solve, what algorithms it uses, and how to use it correctly.  When
appropriate, cite the relevant publications.

.. _`numpy`: https://numpydoc.readthedocs.io/en/latest/format.html#docstring-standard

All docstrings should follow the `numpy`_ docstring formatting conventions.


Unit Tests
----------

Having an extensive collection of test cases is essential to ensure the code
works correctly.  If you haven't written tests for a feature, that means the
feature isn't finished yet.  Untested code is code that probably doesn't work.

Complex numerical code is sometimes challenging to test fully.  When an
algorithm produces a result, it is not always obvious how to tell whether that
result is correct.  Whenever possible, try to find simple examples for which
the correct answer is exactly known.  Sometimes we rely on stochastic
tests which will *probably* pass if the code is correct and *probably* fail if
the code is broken.  This means these tests are expected to fail a small
fraction of the time.  Such tests can be marked with the :code:`@flaky`
annotation.  If they fail during continuous integration, they will be run a
second time and an error only reported if they fail again.

If possible, each test should run in no more than a few seconds.  Occasionally
this is not possible.  In that case, mark the test with the :code:`@pytest.mark.slow`
annotation.  Slow tests are skipped during continuous integration, so changes
that break them may sometimes slip through and get merged into the repository.
We still try to run them regularly, so hopefully the problem will be discovered
fairly soon.


Type Annotations
----------------

Type annotations are an important tool for avoiding bugs.  All new code should
provide type annotations for function arguments and return types.  When you make
significant changes to existing code that does not have type annotations, please
consider adding them at the same time.

.. _`mypy`: http://mypy-lang.org/

We use the `mypy`_ static type checker to verify code correctness.  It is
automatically run on every pull request.  If you want to run it locally to make
sure you are using types correctly before checking in your code, :code:`cd` to
the top level directory of the repository and execute the command

.. code-block:: bash

  mypy -p deepchem --ignore-missing-imports

Because Python is such a dynamic language, it is not always obvious what type
to specify.  A good rule of thumb is to be permissive about input types and
strict about output types.  For example, many functions are documented as taking
a list as an argument, but actually work just as well with a tuple.  In those
cases, it is best to specify the input type as :code:`Sequence` to accept either
one.  But if a function returns a list, specify the type as :code:`List` because
we can guarantee the return value will always have that exact type.

Another important case is NumPy arrays.  Many functions are documented as taking
an array, but actually can accept any array-like object: a list of numbers, a
list of lists of numbers, a list of arrays, etc.  In that case, specify the type
as :code:`Sequence` to accept any of these.  On the other hand, if the function
truly requires an array and will fail with any other input, specify it as
:code:`np.ndarray`.

The :code:`deepchem.utils.typing` module contains definitions of some types that
appear frequently in the DeepChem API.  You may find them useful when annotating
code.
+6 −5
Original line number Diff line number Diff line
@@ -140,3 +140,4 @@ discussions about research, development or any general questions. If you'd like
   Reinforcement Learning <rl>
   Docking <docking>
   Utilities <utils>
   Coding Conventions <coding>