Skip to content

Commit

Permalink
feat: way to only recompile changed files (#2643)
Browse files Browse the repository at this point in the history
* feat: lazy compile

* refactor: lazy -> only_changed

* refactor: leave the changed function up to the user

* refactor: pass a function, based on @YannickJadoul and @HDembinski's suggestions

* refactor: old -> _old, as it's not intended for users

* docs: slight improvement from @rwgk

* docs: Ccache spelling, extra warning about pip caching

Ccache spelling noted by @YannickJadoul
  • Loading branch information
henryiii authored Nov 11, 2020
1 parent c58758d commit ebd5c5b
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 16 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ repos:
- id: disallow-caps
name: Disallow improper capitalization
language: pygrep
entry: PyBind|Numpy|Cmake
entry: PyBind|Numpy|Cmake|CCache
exclude: .pre-commit-config.yaml

- repo: local
Expand Down
30 changes: 30 additions & 0 deletions docs/compiling.rst
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,36 @@ default number of threads (0 will take the number of threads available) and
``max=N``, the maximum number of threads; if you have a large extension you may
want to set this to a memory-dependent number.

If you are developing rapidly and have a lot of C++ files, you may want to
avoid rebuilding files that have not changed. For simple cases where you are
using ``pip install -e .`` and do not have local headers, you can skip the
rebuild if an object file is newer than its source (headers are not checked!)
with the following:

.. code-block:: python
from pybind11.setup_helpers import ParallelCompile, naive_recompile
ParallelCompile("NPY_NUM_BUILD_JOBS", needs_recompile=naive_recompile).install()
If you have a more complex build, you can implement a smarter function and pass
it to ``needs_recompile``, or you can use [Ccache]_ instead. ``CXX="ccache g++"
pip install -e .`` would be the way to use it with GCC, for example. Unlike the
simple solution, this works even when not compiling in editable mode, but
it does require Ccache to be installed.

Keep in mind that Pip will not even attempt to rebuild if it thinks it has
already built a copy of your code, which it deduces from the version number.
One way to avoid this is to use [setuptools_scm]_, which will generate a
version number that includes the number of commits since your last tag and a
hash for a dirty directory. Another way to force a rebuild is to purge your cache
or use Pip's ``--no-cache-dir`` option.

.. [Ccache] https://ccache.dev
.. [setuptools_scm] https://github.com/pypa/setuptools_scm
.. _setup_helpers-pep518:

PEP 518 requirements (Pip 10+ required)
Expand Down
62 changes: 50 additions & 12 deletions pybind11/setup_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,8 @@ def auto_cpp_level(compiler):
class build_ext(_build_ext): # noqa: N801
"""
Customized build_ext that allows an auto-search for the highest supported
C++ level for Pybind11Extension.
C++ level for Pybind11Extension. This is only needed for the auto-search
for now, and is completely optional otherwise.
"""

def build_extensions(self):
Expand All @@ -293,6 +294,23 @@ def build_extensions(self):
_build_ext.build_extensions(self)


def naive_recompile(obj, src):
    """
    Decide whether ``src`` must be rebuilt by comparing modification times.

    Returns True when the object file ``obj`` is strictly older than the
    source file ``src``, and False otherwise (including equal timestamps).
    Only the single source file is inspected; header files are not, so use a
    smarter function or Ccache if your package has editable headers.

    Both paths are passed to ``os.stat``, which raises ``OSError`` if either
    file cannot be stat'ed.
    """
    # Stat the object first, then the source, mirroring evaluation order.
    object_mtime = os.stat(obj).st_mtime
    source_mtime = os.stat(src).st_mtime
    return source_mtime > object_mtime


def no_recompile(obg, src):
    """
    Always request a recompile, regardless of the arguments.

    Both arguments are ignored and True is returned unconditionally. This is
    the default policy: the safest choice, but also the slowest, since every
    source is rebuilt each time.
    """
    return True


# Optional parallel compile utility
# inspired by: http://stackoverflow.com/questions/11013851/speeding-up-build-process-with-distutils
# and: https://github.com/tbenthompson/cppimport/blob/stable/cppimport/build_module.py
Expand All @@ -306,24 +324,42 @@ class ParallelCompile(object):
This takes several arguments that allow you to customize the compile
function created:
envvar: Set an environment variable to control the compilation threads, like NPY_NUM_BUILD_JOBS
default: 0 will automatically multithread, or 1 will only multithread if the envvar is set.
max: The limit for automatic multithreading if non-zero
envvar:
Set an environment variable to control the compilation threads, like
NPY_NUM_BUILD_JOBS
default:
0 will automatically multithread, or 1 will only multithread if the
envvar is set.
max:
The limit for automatic multithreading if non-zero
needs_recompile:
A function of (obj, src) that returns True when recompile is needed. No
effect in isolated mode; use ccache instead, see
https://github.com/matplotlib/matplotlib/issues/1507/
To use::
To use:
ParallelCompile("NPY_NUM_BUILD_JOBS").install()
or:
or::
with ParallelCompile("NPY_NUM_BUILD_JOBS"):
setup(...)
By default, this assumes all files need to be recompiled. A smarter
function can be provided via needs_recompile. If the output has not yet
been generated, the compile will always run, and this function is not
called.
"""

__slots__ = ("envvar", "default", "max", "old")
__slots__ = ("envvar", "default", "max", "_old", "needs_recompile")

def __init__(self, envvar=None, default=0, max=0):
def __init__(self, envvar=None, default=0, max=0, needs_recompile=no_recompile):
self.envvar = envvar
self.default = default
self.max = max
self.old = []
self.needs_recompile = needs_recompile
self._old = []

def function(self):
"""
Expand Down Expand Up @@ -360,7 +396,9 @@ def _single_compile(obj):
src, ext = build[obj]
except KeyError:
return
compiler._compile(obj, src, ext, cc_args, extra_postargs, pp_opts)

if not os.path.exists(obj) or self.needs_recompile(obj, src):
compiler._compile(obj, src, ext, cc_args, extra_postargs, pp_opts)

try:
import multiprocessing
Expand Down Expand Up @@ -391,8 +429,8 @@ def install(self):
return self

def __enter__(self):
self.old.append(distutils.ccompiler.CCompiler.compile)
self._old.append(distutils.ccompiler.CCompiler.compile)
return self.install()

def __exit__(self, *args):
distutils.ccompiler.CCompiler.compile = self.old.pop()
distutils.ccompiler.CCompiler.compile = self._old.pop()
17 changes: 14 additions & 3 deletions pybind11/setup_helpers.pyi
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# IMPORTANT: Should stay in sync with setup_helpers.py (mostly checked by CI /
# pre-commit).

from typing import Any, Iterator, Optional, Type, TypeVar, Union
from typing import Any, Callable, Iterator, Optional, Type, TypeVar, Union
from types import TracebackType

from distutils.command.build_ext import build_ext as _build_ext # type: ignore
Expand Down Expand Up @@ -33,12 +33,23 @@ def auto_cpp_level(compiler: distutils.ccompiler.CCompiler) -> Union[int, str]:
class build_ext(_build_ext): # type: ignore
def build_extensions(self) -> None: ...

def no_recompile(obj: str, src: str) -> bool: ...
def naive_recompile(obj: str, src: str) -> bool: ...

T = TypeVar("T", bound="ParallelCompile")

class ParallelCompile:
envvar: Optional[str]
default: int
max: int
needs_recompile: Callable[[str, str], bool]
def __init__(
self, envvar: Optional[str] = None, default: int = 0, max: int = 0
): ...
self,
envvar: Optional[str] = None,
default: int = 0,
max: int = 0,
needs_recompile: Callable[[str, str], bool] = no_recompile,
) -> None: ...
def function(self) -> Any: ...
def install(self: T) -> T: ...
def __enter__(self: T) -> T: ...
Expand Down

0 comments on commit ebd5c5b

Please sign in to comment.