Spaces:
Running
Running
"""Automatic discovery of Python modules and packages (for inclusion in the | |
distribution) and other config values. | |
For the purposes of this module, the following nomenclature is used: | |
- "src-layout": a directory representing a Python project that contains a "src" | |
folder. Everything under the "src" folder is meant to be included in the | |
distribution when packaging the project. Example:: | |

    .
    ├── tox.ini
    ├── pyproject.toml
    └── src/
        └── mypkg/
            ├── __init__.py
            ├── mymodule.py
            └── my_data_file.txt

- "flat-layout": a Python project that does not use "src-layout" but instead
  has a directory under the project root for each package::

    .
    ├── tox.ini
    ├── pyproject.toml
    └── mypkg/
        ├── __init__.py
        ├── mymodule.py
        └── my_data_file.txt

- "single-module": a project that contains a single Python script directly under
  the project root (no directory used)::

    .
    ├── tox.ini
    ├── pyproject.toml
    └── mymodule.py

""" | |
import itertools | |
import os | |
from fnmatch import fnmatchcase | |
from glob import glob | |
from pathlib import Path | |
from typing import ( | |
TYPE_CHECKING, | |
Dict, | |
Iterable, | |
Iterator, | |
List, | |
Mapping, | |
Optional, | |
Tuple, | |
Union | |
) | |
import _distutils_hack.override # noqa: F401 | |
from distutils import log | |
from distutils.util import convert_path | |
# Anything accepted as a filesystem path: a plain string or an ``os.PathLike``.
_Path = Union[str, os.PathLike]
# Iterator of strings; used as the return annotation of the finders' ``_find_iter``.
StrIter = Iterator[str]
# Shorthand that flattens one level of nesting in an iterable of iterables.
chain_iter = itertools.chain.from_iterable

if TYPE_CHECKING:
    # Only needed by type checkers (the name is used in a string annotation).
    from setuptools import Distribution  # noqa
def _valid_name(path: _Path) -> bool:
    """Tell whether the last component of ``path`` is a valid Python identifier.

    Names that are not identifiers cannot be imported directly, so callers
    use this check to ignore them.
    """
    last_component = os.path.basename(path)
    return last_component.isidentifier()
class _Filter: | |
""" | |
Given a list of patterns, create a callable that will be true only if | |
the input matches at least one of the patterns. | |
""" | |
def __init__(self, *patterns: str): | |
self._patterns = dict.fromkeys(patterns) | |
def __call__(self, item: str) -> bool: | |
return any(fnmatchcase(item, pat) for pat in self._patterns) | |
def __contains__(self, item: str) -> bool: | |
return item in self._patterns | |
class _Finder:
    """Base class that exposes functionality for module/package finders"""

    # Patterns that are excluded no matter what the caller passes.
    ALWAYS_EXCLUDE: Tuple[str, ...] = ()
    # Patterns used when the caller gives no (or an empty) ``exclude``.
    DEFAULT_EXCLUDE: Tuple[str, ...] = ()

    @classmethod
    def find(
        cls,
        where: _Path = '.',
        exclude: Iterable[str] = (),
        include: Iterable[str] = ('*',)
    ) -> List[str]:
        """Return a list of all Python items (packages or modules, depending on
        the finder implementation) found within directory 'where'.

        'where' is the root directory which will be searched.
        It should be supplied as a "cross-platform" (i.e. URL-style) path;
        it will be converted to the appropriate local path syntax.

        'exclude' is a sequence of names to exclude; '*' can be used
        as a wildcard in the names.
        When finding packages, 'foo.*' will exclude all subpackages of 'foo'
        (but not 'foo' itself).

        'include' is a sequence of names to include.
        If it's specified, only the named items will be included.
        If it's not specified, all found items will be included.
        'include' can contain shell style wildcard patterns just like
        'exclude'.
        """
        # An empty ``exclude`` falls back to the finder-specific defaults.
        exclude = exclude or cls.DEFAULT_EXCLUDE
        return list(
            cls._find_iter(
                convert_path(str(where)),
                _Filter(*cls.ALWAYS_EXCLUDE, *exclude),
                _Filter(*include),
            )
        )

    @classmethod
    def _find_iter(cls, where: _Path, exclude: _Filter, include: _Filter) -> StrIter:
        """Yield the names found under ``where``; implemented by subclasses."""
        raise NotImplementedError
class PackageFinder(_Finder):
    """
    Generate a list of all Python packages found within a directory
    """

    ALWAYS_EXCLUDE = ("ez_setup", "*__pycache__")

    @classmethod
    def _find_iter(cls, where: _Path, exclude: _Filter, include: _Filter) -> StrIter:
        """
        All the packages found in 'where' that pass the 'include' filter, but
        not the 'exclude' filter.
        """
        for root, dirs, _files in os.walk(str(where), followlinks=True):
            # Copy dirs to iterate over it, then empty dirs.
            # ``os.walk`` only descends into what is left in ``dirs``, so the
            # directories worth visiting are appended back one by one below.
            all_dirs = dirs[:]
            dirs[:] = []

            for dir_name in all_dirs:
                full_path = os.path.join(root, dir_name)
                rel_path = os.path.relpath(full_path, where)
                package = rel_path.replace(os.path.sep, '.')

                # Skip directory trees that are not valid packages
                if '.' in dir_name or not cls._looks_like_package(full_path, package):
                    continue

                # Should this package be included?
                if include(package) and not exclude(package):
                    yield package

                # Early pruning if there is nothing else to be scanned
                # (literal membership in the exclude patterns, not a match)
                if f"{package}*" in exclude or f"{package}.*" in exclude:
                    continue

                # Keep searching subdirectories, as there may be more packages
                # down there, even if the parent was excluded.
                dirs.append(dir_name)

    @staticmethod
    def _looks_like_package(path: _Path, _package_name: str) -> bool:
        """Does a directory look like a package?"""
        return os.path.isfile(os.path.join(path, '__init__.py'))
class PEP420PackageFinder(PackageFinder):
    """Package finder that does not require an ``__init__.py`` file."""

    @staticmethod
    def _looks_like_package(_path: _Path, _package_name: str) -> bool:
        """Accept every directory as a package."""
        return True
class ModuleFinder(_Finder):
    """Find isolated Python modules.
    This function will **not** recurse subdirectories.
    """

    @classmethod
    def _find_iter(cls, where: _Path, exclude: _Filter, include: _Filter) -> StrIter:
        """Yield the names of the ``*.py`` files directly inside ``where`` that
        pass the 'include' filter, but not the 'exclude' filter.
        """
        for file in glob(os.path.join(where, "*.py")):
            module, _ext = os.path.splitext(os.path.basename(file))

            # File names that are not identifiers cannot be imported directly
            if not cls._looks_like_module(module):
                continue

            if include(module) and not exclude(module):
                yield module

    _looks_like_module = staticmethod(_valid_name)
# We have to be extra careful in the case of flat layout to not include files | |
# and directories not meant for distribution (e.g. tool-related) | |
class FlatLayoutPackageFinder(PEP420PackageFinder):
    """Package finder for flat-layout projects.

    Directories do not need an ``__init__.py``, but a list of reserved names
    is excluded by default.
    """

    _EXCLUDE = (
        "ci",
        "bin",
        "doc",
        "docs",
        "documentation",
        "manpages",
        "news",
        "changelog",
        "test",
        "tests",
        "unit_test",
        "unit_tests",
        "example",
        "examples",
        "scripts",
        "tools",
        "util",
        "utils",
        "python",
        "build",
        "dist",
        "venv",
        "env",
        "requirements",
        # ---- Task runners / Build tools ----
        "tasks",  # invoke
        "fabfile",  # fabric
        "site_scons",  # SCons
        # ---- Other tools ----
        "benchmark",
        "benchmarks",
        "exercise",
        "exercises",
        "htmlcov",  # Coverage.py
        # ---- Hidden directories/Private packages ----
        "[._]*",
    )

    # Each reserved name is excluded together with all of its subpackages.
    DEFAULT_EXCLUDE = tuple(chain_iter((p, f"{p}.*") for p in _EXCLUDE))
    """Reserved package names"""

    @staticmethod
    def _looks_like_package(_path: _Path, package_name: str) -> bool:
        """Accept dotted names made of identifiers; the root segment may
        alternatively end with ``-stubs``.
        """
        names = package_name.split('.')
        # Consider PEP 561
        root_pkg_is_valid = names[0].isidentifier() or names[0].endswith("-stubs")
        return root_pkg_is_valid and all(name.isidentifier() for name in names[1:])
class FlatLayoutModuleFinder(ModuleFinder):
    """Module finder for flat-layout projects, with reserved names excluded
    by default.
    """

    DEFAULT_EXCLUDE = (
        "setup",
        "conftest",
        "test",
        "tests",
        "example",
        "examples",
        "build",
        # ---- Task runners ----
        "toxfile",
        "noxfile",
        "pavement",
        "dodo",
        "tasks",
        "fabfile",
        # ---- Other tools ----
        "[Ss][Cc]onstruct",  # SCons
        "conanfile",  # Conan: C/C++ build tool
        "manage",  # Django
        "benchmark",
        "benchmarks",
        "exercise",
        "exercises",
        # ---- Hidden files/Private modules ----
        "[._]*",
    )
    """Reserved top-level module names"""
def _find_packages_within(root_pkg: str, pkg_dir: _Path) -> List[str]:
    """List ``root_pkg`` followed by every package found inside ``pkg_dir``,
    each one qualified with ``root_pkg`` as its parent.
    """
    found = [root_pkg]
    for nested_name in PEP420PackageFinder.find(pkg_dir):
        found.append(".".join((root_pkg, nested_name)))
    return found
class ConfigDiscovery:
    """Fill-in metadata and options that can be automatically derived
    (from other metadata/options, the file system or conventions)
    """

    def __init__(self, distribution: "Distribution"):
        self.dist = distribution
        self._called = False
        self._disabled = False
        self._skip_ext_modules = False

    def _disable(self):
        """Internal API to disable automatic discovery"""
        self._disabled = True

    def _ignore_ext_modules(self):
        """Internal API to disregard ext_modules.

        Normally auto-discovery would not be triggered if ``ext_modules`` are set
        (this is done for backward compatibility with existing packages relying on
        ``setup.py`` or ``setup.cfg``). However, ``setuptools`` can call this function
        to ignore given ``ext_modules`` and proceed with the auto-discovery if
        ``packages`` and ``py_modules`` are not given (e.g. when using pyproject.toml
        metadata).
        """
        self._skip_ext_modules = True

    @property
    def _root_dir(self) -> _Path:
        """Project root: ``dist.src_root`` if set, the current directory otherwise."""
        # The best is to wait until `src_root` is set in dist, before using _root_dir.
        return self.dist.src_root or os.curdir

    @property
    def _package_dir(self) -> Dict[str, str]:
        """``dist.package_dir``, or a new empty dict when it is ``None``."""
        if self.dist.package_dir is None:
            return {}
        return self.dist.package_dir

    def __call__(self, force=False, name=True, ignore_ext_modules=False):
        """Automatically discover missing configuration fields
        and modifies the given ``distribution`` object in-place.

        Note that by default this will only have an effect the first time the
        ``ConfigDiscovery`` object is called.

        To repeatedly invoke automatic discovery (e.g. when the project
        directory changes), please use ``force=True`` (or create a new
        ``ConfigDiscovery`` instance).
        """
        if force is False and (self._called or self._disabled):
            # Avoid overhead of multiple calls
            return

        self._analyse_package_layout(ignore_ext_modules)
        if name:
            self.analyse_name()  # depends on ``packages`` and ``py_modules``

        self._called = True

    def _explicitly_specified(self, ignore_ext_modules: bool) -> bool:
        """``True`` if the user has specified some form of package/module listing"""
        ignore_ext_modules = ignore_ext_modules or self._skip_ext_modules
        ext_modules = not (self.dist.ext_modules is None or ignore_ext_modules)
        return (
            self.dist.packages is not None
            or self.dist.py_modules is not None
            or ext_modules
            # Some projects use numpy.distutils.misc_util.Configuration
            or bool(getattr(self.dist, "configuration", None))
        )

    def _analyse_package_layout(self, ignore_ext_modules: bool) -> bool:
        """Try each supported layout in turn until one of them succeeds.

        Returns ``True`` without touching the distribution when the user has
        already listed packages/modules.
        """
        if self._explicitly_specified(ignore_ext_modules):
            # For backward compatibility, just try to find modules/packages
            # when nothing is given
            return True

        log.debug(
            "No `packages` or `py_modules` configuration, performing "
            "automatic discovery."
        )

        return (
            self._analyse_explicit_layout()
            or self._analyse_src_layout()
            # flat-layout is the trickiest for discovery so it should be last
            or self._analyse_flat_layout()
        )

    def _analyse_explicit_layout(self) -> bool:
        """The user can explicitly give a package layout via ``package_dir``"""
        package_dir = self._package_dir.copy()  # don't modify directly
        package_dir.pop("", None)  # This falls under the "src-layout" umbrella
        root_dir = self._root_dir

        if not package_dir:
            return False

        log.debug(f"`explicit-layout` detected -- analysing {package_dir}")
        pkgs = chain_iter(
            _find_packages_within(pkg, os.path.join(root_dir, parent_dir))
            for pkg, parent_dir in package_dir.items()
        )
        self.dist.packages = list(pkgs)
        log.debug(f"discovered packages -- {self.dist.packages}")
        return True

    def _analyse_src_layout(self) -> bool:
        """Try to find all packages or modules under the ``src`` directory
        (or anything pointed by ``package_dir[""]``).

        The "src-layout" is relatively safe for automatic discovery.
        We assume that everything within is meant to be included in the
        distribution.

        If ``package_dir[""]`` is not given, but the ``src`` directory exists,
        this function will set ``package_dir[""] = "src"``.
        """
        package_dir = self._package_dir
        src_dir = os.path.join(self._root_dir, package_dir.get("", "src"))
        if not os.path.isdir(src_dir):
            return False

        log.debug(f"`src-layout` detected -- analysing {src_dir}")
        package_dir.setdefault("", os.path.basename(src_dir))
        self.dist.package_dir = package_dir  # persist eventual modifications
        self.dist.packages = PEP420PackageFinder.find(src_dir)
        self.dist.py_modules = ModuleFinder.find(src_dir)
        log.debug(f"discovered packages -- {self.dist.packages}")
        log.debug(f"discovered py_modules -- {self.dist.py_modules}")
        return True

    def _analyse_flat_layout(self) -> bool:
        """Try to find all packages and modules under the project root.

        Since the ``flat-layout`` is more dangerous in terms of accidentally including
        extra files/directories, this function is more conservative and will raise an
        error if multiple packages or modules are found.

        This assumes that multi-package dists are uncommon and refuse to support that
        use case in order to be able to prevent unintended errors.
        """
        log.debug(f"`flat-layout` detected -- analysing {self._root_dir}")
        return self._analyse_flat_packages() or self._analyse_flat_modules()

    def _analyse_flat_packages(self) -> bool:
        """Discover packages under the project root; ``True`` if any top-level
        (non-stub) package was found.
        """
        self.dist.packages = FlatLayoutPackageFinder.find(self._root_dir)
        # Only non-stub, non-nested names count towards the "multiple" check
        top_level = remove_nested_packages(remove_stubs(self.dist.packages))
        log.debug(f"discovered packages -- {self.dist.packages}")
        self._ensure_no_accidental_inclusion(top_level, "packages")
        return bool(top_level)

    def _analyse_flat_modules(self) -> bool:
        """Discover modules under the project root; ``True`` if any was found."""
        self.dist.py_modules = FlatLayoutModuleFinder.find(self._root_dir)
        log.debug(f"discovered py_modules -- {self.dist.py_modules}")
        self._ensure_no_accidental_inclusion(self.dist.py_modules, "modules")
        return bool(self.dist.py_modules)

    def _ensure_no_accidental_inclusion(self, detected: List[str], kind: str):
        """Raise ``PackageDiscoveryError`` if more than one item was detected."""
        if len(detected) > 1:
            from inspect import cleandoc
            from setuptools.errors import PackageDiscoveryError

            msg = f"""Multiple top-level {kind} discovered in a flat-layout: {detected}.
            To avoid accidental inclusion of unwanted files or directories,
            setuptools will not proceed with this build.
            If you are trying to create a single distribution with multiple {kind}
            on purpose, you should not rely on automatic discovery.
            Instead, consider the following options:
            1. set up custom discovery (`find` directive with `include` or `exclude`)
            2. use a `src-layout`
            3. explicitly set `py_modules` or `packages` with a list of names
            To find more information, look for "package discovery" on setuptools docs.
            """
            raise PackageDiscoveryError(cleandoc(msg))

    def analyse_name(self):
        """The packages/modules are the essential contribution of the author.
        Therefore the name of the distribution can be derived from them.
        """
        if self.dist.metadata.name or self.dist.name:
            # get_name() is not reliable (can return "UNKNOWN")
            return None

        log.debug("No `name` configuration, performing automatic discovery")

        name = (
            self._find_name_single_package_or_module()
            or self._find_name_from_packages()
        )
        if name:
            self.dist.metadata.name = name

    def _find_name_single_package_or_module(self) -> Optional[str]:
        """Exactly one module or package"""
        for field in ('packages', 'py_modules'):
            items = getattr(self.dist, field, None) or []
            if items and len(items) == 1:
                log.debug(f"Single module/package detected, name: {items[0]}")
                return items[0]

        return None

    def _find_name_from_packages(self) -> Optional[str]:
        """Try to find the root package that is not a PEP 420 namespace"""
        if not self.dist.packages:
            return None

        packages = remove_stubs(sorted(self.dist.packages, key=len))
        package_dir = self.dist.package_dir or {}

        parent_pkg = find_parent_package(packages, package_dir, self._root_dir)
        if parent_pkg:
            log.debug(f"Common parent package detected, name: {parent_pkg}")
            return parent_pkg

        log.warn("No parent package detected, impossible to derive `name`")
        return None
def remove_nested_packages(packages: List[str]) -> List[str]:
    """Remove nested packages from a list of packages.

    The result is ordered by name length (shortest first).

    >>> remove_nested_packages(["a", "a.b1", "a.b2", "a.b1.c1"])
    ['a']
    >>> remove_nested_packages(["a", "b", "c.d", "c.d.e.f", "g.h", "a.a1"])
    ['a', 'b', 'c.d', 'g.h']
    """
    by_length = sorted(packages, key=len)
    # A name is nested when some other listed name, followed by a dot, is a
    # prefix of it.
    return [
        candidate
        for candidate in by_length
        if not any(candidate.startswith(f"{parent}.") for parent in by_length)
    ]
def remove_stubs(packages: List[str]) -> List[str]:
    """Remove type stubs (:pep:`561`) from a list of packages.

    A package is dropped when its root (first dotted segment) ends with
    ``-stubs``.

    >>> remove_stubs(["a", "a.b", "a-stubs", "a-stubs.b.c", "b", "c-stubs"])
    ['a', 'a.b', 'b']
    """
    kept = []
    for pkg in packages:
        root, _sep, _rest = pkg.partition(".")
        if not root.endswith("-stubs"):
            kept.append(pkg)
    return kept
def find_parent_package(
    packages: List[str], package_dir: Mapping[str, str], root_dir: _Path
) -> Optional[str]:
    """Find the parent package that is not a namespace.

    Returns the shortest common ancestor of ``packages`` whose directory
    contains an ``__init__.py`` file, or ``None`` when there is none.
    """
    ordered = sorted(packages, key=len)

    # With the names ordered by length, a name is a common ancestor when it
    # prefixes every name that comes after it. The scan stops at the first
    # name that does not, so diverging roots produce an empty list.
    ancestors = []
    for position, candidate in enumerate(ordered):
        prefix = f"{candidate}."
        if any(not other.startswith(prefix) for other in ordered[position + 1:]):
            break
        ancestors.append(candidate)

    for candidate in ancestors:
        location = find_package_path(candidate, package_dir, root_dir)
        if os.path.isfile(os.path.join(location, "__init__.py")):
            return candidate

    return None
def find_package_path(
    name: str, package_dir: Mapping[str, str], root_dir: _Path
) -> str:
    """Given a package name, return the path where it should be found on
    disk, considering the ``package_dir`` option.

    >>> path = find_package_path("my.pkg", {"": "root/is/nested"}, ".")
    >>> path.replace(os.sep, "/")
    './root/is/nested/my/pkg'

    >>> path = find_package_path("my.pkg", {"my": "root/is/nested"}, ".")
    >>> path.replace(os.sep, "/")
    './root/is/nested/pkg'

    >>> path = find_package_path("my.pkg", {"my.pkg": "root/is/nested"}, ".")
    >>> path.replace(os.sep, "/")
    './root/is/nested'

    >>> path = find_package_path("other.pkg", {"my.pkg": "root/is/nested"}, ".")
    >>> path.replace(os.sep, "/")
    './other/pkg'
    """
    segments = name.split(".")

    # Try the longest dotted prefix first, so that the most specific
    # ``package_dir`` entry wins.
    for cut in reversed(range(1, len(segments) + 1)):
        prefix = ".".join(segments[:cut])
        if prefix in package_dir:
            mapped_dir = package_dir[prefix]
            return os.path.join(root_dir, mapped_dir, *segments[cut:])

    # No entry matched: fall back to the root entry (``""``), if any.
    base = package_dir.get("") or ""
    return os.path.join(root_dir, *base.split("/"), *segments)
def construct_package_dir(packages: List[str], package_path: _Path) -> Dict[str, str]:
    """Map each non-nested package in ``packages`` to a ``/``-separated
    directory made of the parts of ``package_path`` followed by the package
    name segments.
    """
    top_level = remove_nested_packages(packages)
    base_parts = Path(package_path).parts

    mapping = {}
    for pkg in top_level:
        mapping[pkg] = "/".join((*base_parts, *pkg.split(".")))
    return mapping