|
"""Automatic discovery of Python modules and packages (for inclusion in the |
|
distribution) and other config values. |
|
|
|
For the purposes of this module, the following nomenclature is used: |
|
|
|
- "src-layout": a directory representing a Python project that contains a "src"
  folder. Everything under the "src" folder is meant to be included in the
  distribution when packaging the project. Example::

    .
    ├── tox.ini
    ├── pyproject.toml
    └── src/
        └── mypkg/
            ├── __init__.py
            ├── mymodule.py
            └── my_data_file.txt
|
|
|
- "flat-layout": a Python project that does not use "src-layout" but instead
  has a directory under the project root for each package::

    .
    ├── tox.ini
    ├── pyproject.toml
    └── mypkg/
        ├── __init__.py
        ├── mymodule.py
        └── my_data_file.txt
|
|
|
- "single-module": a project that contains a single Python script directly under
  the project root (no directory used)::

    .
    ├── tox.ini
    ├── pyproject.toml
    └── mymodule.py
|
|
|
""" |
|
|
|
import itertools |
|
import os |
|
from fnmatch import fnmatchcase |
|
from glob import glob |
|
from pathlib import Path |
|
from typing import ( |
|
TYPE_CHECKING, |
|
Dict, |
|
Iterable, |
|
Iterator, |
|
List, |
|
Mapping, |
|
Optional, |
|
Tuple, |
|
Union |
|
) |
|
|
|
import _distutils_hack.override |
|
|
|
from distutils import log |
|
from distutils.util import convert_path |
|
|
|
# Anything accepted as a filesystem path: a plain string or an ``os.PathLike``.
_Path = Union[str, os.PathLike]
# Iterator of names, used as the return type of the finders' ``_find_iter``.
StrIter = Iterator[str]

# Short alias: flattens an iterable of iterables into a single stream of items.
chain_iter = itertools.chain.from_iterable

if TYPE_CHECKING:
    # Only needed by static type checkers; at runtime the name is referenced
    # exclusively through the string annotation ``"Distribution"``.
    from setuptools import Distribution
|
|
|
|
|
def _valid_name(path: _Path) -> bool:
    """Tell whether the last component of ``path`` is a valid identifier.

    Names that are not valid identifiers cannot be imported directly, so
    callers use this check to discard them.
    """
    final_component = os.path.basename(path)
    return final_component.isidentifier()
|
|
|
|
|
class _Filter:
    """
    Callable predicate built from a collection of shell-style patterns.

    Calling an instance with a string is true when at least one pattern
    matches it (case-sensitive ``fnmatch`` rules). The ``in`` operator, on the
    other hand, checks whether a string is literally one of the patterns.
    """

    def __init__(self, *patterns: str):
        # A dict works as an insertion-ordered set of the given patterns.
        self._patterns = {pattern: None for pattern in patterns}

    def __call__(self, item: str) -> bool:
        for pattern in self._patterns:
            if fnmatchcase(item, pattern):
                return True
        return False

    def __contains__(self, item: str) -> bool:
        return item in self._patterns
|
|
|
|
|
class _Finder:
    """Common machinery shared by the module and package finders."""

    # Patterns that are rejected no matter what the caller asks for.
    ALWAYS_EXCLUDE: Tuple[str, ...] = ()
    # Patterns used for ``exclude`` when the caller gives none (or an empty one).
    DEFAULT_EXCLUDE: Tuple[str, ...] = ()

    @classmethod
    def find(
        cls,
        where: _Path = '.',
        exclude: Iterable[str] = (),
        include: Iterable[str] = ('*',)
    ) -> List[str]:
        """List every Python item found inside the directory ``where``.

        What counts as an "item" (package or module) is decided by the
        concrete finder, through its ``_find_iter`` implementation.

        ``where`` is the root of the search. It is expected in "cross-platform"
        (URL-style, ``/``-separated) form and is converted to the local path
        syntax before being used.

        ``exclude`` holds the names to leave out; ``*`` works as a wildcard.
        For package finders, ``'foo.*'`` leaves out every subpackage of
        ``foo`` while keeping ``foo`` itself. When empty, the finder's
        ``DEFAULT_EXCLUDE`` is used. ``ALWAYS_EXCLUDE`` applies in every case.

        ``include`` holds the names to keep, and accepts the same shell-style
        wildcards as ``exclude``. The default (``'*'``) keeps everything that
        is found; otherwise only matching items are returned.
        """
        if not exclude:
            exclude = cls.DEFAULT_EXCLUDE

        search_root = convert_path(str(where))
        rejected = _Filter(*cls.ALWAYS_EXCLUDE, *exclude)
        accepted = _Filter(*include)
        return list(cls._find_iter(search_root, rejected, accepted))

    @classmethod
    def _find_iter(cls, where: _Path, exclude: _Filter, include: _Filter) -> StrIter:
        """Yield the matching items; must be provided by subclasses."""
        raise NotImplementedError
|
|
|
|
|
class PackageFinder(_Finder):
    """
    Produce the names of all Python packages found within a directory
    """

    ALWAYS_EXCLUDE = ("ez_setup", "*__pycache__")

    @classmethod
    def _find_iter(cls, where: _Path, exclude: _Filter, include: _Filter) -> StrIter:
        """
        Yield the dotted name of each package under 'where' that is accepted
        by the 'include' filter and not rejected by the 'exclude' filter.
        """
        for current, subdirs, _files in os.walk(str(where), followlinks=True):
            # Snapshot the sub-directories, then empty the list ``os.walk``
            # will descend into: only approved entries are put back below.
            candidates = list(subdirs)
            del subdirs[:]

            for dirname in candidates:
                location = os.path.join(current, dirname)
                package = os.path.relpath(location, where).replace(os.path.sep, '.')

                # Directory trees that cannot be packages are dropped entirely
                # (they are neither reported nor descended into).
                acceptable = (
                    '.' not in dirname
                    and cls._looks_like_package(location, package)
                )
                if not acceptable:
                    continue

                if include(package) and not exclude(package):
                    yield package

                # When the whole subtree is excluded by a literal pattern
                # there is nothing left to scan under this directory.
                subtree_excluded = (
                    f"{package}*" in exclude or f"{package}.*" in exclude
                )
                if not subtree_excluded:
                    # Keep descending: nested packages may still qualify even
                    # if this one was filtered out.
                    subdirs.append(dirname)

    @staticmethod
    def _looks_like_package(path: _Path, _package_name: str) -> bool:
        """Does a directory look like a package?"""
        marker = os.path.join(path, '__init__.py')
        return os.path.isfile(marker)
|
|
|
|
|
class PEP420PackageFinder(PackageFinder):
    """Variant of ``PackageFinder`` that does not require ``__init__.py``.

    The package check below accepts every directory it is asked about, so
    directories without an ``__init__.py`` file are reported as packages too.
    """

    @staticmethod
    def _looks_like_package(_path: _Path, _package_name: str) -> bool:
        # Both arguments are ignored: any directory passed in is accepted.
        return True
|
|
|
|
|
class ModuleFinder(_Finder):
    """Find isolated Python modules.
    This function will **not** recurse subdirectories.
    """

    @classmethod
    def _find_iter(cls, where: _Path, exclude: _Filter, include: _Filter) -> StrIter:
        """Yield the name of each ``*.py`` file located directly in ``where``.

        A module is yielded when its name passes ``_looks_like_module``, is
        accepted by ``include`` and is not rejected by ``exclude``.
        """
        # Imported locally because the file only imports ``glob`` itself.
        from glob import escape

        # ``where`` is a directory, not a pattern: escape it so that special
        # characters in its name (``[``, ``]``, ``*``, ``?``) are matched
        # literally instead of being interpreted as wildcards.
        pattern = os.path.join(escape(os.fspath(where)), "*.py")

        for file in glob(pattern):
            module, _ext = os.path.splitext(os.path.basename(file))

            if not cls._looks_like_module(module):
                continue

            if include(module) and not exclude(module):
                yield module

    # A module is only considered when its name is a valid identifier.
    _looks_like_module = staticmethod(_valid_name)
|
|
|
|
|
|
|
|
|
|
|
|
|
class FlatLayoutPackageFinder(PEP420PackageFinder):
    """Package finder for flat layouts.

    Directories do not need an ``__init__.py`` file, but a list of reserved
    names is excluded by default and every component of the dotted package
    name has to be a valid identifier (the root may also be a ``-stubs``
    package).
    """

    _EXCLUDE = (
        "ci",
        "bin",
        "doc",
        "docs",
        "documentation",
        "manpages",
        "news",
        "changelog",
        "test",
        "tests",
        "unit_test",
        "unit_tests",
        "example",
        "examples",
        "scripts",
        "tools",
        "util",
        "utils",
        "python",
        "build",
        "dist",
        "venv",
        "env",
        "requirements",

        "tasks",
        "fabfile",
        "site_scons",

        "benchmark",
        "benchmarks",
        "exercise",
        "exercises",
        "htmlcov",

        # Any name starting with "." or "_"
        "[._]*",
    )

    # Each reserved name is excluded together with everything nested in it.
    DEFAULT_EXCLUDE = tuple(
        pattern for name in _EXCLUDE for pattern in (name, f"{name}.*")
    )
    """Reserved package names"""

    @staticmethod
    def _looks_like_package(_path: _Path, package_name: str) -> bool:
        """Accept ``package_name`` only if all of its components are valid."""
        root, *nested = package_name.split('.')

        # The root may be a type-stub package (``<name>-stubs``), which is not
        # an identifier but is still accepted.
        if not (root.isidentifier() or root.endswith("-stubs")):
            return False
        return all(part.isidentifier() for part in nested)
|
|
|
|
|
class FlatLayoutModuleFinder(ModuleFinder):
    """Module finder whose default exclusions are the reserved names below."""

    # Used as the ``exclude`` patterns when the caller does not provide any.
    DEFAULT_EXCLUDE = (
        "setup",
        "conftest",
        "test",
        "tests",
        "example",
        "examples",
        "build",

        "toxfile",
        "noxfile",
        "pavement",
        "dodo",
        "tasks",
        "fabfile",

        # Character classes make the first two letters case-insensitive
        "[Ss][Cc]onstruct",
        "conanfile",
        "manage",
        "benchmark",
        "benchmarks",
        "exercise",
        "exercises",

        # Any name starting with "." or "_"
        "[._]*",
    )
    """Reserved top-level module names"""
|
|
|
|
|
def _find_packages_within(root_pkg: str, pkg_dir: _Path) -> List[str]:
    """List ``root_pkg`` followed by the packages nested inside ``pkg_dir``.

    Nested packages are searched with ``PEP420PackageFinder`` and their names
    are prefixed with ``root_pkg`` (dot-separated).
    """
    discovered = PEP420PackageFinder.find(pkg_dir)
    qualified = [root_pkg]
    for nested_name in discovered:
        qualified.append(".".join((root_pkg, nested_name)))
    return qualified
|
|
|
|
|
class ConfigDiscovery:
    """Fill-in metadata and options that can be automatically derived
    (from other metadata/options, the file system or conventions)
    """

    def __init__(self, distribution: "Distribution"):
        self.dist = distribution
        # Becomes ``True`` once ``__call__`` has run to completion.
        self._called = False
        # Set by ``_disable``; makes ``__call__`` a no-op unless it is forced.
        self._disabled = False
        # Set by ``_ignore_ext_modules``; consulted in ``_explicitly_specified``.
        self._skip_ext_modules = False

    def _disable(self):
        """Internal API to disable automatic discovery"""
        self._disabled = True

    def _ignore_ext_modules(self):
        """Internal API to disregard ext_modules.

        Normally auto-discovery would not be triggered if ``ext_modules`` are set
        (this is done for backward compatibility with existing packages relying on
        ``setup.py`` or ``setup.cfg``). However, ``setuptools`` can call this function
        to ignore given ``ext_modules`` and proceed with the auto-discovery if
        ``packages`` and ``py_modules`` are not given (e.g. when using pyproject.toml
        metadata).
        """
        self._skip_ext_modules = True

    @property
    def _root_dir(self) -> _Path:
        """Directory to scan: ``dist.src_root``, or the current directory when
        that attribute is unset/empty.
        """
        return self.dist.src_root or os.curdir

    @property
    def _package_dir(self) -> Dict[str, str]:
        """``dist.package_dir``, or a new empty dict when it is ``None``."""
        if self.dist.package_dir is None:
            return {}
        return self.dist.package_dir

    def __call__(self, force=False, name=True, ignore_ext_modules=False):
        """Automatically discover missing configuration fields
        and modifies the given ``distribution`` object in-place.

        Note that by default this will only have an effect the first time the
        ``ConfigDiscovery`` object is called.

        To repeatedly invoke automatic discovery (e.g. when the project
        directory changes), please use ``force=True`` (or create a new
        ``ConfigDiscovery`` instance).

        ``name`` controls whether ``analyse_name`` is also run, and
        ``ignore_ext_modules`` is forwarded to the package layout analysis.
        """
        # Identity check: only the literal ``False`` allows the early exit;
        # any other value of ``force`` makes the analysis run again.
        if force is False and (self._called or self._disabled):
            # Nothing to do: discovery already ran, or it was disabled.
            return

        self._analyse_package_layout(ignore_ext_modules)
        if name:
            # Runs after the layout analysis because it reads
            # ``dist.packages`` and ``dist.py_modules``.
            self.analyse_name()

        self._called = True

    def _explicitly_specified(self, ignore_ext_modules: bool) -> bool:
        """``True`` if the user has specified some form of package/module listing"""
        ignore_ext_modules = ignore_ext_modules or self._skip_ext_modules
        # ``ext_modules`` only counts as "specified" when it is set and it was
        # not asked to be ignored.
        ext_modules = not (self.dist.ext_modules is None or ignore_ext_modules)
        return (
            self.dist.packages is not None
            or self.dist.py_modules is not None
            or ext_modules
            or hasattr(self.dist, "configuration") and self.dist.configuration
            # ^ ``and`` binds tighter than ``or``: this clause only counts when
            #   the attribute exists and is truthy. Note that its value (not
            #   necessarily a ``bool``) is what gets returned in that case.
        )

    def _analyse_package_layout(self, ignore_ext_modules: bool) -> bool:
        """Fill ``dist.packages``/``dist.py_modules`` when they are not given.

        Returns a truthy value if a listing was explicitly specified or if one
        of the layout analyses (explicit, src, flat, tried in this order)
        succeeded.
        """
        if self._explicitly_specified(ignore_ext_modules):
            # Discovery only takes place when nothing was given by the user.
            return True

        log.debug(
            "No `packages` or `py_modules` configuration, performing "
            "automatic discovery."
        )

        return (
            self._analyse_explicit_layout()
            or self._analyse_src_layout()
            # The flat layout is tried last, after the other two have failed.
            or self._analyse_flat_layout()
        )

    def _analyse_explicit_layout(self) -> bool:
        """The user can explicitly give a package layout via ``package_dir``"""
        # Work on a copy, so dropping the "" entry does not affect the dist.
        package_dir = self._package_dir.copy()
        package_dir.pop("", None)
        root_dir = self._root_dir

        if not package_dir:
            # No named package was mapped to a directory.
            return False

        log.debug(f"`explicit-layout` detected -- analysing {package_dir}")
        # For each mapped package: the package itself plus everything nested
        # in its directory.
        pkgs = chain_iter(
            _find_packages_within(pkg, os.path.join(root_dir, parent_dir))
            for pkg, parent_dir in package_dir.items()
        )
        self.dist.packages = list(pkgs)
        log.debug(f"discovered packages -- {self.dist.packages}")
        return True

    def _analyse_src_layout(self) -> bool:
        """Try to find all packages or modules under the ``src`` directory
        (or anything pointed by ``package_dir[""]``).

        The "src-layout" is relatively safe for automatic discovery.
        We assume that everything within is meant to be included in the
        distribution.

        If ``package_dir[""]`` is not given, but the ``src`` directory exists,
        this function will set ``package_dir[""] = "src"``.
        """
        package_dir = self._package_dir
        src_dir = os.path.join(self._root_dir, package_dir.get("", "src"))
        if not os.path.isdir(src_dir):
            return False

        log.debug(f"`src-layout` detected -- analysing {src_dir}")
        package_dir.setdefault("", os.path.basename(src_dir))
        # Written back because ``_package_dir`` returns a new dict when
        # ``dist.package_dir`` is ``None``.
        self.dist.package_dir = package_dir
        self.dist.packages = PEP420PackageFinder.find(src_dir)
        self.dist.py_modules = ModuleFinder.find(src_dir)
        log.debug(f"discovered packages -- {self.dist.packages}")
        log.debug(f"discovered py_modules -- {self.dist.py_modules}")
        return True

    def _analyse_flat_layout(self) -> bool:
        """Try to find all packages and modules under the project root.

        Since the ``flat-layout`` is more dangerous in terms of accidentally including
        extra files/directories, this function is more conservative and will raise an
        error if multiple packages or modules are found.

        This assumes that multi-package dists are uncommon and refuse to support that
        use case in order to be able to prevent unintended errors.
        """
        log.debug(f"`flat-layout` detected -- analysing {self._root_dir}")
        # Modules are only searched when no package is found.
        return self._analyse_flat_packages() or self._analyse_flat_modules()

    def _analyse_flat_packages(self) -> bool:
        """Discover packages in the root directory; ``True`` if any top-level
        (non-stub) package was found.
        """
        self.dist.packages = FlatLayoutPackageFinder.find(self._root_dir)
        # Stub packages and nested packages are not counted when checking for
        # multiple top-level packages.
        top_level = remove_nested_packages(remove_stubs(self.dist.packages))
        log.debug(f"discovered packages -- {self.dist.packages}")
        self._ensure_no_accidental_inclusion(top_level, "packages")
        return bool(top_level)

    def _analyse_flat_modules(self) -> bool:
        """Discover modules in the root directory; ``True`` if any was found."""
        self.dist.py_modules = FlatLayoutModuleFinder.find(self._root_dir)
        log.debug(f"discovered py_modules -- {self.dist.py_modules}")
        self._ensure_no_accidental_inclusion(self.dist.py_modules, "modules")
        return bool(self.dist.py_modules)

    def _ensure_no_accidental_inclusion(self, detected: List[str], kind: str):
        """Raise ``PackageDiscoveryError`` if more than one item was detected.

        ``kind`` is only used to compose the error message.
        """
        if len(detected) > 1:
            from inspect import cleandoc

            from setuptools.errors import PackageDiscoveryError

            # ``cleandoc`` strips the indentation of the message below.
            msg = f"""Multiple top-level {kind} discovered in a flat-layout: {detected}.

            To avoid accidental inclusion of unwanted files or directories,
            setuptools will not proceed with this build.

            If you are trying to create a single distribution with multiple {kind}
            on purpose, you should not rely on automatic discovery.
            Instead, consider the following options:

            1. set up custom discovery (`find` directive with `include` or `exclude`)
            2. use a `src-layout`
            3. explicitly set `py_modules` or `packages` with a list of names

            To find more information, look for "package discovery" on setuptools docs.
            """
            raise PackageDiscoveryError(cleandoc(msg))

    def analyse_name(self):
        """The packages/modules are the essential contribution of the author.
        Therefore the name of the distribution can be derived from them.
        """
        if self.dist.metadata.name or self.dist.name:
            # A name is already configured: there is nothing to derive.
            return None

        log.debug("No `name` configuration, performing automatic discovery")

        name = (
            self._find_name_single_package_or_module()
            or self._find_name_from_packages()
        )
        if name:
            self.dist.metadata.name = name

    def _find_name_single_package_or_module(self) -> Optional[str]:
        """Exactly one module or package"""
        # ``packages`` is inspected before ``py_modules``.
        for field in ('packages', 'py_modules'):
            items = getattr(self.dist, field, None) or []
            if items and len(items) == 1:
                log.debug(f"Single module/package detected, name: {items[0]}")
                return items[0]

        return None

    def _find_name_from_packages(self) -> Optional[str]:
        """Try to find the root package that is not a PEP 420 namespace"""
        if not self.dist.packages:
            return None

        # Shortest names first, with stub packages left out.
        packages = remove_stubs(sorted(self.dist.packages, key=len))
        package_dir = self.dist.package_dir or {}

        parent_pkg = find_parent_package(packages, package_dir, self._root_dir)
        if parent_pkg:
            log.debug(f"Common parent package detected, name: {parent_pkg}")
            return parent_pkg

        log.warn("No parent package detected, impossible to derive `name`")
        return None
|
|
|
|
|
def remove_nested_packages(packages: List[str]) -> List[str]:
    """Keep only the packages that are not nested inside another listed one.

    The result is ordered by name length (shortest first).

    >>> remove_nested_packages(["a", "a.b1", "a.b2", "a.b1.c1"])
    ['a']
    >>> remove_nested_packages(["a", "b", "c.d", "c.d.e.f", "g.h", "a.a1"])
    ['a', 'b', 'c.d', 'g.h']
    """
    by_length = sorted(packages, key=len)
    # A package is nested when some other listed package, followed by a dot,
    # is a prefix of its name.
    return [
        name
        for name in by_length
        if not any(name.startswith(f"{other}.") for other in by_length)
    ]
|
|
|
|
|
def remove_stubs(packages: List[str]) -> List[str]:
    """Drop type stub packages (:pep:`561`) from a list of packages.

    A package is a stub when its root component ends with ``-stubs``.

    >>> remove_stubs(["a", "a.b", "a-stubs", "a-stubs.b.c", "b", "c-stubs"])
    ['a', 'a.b', 'b']
    """
    kept = []
    for pkg in packages:
        root, _sep, _rest = pkg.partition(".")
        if not root.endswith("-stubs"):
            kept.append(pkg)
    return kept
|
|
|
|
|
def find_parent_package(
    packages: List[str], package_dir: Mapping[str, str], root_dir: _Path
) -> Optional[str]:
    """Find the parent package that is not a namespace.

    Returns the shortest package that is an ancestor of all the longer ones
    and whose directory contains an ``__init__.py`` file, or ``None`` when no
    such package exists.
    """
    ordered = sorted(packages, key=len)

    for position, candidate in enumerate(ordered, start=1):
        prefix = f"{candidate}."
        # Names are ordered by length, so a common ancestor must be a prefix
        # of every name that comes after it. Once that stops being true
        # (e.g. multiple root packages) no further candidate can qualify.
        if any(not other.startswith(prefix) for other in ordered[position:]):
            break

        location = find_package_path(candidate, package_dir, root_dir)
        if os.path.isfile(os.path.join(location, "__init__.py")):
            return candidate

    return None
|
|
|
|
|
def find_package_path(
    name: str, package_dir: Mapping[str, str], root_dir: _Path
) -> str:
    """Compute where the package ``name`` is expected to be found on disk,
    taking the ``package_dir`` option into account.

    >>> path = find_package_path("my.pkg", {"": "root/is/nested"}, ".")
    >>> path.replace(os.sep, "/")
    './root/is/nested/my/pkg'

    >>> path = find_package_path("my.pkg", {"my": "root/is/nested"}, ".")
    >>> path.replace(os.sep, "/")
    './root/is/nested/pkg'

    >>> path = find_package_path("my.pkg", {"my.pkg": "root/is/nested"}, ".")
    >>> path.replace(os.sep, "/")
    './root/is/nested'

    >>> path = find_package_path("other.pkg", {"my.pkg": "root/is/nested"}, ".")
    >>> path.replace(os.sep, "/")
    './other/pkg'
    """
    segments = name.split(".")

    # Try the most specific ``package_dir`` entry first (the full name), then
    # progressively shorter parent names.
    split_at = len(segments)
    while split_at > 0:
        candidate = ".".join(segments[:split_at])
        if candidate in package_dir:
            mapped_dir = package_dir[candidate]
            return os.path.join(root_dir, mapped_dir, *segments[split_at:])
        split_at -= 1

    # No entry matched: fall back to the root mapping (``""``), if any.
    fallback = package_dir.get("") or ""
    return os.path.join(root_dir, *fallback.split("/"), *segments)
|
|
|
|
|
def construct_package_dir(packages: List[str], package_path: _Path) -> Dict[str, str]:
    """Build a ``package_dir``-like mapping for the top-level ``packages``.

    Nested packages are left out. Each remaining package is mapped to a
    ``/``-separated path made of the parts of ``package_path`` followed by the
    components of the package name.
    """
    top_level = remove_nested_packages(packages)
    base_parts = Path(package_path).parts

    mapping = {}
    for pkg in top_level:
        mapping[pkg] = "/".join([*base_parts, *pkg.split(".")])
    return mapping
|
|