|
import logging |
|
import mimetypes |
|
import os |
|
import pathlib |
|
from typing import Callable, Iterable, Optional, Tuple |
|
|
|
from pip._internal.models.candidate import InstallationCandidate |
|
from pip._internal.models.link import Link |
|
from pip._internal.utils.urls import path_to_url, url_to_path |
|
from pip._internal.vcs import is_url |
|
|
|
logger = logging.getLogger(__name__) |
|
|
|
FoundCandidates = Iterable[InstallationCandidate] |
|
FoundLinks = Iterable[Link] |
|
CandidatesFromPage = Callable[[Link], Iterable[InstallationCandidate]] |
|
PageValidator = Callable[[Link], bool] |
|
|
|
|
|
class LinkSource: |
|
@property |
|
def link(self) -> Optional[Link]: |
|
"""Returns the underlying link, if there's one.""" |
|
raise NotImplementedError() |
|
|
|
def page_candidates(self) -> FoundCandidates: |
|
"""Candidates found by parsing an archive listing HTML file.""" |
|
raise NotImplementedError() |
|
|
|
def file_links(self) -> FoundLinks: |
|
"""Links found by specifying archives directly.""" |
|
raise NotImplementedError() |
|
|
|
|
|
def _is_html_file(file_url: str) -> bool: |
|
return mimetypes.guess_type(file_url, strict=False)[0] == "text/html" |
|
|
|
|
|
class _FlatDirectorySource(LinkSource): |
|
"""Link source specified by ``--find-links=<path-to-dir>``. |
|
|
|
This looks the content of the directory, and returns: |
|
|
|
* ``page_candidates``: Links listed on each HTML file in the directory. |
|
* ``file_candidates``: Archives in the directory. |
|
""" |
|
|
|
def __init__( |
|
self, |
|
candidates_from_page: CandidatesFromPage, |
|
path: str, |
|
) -> None: |
|
self._candidates_from_page = candidates_from_page |
|
self._path = pathlib.Path(os.path.realpath(path)) |
|
|
|
@property |
|
def link(self) -> Optional[Link]: |
|
return None |
|
|
|
def page_candidates(self) -> FoundCandidates: |
|
for path in self._path.iterdir(): |
|
url = path_to_url(str(path)) |
|
if not _is_html_file(url): |
|
continue |
|
yield from self._candidates_from_page(Link(url)) |
|
|
|
def file_links(self) -> FoundLinks: |
|
for path in self._path.iterdir(): |
|
url = path_to_url(str(path)) |
|
if _is_html_file(url): |
|
continue |
|
yield Link(url) |
|
|
|
|
|
class _LocalFileSource(LinkSource): |
|
"""``--find-links=<path-or-url>`` or ``--[extra-]index-url=<path-or-url>``. |
|
|
|
If a URL is supplied, it must be a ``file:`` URL. If a path is supplied to |
|
the option, it is converted to a URL first. This returns: |
|
|
|
* ``page_candidates``: Links listed on an HTML file. |
|
* ``file_candidates``: The non-HTML file. |
|
""" |
|
|
|
def __init__( |
|
self, |
|
candidates_from_page: CandidatesFromPage, |
|
link: Link, |
|
) -> None: |
|
self._candidates_from_page = candidates_from_page |
|
self._link = link |
|
|
|
@property |
|
def link(self) -> Optional[Link]: |
|
return self._link |
|
|
|
def page_candidates(self) -> FoundCandidates: |
|
if not _is_html_file(self._link.url): |
|
return |
|
yield from self._candidates_from_page(self._link) |
|
|
|
def file_links(self) -> FoundLinks: |
|
if _is_html_file(self._link.url): |
|
return |
|
yield self._link |
|
|
|
|
|
class _RemoteFileSource(LinkSource): |
|
"""``--find-links=<url>`` or ``--[extra-]index-url=<url>``. |
|
|
|
This returns: |
|
|
|
* ``page_candidates``: Links listed on an HTML file. |
|
* ``file_candidates``: The non-HTML file. |
|
""" |
|
|
|
def __init__( |
|
self, |
|
candidates_from_page: CandidatesFromPage, |
|
page_validator: PageValidator, |
|
link: Link, |
|
) -> None: |
|
self._candidates_from_page = candidates_from_page |
|
self._page_validator = page_validator |
|
self._link = link |
|
|
|
@property |
|
def link(self) -> Optional[Link]: |
|
return self._link |
|
|
|
def page_candidates(self) -> FoundCandidates: |
|
if not self._page_validator(self._link): |
|
return |
|
yield from self._candidates_from_page(self._link) |
|
|
|
def file_links(self) -> FoundLinks: |
|
yield self._link |
|
|
|
|
|
class _IndexDirectorySource(LinkSource): |
|
"""``--[extra-]index-url=<path-to-directory>``. |
|
|
|
This is treated like a remote URL; ``candidates_from_page`` contains logic |
|
for this by appending ``index.html`` to the link. |
|
""" |
|
|
|
def __init__( |
|
self, |
|
candidates_from_page: CandidatesFromPage, |
|
link: Link, |
|
) -> None: |
|
self._candidates_from_page = candidates_from_page |
|
self._link = link |
|
|
|
@property |
|
def link(self) -> Optional[Link]: |
|
return self._link |
|
|
|
def page_candidates(self) -> FoundCandidates: |
|
yield from self._candidates_from_page(self._link) |
|
|
|
def file_links(self) -> FoundLinks: |
|
return () |
|
|
|
|
|
def build_source( |
|
location: str, |
|
*, |
|
candidates_from_page: CandidatesFromPage, |
|
page_validator: PageValidator, |
|
expand_dir: bool, |
|
cache_link_parsing: bool, |
|
) -> Tuple[Optional[str], Optional[LinkSource]]: |
|
path: Optional[str] = None |
|
url: Optional[str] = None |
|
if os.path.exists(location): |
|
url = path_to_url(location) |
|
path = location |
|
elif location.startswith("file:"): |
|
url = location |
|
path = url_to_path(location) |
|
elif is_url(location): |
|
url = location |
|
|
|
if url is None: |
|
msg = ( |
|
"Location '%s' is ignored: " |
|
"it is either a non-existing path or lacks a specific scheme." |
|
) |
|
logger.warning(msg, location) |
|
return (None, None) |
|
|
|
if path is None: |
|
source: LinkSource = _RemoteFileSource( |
|
candidates_from_page=candidates_from_page, |
|
page_validator=page_validator, |
|
link=Link(url, cache_link_parsing=cache_link_parsing), |
|
) |
|
return (url, source) |
|
|
|
if os.path.isdir(path): |
|
if expand_dir: |
|
source = _FlatDirectorySource( |
|
candidates_from_page=candidates_from_page, |
|
path=path, |
|
) |
|
else: |
|
source = _IndexDirectorySource( |
|
candidates_from_page=candidates_from_page, |
|
link=Link(url, cache_link_parsing=cache_link_parsing), |
|
) |
|
return (url, source) |
|
elif os.path.isfile(path): |
|
source = _LocalFileSource( |
|
candidates_from_page=candidates_from_page, |
|
link=Link(url, cache_link_parsing=cache_link_parsing), |
|
) |
|
return (url, source) |
|
logger.warning( |
|
"Location '%s' is ignored: it is neither a file nor a directory.", |
|
location, |
|
) |
|
return (url, None) |
|
|