da0609001d38541f2e1d84b2fab95a3e5cb5413337fc2247150c3f19aae1664e
- lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/INSTALLER +1 -0
- lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/LICENSE +29 -0
- lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/METADATA +168 -0
- lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/RECORD +104 -0
- lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/WHEEL +5 -0
- lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/top_level.txt +1 -0
- lib/python3.11/site-packages/fsspec/parquet.py +551 -0
- lib/python3.11/site-packages/fsspec/registry.py +299 -0
- lib/python3.11/site-packages/fsspec/spec.py +1963 -0
- lib/python3.11/site-packages/fsspec/tests/abstract/__init__.py +287 -0
- lib/python3.11/site-packages/fsspec/tests/abstract/__pycache__/__init__.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/fsspec/tests/abstract/__pycache__/common.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/fsspec/tests/abstract/__pycache__/copy.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/fsspec/tests/abstract/__pycache__/get.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/fsspec/tests/abstract/__pycache__/put.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/fsspec/tests/abstract/common.py +175 -0
- lib/python3.11/site-packages/fsspec/tests/abstract/copy.py +543 -0
- lib/python3.11/site-packages/fsspec/tests/abstract/get.py +587 -0
- lib/python3.11/site-packages/fsspec/tests/abstract/put.py +577 -0
- lib/python3.11/site-packages/fsspec/transaction.py +85 -0
- lib/python3.11/site-packages/fsspec/utils.py +742 -0
- lib/python3.11/site-packages/functorch/_C.cpython-311-darwin.so +0 -0
- lib/python3.11/site-packages/functorch/__init__.py +38 -0
- lib/python3.11/site-packages/functorch/__pycache__/__init__.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/_src/__init__.py +0 -0
- lib/python3.11/site-packages/functorch/_src/__pycache__/__init__.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/_src/aot_autograd/__init__.py +8 -0
- lib/python3.11/site-packages/functorch/_src/aot_autograd/__pycache__/__init__.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/_src/eager_transforms/__init__.py +7 -0
- lib/python3.11/site-packages/functorch/_src/eager_transforms/__pycache__/__init__.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/_src/make_functional/__init__.py +4 -0
- lib/python3.11/site-packages/functorch/_src/make_functional/__pycache__/__init__.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/_src/vmap/__init__.py +16 -0
- lib/python3.11/site-packages/functorch/_src/vmap/__pycache__/__init__.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/compile/__init__.py +31 -0
- lib/python3.11/site-packages/functorch/compile/__pycache__/__init__.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/dim/__init__.py +179 -0
- lib/python3.11/site-packages/functorch/dim/__pycache__/__init__.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/dim/__pycache__/batch_tensor.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/dim/__pycache__/delayed_mul_tensor.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/dim/__pycache__/dim.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/dim/__pycache__/magic_trace.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/dim/__pycache__/op_properties.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/dim/__pycache__/reference.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/dim/__pycache__/tree_map.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/dim/__pycache__/wrap_type.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/dim/batch_tensor.py +25 -0
- lib/python3.11/site-packages/functorch/dim/delayed_mul_tensor.py +77 -0
- lib/python3.11/site-packages/functorch/dim/dim.py +110 -0
- lib/python3.11/site-packages/functorch/dim/magic_trace.py +42 -0
lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/INSTALLER
ADDED
@@ -0,0 +1 @@
+pip
lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/LICENSE
ADDED
@@ -0,0 +1,29 @@
+BSD 3-Clause License
+
+Copyright (c) 2018, Martin Durant
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* Neither the name of the copyright holder nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/METADATA
ADDED
@@ -0,0 +1,168 @@
+Metadata-Version: 2.1
+Name: fsspec
+Version: 2023.12.2
+Summary: File-system specification
+Home-page: https://github.com/fsspec/filesystem_spec
+Maintainer: Martin Durant
+Maintainer-email: [email protected]
+License: BSD
+Project-URL: Changelog, https://filesystem-spec.readthedocs.io/en/latest/changelog.html
+Project-URL: Documentation, https://filesystem-spec.readthedocs.io/en/latest/
+Keywords: file
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: BSD License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Provides-Extra: abfs
+Requires-Dist: adlfs ; extra == 'abfs'
+Provides-Extra: adl
+Requires-Dist: adlfs ; extra == 'adl'
+Provides-Extra: arrow
+Requires-Dist: pyarrow >=1 ; extra == 'arrow'
+Provides-Extra: dask
+Requires-Dist: dask ; extra == 'dask'
+Requires-Dist: distributed ; extra == 'dask'
+Provides-Extra: devel
+Requires-Dist: pytest ; extra == 'devel'
+Requires-Dist: pytest-cov ; extra == 'devel'
+Provides-Extra: dropbox
+Requires-Dist: dropboxdrivefs ; extra == 'dropbox'
+Requires-Dist: requests ; extra == 'dropbox'
+Requires-Dist: dropbox ; extra == 'dropbox'
+Provides-Extra: entrypoints
+Provides-Extra: full
+Requires-Dist: adlfs ; extra == 'full'
+Requires-Dist: aiohttp !=4.0.0a0,!=4.0.0a1 ; extra == 'full'
+Requires-Dist: dask ; extra == 'full'
+Requires-Dist: distributed ; extra == 'full'
+Requires-Dist: dropbox ; extra == 'full'
+Requires-Dist: dropboxdrivefs ; extra == 'full'
+Requires-Dist: fusepy ; extra == 'full'
+Requires-Dist: gcsfs ; extra == 'full'
+Requires-Dist: libarchive-c ; extra == 'full'
+Requires-Dist: ocifs ; extra == 'full'
+Requires-Dist: panel ; extra == 'full'
+Requires-Dist: paramiko ; extra == 'full'
+Requires-Dist: pyarrow >=1 ; extra == 'full'
+Requires-Dist: pygit2 ; extra == 'full'
+Requires-Dist: requests ; extra == 'full'
+Requires-Dist: s3fs ; extra == 'full'
+Requires-Dist: smbprotocol ; extra == 'full'
+Requires-Dist: tqdm ; extra == 'full'
+Provides-Extra: fuse
+Requires-Dist: fusepy ; extra == 'fuse'
+Provides-Extra: gcs
+Requires-Dist: gcsfs ; extra == 'gcs'
+Provides-Extra: git
+Requires-Dist: pygit2 ; extra == 'git'
+Provides-Extra: github
+Requires-Dist: requests ; extra == 'github'
+Provides-Extra: gs
+Requires-Dist: gcsfs ; extra == 'gs'
+Provides-Extra: gui
+Requires-Dist: panel ; extra == 'gui'
+Provides-Extra: hdfs
+Requires-Dist: pyarrow >=1 ; extra == 'hdfs'
+Provides-Extra: http
+Requires-Dist: requests ; extra == 'http'
+Requires-Dist: aiohttp !=4.0.0a0,!=4.0.0a1 ; extra == 'http'
+Provides-Extra: libarchive
+Requires-Dist: libarchive-c ; extra == 'libarchive'
+Provides-Extra: oci
+Requires-Dist: ocifs ; extra == 'oci'
+Provides-Extra: s3
+Requires-Dist: s3fs ; extra == 's3'
+Provides-Extra: sftp
+Requires-Dist: paramiko ; extra == 'sftp'
+Provides-Extra: smb
+Requires-Dist: smbprotocol ; extra == 'smb'
+Provides-Extra: ssh
+Requires-Dist: paramiko ; extra == 'ssh'
+Provides-Extra: tqdm
+Requires-Dist: tqdm ; extra == 'tqdm'
+
+# filesystem_spec
+
+[![PyPI version](https://badge.fury.io/py/fsspec.svg)](https://pypi.python.org/pypi/fsspec/)
+[![Anaconda-Server Badge](https://anaconda.org/conda-forge/fsspec/badges/version.svg)](https://anaconda.org/conda-forge/fsspec)
+![Build](https://github.com/fsspec/filesystem_spec/workflows/CI/badge.svg)
+[![Docs](https://readthedocs.org/projects/filesystem-spec/badge/?version=latest)](https://filesystem-spec.readthedocs.io/en/latest/?badge=latest)
+[![PyPi downloads](https://img.shields.io/pypi/dm/fsspec?label=pypi%20downloads&style=flat)](https://pepy.tech/project/fsspec)
+
+A specification for pythonic filesystems.
+
+## Install
+
+```bash
+pip install fsspec
+```
+
+would install the base fsspec. Various optionally supported features might require specification of custom
+extra require, e.g. `pip install fsspec[ssh]` will install dependencies for `ssh` backends support.
+Use `pip install fsspec[full]` for installation of all known extra dependencies.
+
+Up-to-date package also provided through conda-forge distribution:
+
+```bash
+conda install -c conda-forge fsspec
+```
+
+
+## Purpose
+
+To produce a template or specification for a file-system interface, that specific implementations should follow,
+so that applications making use of them can rely on a common behaviour and not have to worry about the specific
+internal implementation decisions with any given backend. Many such implementations are included in this package,
+or in sister projects such as `s3fs` and `gcsfs`.
+
+In addition, if this is well-designed, then additional functionality, such as a key-value store or FUSE
+mounting of the file-system implementation may be available for all implementations "for free".
+
+## Documentation
+
+Please refer to [RTD](https://filesystem-spec.readthedocs.io/en/latest/?badge=latest)
+
+## Develop
+
+fsspec uses GitHub Actions for CI. Environment files can be found
+in the "ci/" directory. Note that the main environment is called "py38",
+but it is expected that the version of python installed be adjustable at
+CI runtime. For local use, pick a version suitable for you.
+
+### Testing
+
+Tests can be run in the dev environment, if activated, via ``pytest fsspec``.
+
+The full fsspec suite requires a system-level docker, docker-compose, and fuse
+installation. If only making changes to one backend implementation, it is
+not generally necessary to run all tests locally.
+
+It is expected that contributors ensure that any change to fsspec does not
+cause issues or regressions for either other fsspec-related packages such
+as gcsfs and s3fs, nor for downstream users of fsspec. The "downstream" CI
+run and corresponding environment file run a set of tests from the dask
+test suite, and very minimal tests against pandas and zarr from the
+test_downstream.py module in this repo.
+
+### Code Formatting
+
+fsspec uses [Black](https://black.readthedocs.io/en/stable) to ensure
+a consistent code format throughout the project.
+Run ``black fsspec`` from the root of the filesystem_spec repository to
+auto-format your code. Additionally, many editors have plugins that will apply
+``black`` as you edit files. ``black`` is included in the ``tox`` environments.
+
+Optionally, you may wish to setup [pre-commit hooks](https://pre-commit.com) to
+automatically run ``black`` when you make a git commit.
+Run ``pre-commit install --install-hooks`` from the root of the
+filesystem_spec repository to setup pre-commit hooks. ``black`` will now be run
+before you commit, reformatting any changed files. You can format without
+committing via ``pre-commit run`` or skip these checks with ``git commit
+--no-verify``.
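To make the "common behaviour" described in the README above concrete, here is a minimal usage sketch. It is not part of the packaged files; the commented S3 URL is a hypothetical placeholder that would additionally require the `s3` extra.

```python
# Illustrative sketch only: the same fsspec calls work against any registered
# backend; only the URL protocol changes. "memory" ships with fsspec itself.
import fsspec

with fsspec.open("memory://demo/example.txt", "wt") as f:
    f.write("hello fsspec")

mem = fsspec.filesystem("memory")
print(mem.ls("/demo"))                 # lists the file written above
print(mem.cat("/demo/example.txt"))    # b'hello fsspec'

# A remote backend plugs in through the identical interface once its extra is
# installed, e.g. `pip install fsspec[s3]` (bucket name is hypothetical):
# with fsspec.open("s3://example-bucket/data.csv", "rt", anon=True) as f:
#     print(f.readline())
```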
lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/RECORD
ADDED
@@ -0,0 +1,104 @@
1 |
+
fsspec-2023.12.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
2 |
+
fsspec-2023.12.2.dist-info/LICENSE,sha256=LcNUls5TpzB5FcAIqESq1T53K0mzTN0ARFBnaRQH7JQ,1513
|
3 |
+
fsspec-2023.12.2.dist-info/METADATA,sha256=toLeg14fW_MfA33P2NVIPEyWFL7k004pAolypgHrECQ,6829
|
4 |
+
fsspec-2023.12.2.dist-info/RECORD,,
|
5 |
+
fsspec-2023.12.2.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
6 |
+
fsspec-2023.12.2.dist-info/top_level.txt,sha256=blt2pDrQDwN3Gklcw13CSPLQRd6aaOgJ8AxqrW395MI,7
|
7 |
+
fsspec/__init__.py,sha256=2kT62GfFK-AjgS-LgwSsCo_VA2IePvsyv8Ash5oiaFA,1982
|
8 |
+
fsspec/__pycache__/__init__.cpython-311.pyc,,
|
9 |
+
fsspec/__pycache__/_version.cpython-311.pyc,,
|
10 |
+
fsspec/__pycache__/archive.cpython-311.pyc,,
|
11 |
+
fsspec/__pycache__/asyn.cpython-311.pyc,,
|
12 |
+
fsspec/__pycache__/caching.cpython-311.pyc,,
|
13 |
+
fsspec/__pycache__/callbacks.cpython-311.pyc,,
|
14 |
+
fsspec/__pycache__/compression.cpython-311.pyc,,
|
15 |
+
fsspec/__pycache__/config.cpython-311.pyc,,
|
16 |
+
fsspec/__pycache__/conftest.cpython-311.pyc,,
|
17 |
+
fsspec/__pycache__/core.cpython-311.pyc,,
|
18 |
+
fsspec/__pycache__/dircache.cpython-311.pyc,,
|
19 |
+
fsspec/__pycache__/exceptions.cpython-311.pyc,,
|
20 |
+
fsspec/__pycache__/fuse.cpython-311.pyc,,
|
21 |
+
fsspec/__pycache__/generic.cpython-311.pyc,,
|
22 |
+
fsspec/__pycache__/gui.cpython-311.pyc,,
|
23 |
+
fsspec/__pycache__/mapping.cpython-311.pyc,,
|
24 |
+
fsspec/__pycache__/parquet.cpython-311.pyc,,
|
25 |
+
fsspec/__pycache__/registry.cpython-311.pyc,,
|
26 |
+
fsspec/__pycache__/spec.cpython-311.pyc,,
|
27 |
+
fsspec/__pycache__/transaction.cpython-311.pyc,,
|
28 |
+
fsspec/__pycache__/utils.cpython-311.pyc,,
|
29 |
+
fsspec/_version.py,sha256=Kf9CIUDExVlqHjn9lLOn0QJcfeRWAe0PFvFHkRzI9iA,501
|
30 |
+
fsspec/archive.py,sha256=S__DzfZj-urAN3tp2W6jJ6YDiXG1fAl7FjvWUN73qIE,2386
|
31 |
+
fsspec/asyn.py,sha256=wx6vr5eBJYdW7a2cyv-LkfWu5dCDCcAjcDKjp3ylgR0,36154
|
32 |
+
fsspec/caching.py,sha256=N45pzJdD4w5FOX_sxGvHWirggPNB66JTGP1HH6fpSck,28781
|
33 |
+
fsspec/callbacks.py,sha256=qmD1v-WWxWmTmcUkEadq-_F_n3OGp9JYarjupUq_j3o,6358
|
34 |
+
fsspec/compression.py,sha256=Zrbbb_m2SCF427BMJRYbDKMuSZIIV2YqteoS7AdR8Sc,4867
|
35 |
+
fsspec/config.py,sha256=LF4Zmu1vhJW7Je9Q-cwkRc3xP7Rhyy7Xnwj26Z6sv2g,4279
|
36 |
+
fsspec/conftest.py,sha256=fVfx-NLrH_OZS1TIpYNoPzM7efEcMoL62reHOdYeFCA,1245
|
37 |
+
fsspec/core.py,sha256=0yCj1Z5MhbSDIQiqFs49VORl9QaGwV6hp9bXdkIoPIo,22363
|
38 |
+
fsspec/dircache.py,sha256=YzogWJrhEastHU7vWz-cJiJ7sdtLXFXhEpInGKd4EcM,2717
|
39 |
+
fsspec/exceptions.py,sha256=s5eA2wIwzj-aeV0i_KDXsBaIhJJRKzmMGUGwuBHTnS4,348
|
40 |
+
fsspec/fuse.py,sha256=66amOa6wdIbS0DMhhfAPUoOB37HPorfXD1izV0prmTY,10145
|
41 |
+
fsspec/generic.py,sha256=2EcEegwdTLyQ2qSgz3Y6cbAuiWz7bybsEWai_XYkGtw,13457
|
42 |
+
fsspec/gui.py,sha256=BEVFplRsQyakNeCWU-vyZBD-16x_flEe0XiDxXparEU,13913
|
43 |
+
fsspec/implementations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
44 |
+
fsspec/implementations/__pycache__/__init__.cpython-311.pyc,,
|
45 |
+
fsspec/implementations/__pycache__/arrow.cpython-311.pyc,,
|
46 |
+
fsspec/implementations/__pycache__/cache_mapper.cpython-311.pyc,,
|
47 |
+
fsspec/implementations/__pycache__/cache_metadata.cpython-311.pyc,,
|
48 |
+
fsspec/implementations/__pycache__/cached.cpython-311.pyc,,
|
49 |
+
fsspec/implementations/__pycache__/dask.cpython-311.pyc,,
|
50 |
+
fsspec/implementations/__pycache__/data.cpython-311.pyc,,
|
51 |
+
fsspec/implementations/__pycache__/dbfs.cpython-311.pyc,,
|
52 |
+
fsspec/implementations/__pycache__/dirfs.cpython-311.pyc,,
|
53 |
+
fsspec/implementations/__pycache__/ftp.cpython-311.pyc,,
|
54 |
+
fsspec/implementations/__pycache__/git.cpython-311.pyc,,
|
55 |
+
fsspec/implementations/__pycache__/github.cpython-311.pyc,,
|
56 |
+
fsspec/implementations/__pycache__/http.cpython-311.pyc,,
|
57 |
+
fsspec/implementations/__pycache__/jupyter.cpython-311.pyc,,
|
58 |
+
fsspec/implementations/__pycache__/libarchive.cpython-311.pyc,,
|
59 |
+
fsspec/implementations/__pycache__/local.cpython-311.pyc,,
|
60 |
+
fsspec/implementations/__pycache__/memory.cpython-311.pyc,,
|
61 |
+
fsspec/implementations/__pycache__/reference.cpython-311.pyc,,
|
62 |
+
fsspec/implementations/__pycache__/sftp.cpython-311.pyc,,
|
63 |
+
fsspec/implementations/__pycache__/smb.cpython-311.pyc,,
|
64 |
+
fsspec/implementations/__pycache__/tar.cpython-311.pyc,,
|
65 |
+
fsspec/implementations/__pycache__/webhdfs.cpython-311.pyc,,
|
66 |
+
fsspec/implementations/__pycache__/zip.cpython-311.pyc,,
|
67 |
+
fsspec/implementations/arrow.py,sha256=1d-c5KceQJxm8QXML8fFXHvQx0wstG-tNJNsrgMX_CI,8240
|
68 |
+
fsspec/implementations/cache_mapper.py,sha256=nE_sY3vw-jJbeBcAP6NGtacP3jHW_7EcG3yUSf0A-4Y,2502
|
69 |
+
fsspec/implementations/cache_metadata.py,sha256=ZvyA7Y3KK-5Ct4E5pELzD6mH_5T03XqaKVT96qYDADU,8576
|
70 |
+
fsspec/implementations/cached.py,sha256=jCQSAIiO7M8OOmwG4cCYn4LGvMVCbldC9j7GeonwoEc,30238
|
71 |
+
fsspec/implementations/dask.py,sha256=CXZbJzIVOhKV8ILcxuy3bTvcacCueAbyQxmvAkbPkrk,4466
|
72 |
+
fsspec/implementations/data.py,sha256=Oti0dKzyeadnVIedo3s8CADoh9bNM-96_6viTEYr4lo,1245
|
73 |
+
fsspec/implementations/dbfs.py,sha256=0ndCE2OQqrWv6Y8ETufxOQ9ymIIO2JA_Q82bnilqTaw,14660
|
74 |
+
fsspec/implementations/dirfs.py,sha256=8EEgKin5JgFBqzHaKig7ipiFAZJvbChUX_vpC_jagoY,11136
|
75 |
+
fsspec/implementations/ftp.py,sha256=FzcHeieyda-ai_D8w4YKCzvI4gshuFYlBACBuEIx2Nk,11419
|
76 |
+
fsspec/implementations/git.py,sha256=vKGI-Vd5q4H2RrvhebkPc9NwlfkZ980OUGhebeCw-M0,4034
|
77 |
+
fsspec/implementations/github.py,sha256=hCisC1vXzZ9kP1UnyGz2Ba8c9cS2JmSGFHtgHG_2Gqw,7190
|
78 |
+
fsspec/implementations/http.py,sha256=cK7HQdVgR8PVLWkB0q0xsXohOP16X-zQiT2uqB1Kq4E,29265
|
79 |
+
fsspec/implementations/jupyter.py,sha256=B2uj7OEm7yIk-vRSsO37_ND0t0EBvn4B-Su43ibN4Pg,3811
|
80 |
+
fsspec/implementations/libarchive.py,sha256=5_I2DiLXwQ1JC8x-K7jXu-tBwhO9dj7tFLnb0bTnVMQ,7102
|
81 |
+
fsspec/implementations/local.py,sha256=GV5OltZrz9aOM8KKSx3T7QE7-U9KX3BOz3Eql3jw_xY,13371
|
82 |
+
fsspec/implementations/memory.py,sha256=-a-NR66T-sGj9xTInUsu8KsEiqd156bF8Ui9BuXfmEA,9698
|
83 |
+
fsspec/implementations/reference.py,sha256=BHhvx8LIYyBk5OVBWw-PmZsAs_OCaLvF1p8656bwVJE,42438
|
84 |
+
fsspec/implementations/sftp.py,sha256=TNmXVac9c5H9Gmiee2EjZNKXnXdkwwaNL2cHDkp_gG4,5632
|
85 |
+
fsspec/implementations/smb.py,sha256=k3RtzW97lJtYuw_QpP1rJRFnUBmSsw9twFjUCex0a5U,10591
|
86 |
+
fsspec/implementations/tar.py,sha256=dam78Tp_CozybNqCY2JYgGBS3Uc9FuJUAT9oB0lolOs,4111
|
87 |
+
fsspec/implementations/webhdfs.py,sha256=C5T96C_p66pUf2cQda-7HIZ9fKYwfCkupf2LN_7n7Dw,16145
|
88 |
+
fsspec/implementations/zip.py,sha256=JDX-3HOI15qUl6VTBsNPuDp5RVN6s2n3Bywd4mMu0T0,4347
|
89 |
+
fsspec/mapping.py,sha256=WFEXRWxujQwfzzkRP5tpdIE0265okAtlP97qFZGvV1k,8165
|
90 |
+
fsspec/parquet.py,sha256=i4H3EU3K1Q6jp8sqjFji6a6gKnlOEZufaa7DRNE5X-4,19516
|
91 |
+
fsspec/registry.py,sha256=-dl7sh2tsfhMA2uxz5KQDsPFehQTgMJIbVjNq6QLoKU,11145
|
92 |
+
fsspec/spec.py,sha256=kfZpvKoh-fftKG6cOkOi2k0PJJwRqV4ZX_NElCBdcB8,66154
|
93 |
+
fsspec/tests/abstract/__init__.py,sha256=i1wcFixV6QhOwdoB24c8oXjzobISNqiKVz9kl2DvAY8,10028
|
94 |
+
fsspec/tests/abstract/__pycache__/__init__.cpython-311.pyc,,
|
95 |
+
fsspec/tests/abstract/__pycache__/common.cpython-311.pyc,,
|
96 |
+
fsspec/tests/abstract/__pycache__/copy.cpython-311.pyc,,
|
97 |
+
fsspec/tests/abstract/__pycache__/get.cpython-311.pyc,,
|
98 |
+
fsspec/tests/abstract/__pycache__/put.cpython-311.pyc,,
|
99 |
+
fsspec/tests/abstract/common.py,sha256=1GQwNo5AONzAnzZj0fWgn8NJPLXALehbsuGxS3FzWVU,4973
|
100 |
+
fsspec/tests/abstract/copy.py,sha256=nyCp1Q9apHzti2_UPDh3HzVhRmV7dciD-3dq-wM7JuU,19643
|
101 |
+
fsspec/tests/abstract/get.py,sha256=vNR4HztvTR7Cj56AMo7_tx7TeYz1Jgr_2Wb8Lv-UiBY,20755
|
102 |
+
fsspec/tests/abstract/put.py,sha256=hEf-yuMWBOT7B6eWcck3tMyJWzdVXtxkY-O6LUt1KAE,20877
|
103 |
+
fsspec/transaction.py,sha256=jeexB-H6Aw_gN6Z7hoKKe6v8zizITq39-gyTgpipIKE,2251
|
104 |
+
fsspec/utils.py,sha256=_VX_0VwDtoAFSjMYrxvJvnPNX9FMoHO5BlFHXJ0bHFI,23053
|
lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/WHEEL
ADDED
@@ -0,0 +1,5 @@
+Wheel-Version: 1.0
+Generator: bdist_wheel (0.42.0)
+Root-Is-Purelib: true
+Tag: py3-none-any
+
lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/top_level.txt
ADDED
@@ -0,0 +1 @@
+fsspec
lib/python3.11/site-packages/fsspec/parquet.py
ADDED
@@ -0,0 +1,551 @@
1 |
+
import io
|
2 |
+
import json
|
3 |
+
import warnings
|
4 |
+
|
5 |
+
from .core import url_to_fs
|
6 |
+
from .utils import merge_offset_ranges
|
7 |
+
|
8 |
+
# Parquet-Specific Utilities for fsspec
|
9 |
+
#
|
10 |
+
# Most of the functions defined in this module are NOT
|
11 |
+
# intended for public consumption. The only exception
|
12 |
+
# to this is `open_parquet_file`, which should be used
|
13 |
+
# in place of `fs.open()` to open parquet-formatted files
|
14 |
+
# on remote file systems.
|
15 |
+
|
16 |
+
|
17 |
+
def open_parquet_file(
|
18 |
+
path,
|
19 |
+
mode="rb",
|
20 |
+
fs=None,
|
21 |
+
metadata=None,
|
22 |
+
columns=None,
|
23 |
+
row_groups=None,
|
24 |
+
storage_options=None,
|
25 |
+
strict=False,
|
26 |
+
engine="auto",
|
27 |
+
max_gap=64_000,
|
28 |
+
max_block=256_000_000,
|
29 |
+
footer_sample_size=1_000_000,
|
30 |
+
**kwargs,
|
31 |
+
):
|
32 |
+
"""
|
33 |
+
Return a file-like object for a single Parquet file.
|
34 |
+
|
35 |
+
The specified parquet `engine` will be used to parse the
|
36 |
+
footer metadata, and determine the required byte ranges
|
37 |
+
from the file. The target path will then be opened with
|
38 |
+
the "parts" (`KnownPartsOfAFile`) caching strategy.
|
39 |
+
|
40 |
+
Note that this method is intended for usage with remote
|
41 |
+
file systems, and is unlikely to improve parquet-read
|
42 |
+
performance on local file systems.
|
43 |
+
|
44 |
+
Parameters
|
45 |
+
----------
|
46 |
+
path: str
|
47 |
+
Target file path.
|
48 |
+
mode: str, optional
|
49 |
+
Mode option to be passed through to `fs.open`. Default is "rb".
|
50 |
+
metadata: Any, optional
|
51 |
+
Parquet metadata object. Object type must be supported
|
52 |
+
by the backend parquet engine. For now, only the "fastparquet"
|
53 |
+
engine supports an explicit `ParquetFile` metadata object.
|
54 |
+
If a metadata object is supplied, the remote footer metadata
|
55 |
+
will not need to be transferred into local memory.
|
56 |
+
fs: AbstractFileSystem, optional
|
57 |
+
Filesystem object to use for opening the file. If nothing is
|
58 |
+
specified, an `AbstractFileSystem` object will be inferred.
|
59 |
+
engine : str, default "auto"
|
60 |
+
Parquet engine to use for metadata parsing. Allowed options
|
61 |
+
include "fastparquet", "pyarrow", and "auto". The specified
|
62 |
+
engine must be installed in the current environment. If
|
63 |
+
"auto" is specified, and both engines are installed,
|
64 |
+
"fastparquet" will take precedence over "pyarrow".
|
65 |
+
columns: list, optional
|
66 |
+
List of all column names that may be read from the file.
|
67 |
+
row_groups : list, optional
|
68 |
+
List of all row-groups that may be read from the file. This
|
69 |
+
may be a list of row-group indices (integers), or it may be
|
70 |
+
a list of `RowGroup` metadata objects (if the "fastparquet"
|
71 |
+
engine is used).
|
72 |
+
storage_options : dict, optional
|
73 |
+
Used to generate an `AbstractFileSystem` object if `fs` was
|
74 |
+
not specified.
|
75 |
+
strict : bool, optional
|
76 |
+
Whether the resulting `KnownPartsOfAFile` cache should
|
77 |
+
fetch reads that go beyond a known byte-range boundary.
|
78 |
+
If `False` (the default), any read that ends outside a
|
79 |
+
known part will be zero padded. Note that using
|
80 |
+
`strict=True` may be useful for debugging.
|
81 |
+
max_gap : int, optional
|
82 |
+
Neighboring byte ranges will only be merged when their
|
83 |
+
inter-range gap is <= `max_gap`. Default is 64KB.
|
84 |
+
max_block : int, optional
|
85 |
+
Neighboring byte ranges will only be merged when the size of
|
86 |
+
the aggregated range is <= `max_block`. Default is 256MB.
|
87 |
+
footer_sample_size : int, optional
|
88 |
+
Number of bytes to read from the end of the path to look
|
89 |
+
for the footer metadata. If the sampled bytes do not contain
|
90 |
+
the footer, a second read request will be required, and
|
91 |
+
performance will suffer. Default is 1MB.
|
92 |
+
**kwargs :
|
93 |
+
Optional key-word arguments to pass to `fs.open`
|
94 |
+
"""
|
95 |
+
|
96 |
+
# Make sure we have an `AbstractFileSystem` object
|
97 |
+
# to work with
|
98 |
+
if fs is None:
|
99 |
+
fs = url_to_fs(path, **(storage_options or {}))[0]
|
100 |
+
|
101 |
+
# For now, `columns == []` not supported. Just use
|
102 |
+
# default `open` command with `path` input
|
103 |
+
if columns is not None and len(columns) == 0:
|
104 |
+
return fs.open(path, mode=mode)
|
105 |
+
|
106 |
+
# Set the engine
|
107 |
+
engine = _set_engine(engine)
|
108 |
+
|
109 |
+
# Fetch the known byte ranges needed to read
|
110 |
+
# `columns` and/or `row_groups`
|
111 |
+
data = _get_parquet_byte_ranges(
|
112 |
+
[path],
|
113 |
+
fs,
|
114 |
+
metadata=metadata,
|
115 |
+
columns=columns,
|
116 |
+
row_groups=row_groups,
|
117 |
+
engine=engine,
|
118 |
+
max_gap=max_gap,
|
119 |
+
max_block=max_block,
|
120 |
+
footer_sample_size=footer_sample_size,
|
121 |
+
)
|
122 |
+
|
123 |
+
# Extract file name from `data`
|
124 |
+
fn = next(iter(data)) if data else path
|
125 |
+
|
126 |
+
# Call self.open with "parts" caching
|
127 |
+
options = kwargs.pop("cache_options", {}).copy()
|
128 |
+
return fs.open(
|
129 |
+
fn,
|
130 |
+
mode=mode,
|
131 |
+
cache_type="parts",
|
132 |
+
cache_options={
|
133 |
+
**options,
|
134 |
+
**{
|
135 |
+
"data": data.get(fn, {}),
|
136 |
+
"strict": strict,
|
137 |
+
},
|
138 |
+
},
|
139 |
+
**kwargs,
|
140 |
+
)
|
141 |
+
|
142 |
+
|
143 |
+
def _get_parquet_byte_ranges(
|
144 |
+
paths,
|
145 |
+
fs,
|
146 |
+
metadata=None,
|
147 |
+
columns=None,
|
148 |
+
row_groups=None,
|
149 |
+
max_gap=64_000,
|
150 |
+
max_block=256_000_000,
|
151 |
+
footer_sample_size=1_000_000,
|
152 |
+
engine="auto",
|
153 |
+
):
|
154 |
+
"""Get a dictionary of the known byte ranges needed
|
155 |
+
to read a specific column/row-group selection from a
|
156 |
+
Parquet dataset. Each value in the output dictionary
|
157 |
+
is intended for use as the `data` argument for the
|
158 |
+
`KnownPartsOfAFile` caching strategy of a single path.
|
159 |
+
"""
|
160 |
+
|
161 |
+
# Set engine if necessary
|
162 |
+
if isinstance(engine, str):
|
163 |
+
engine = _set_engine(engine)
|
164 |
+
|
165 |
+
# Pass to specialized function if metadata is defined
|
166 |
+
if metadata is not None:
|
167 |
+
|
168 |
+
# Use the provided parquet metadata object
|
169 |
+
# to avoid transferring/parsing footer metadata
|
170 |
+
return _get_parquet_byte_ranges_from_metadata(
|
171 |
+
metadata,
|
172 |
+
fs,
|
173 |
+
engine,
|
174 |
+
columns=columns,
|
175 |
+
row_groups=row_groups,
|
176 |
+
max_gap=max_gap,
|
177 |
+
max_block=max_block,
|
178 |
+
)
|
179 |
+
|
180 |
+
# Get file sizes asynchronously
|
181 |
+
file_sizes = fs.sizes(paths)
|
182 |
+
|
183 |
+
# Populate global paths, starts, & ends
|
184 |
+
result = {}
|
185 |
+
data_paths = []
|
186 |
+
data_starts = []
|
187 |
+
data_ends = []
|
188 |
+
add_header_magic = True
|
189 |
+
if columns is None and row_groups is None:
|
190 |
+
# We are NOT selecting specific columns or row-groups.
|
191 |
+
#
|
192 |
+
# We can avoid sampling the footers, and just transfer
|
193 |
+
# all file data with cat_ranges
|
194 |
+
for i, path in enumerate(paths):
|
195 |
+
result[path] = {}
|
196 |
+
for b in range(0, file_sizes[i], max_block):
|
197 |
+
data_paths.append(path)
|
198 |
+
data_starts.append(b)
|
199 |
+
data_ends.append(min(b + max_block, file_sizes[i]))
|
200 |
+
add_header_magic = False # "Magic" should already be included
|
201 |
+
else:
|
202 |
+
# We ARE selecting specific columns or row-groups.
|
203 |
+
#
|
204 |
+
# Gather file footers.
|
205 |
+
# We just take the last `footer_sample_size` bytes of each
|
206 |
+
# file (or the entire file if it is smaller than that)
|
207 |
+
footer_starts = []
|
208 |
+
footer_ends = []
|
209 |
+
for i, path in enumerate(paths):
|
210 |
+
footer_ends.append(file_sizes[i])
|
211 |
+
sample_size = max(0, file_sizes[i] - footer_sample_size)
|
212 |
+
footer_starts.append(sample_size)
|
213 |
+
footer_samples = fs.cat_ranges(paths, footer_starts, footer_ends)
|
214 |
+
|
215 |
+
# Check our footer samples and re-sample if necessary.
|
216 |
+
missing_footer_starts = footer_starts.copy()
|
217 |
+
large_footer = 0
|
218 |
+
for i, path in enumerate(paths):
|
219 |
+
footer_size = int.from_bytes(footer_samples[i][-8:-4], "little")
|
220 |
+
real_footer_start = file_sizes[i] - (footer_size + 8)
|
221 |
+
if real_footer_start < footer_starts[i]:
|
222 |
+
missing_footer_starts[i] = real_footer_start
|
223 |
+
large_footer = max(large_footer, (footer_size + 8))
|
224 |
+
if large_footer:
|
225 |
+
warnings.warn(
|
226 |
+
f"Not enough data was used to sample the parquet footer. "
|
227 |
+
f"Try setting footer_sample_size >= {large_footer}."
|
228 |
+
)
|
229 |
+
for i, block in enumerate(
|
230 |
+
fs.cat_ranges(
|
231 |
+
paths,
|
232 |
+
missing_footer_starts,
|
233 |
+
footer_starts,
|
234 |
+
)
|
235 |
+
):
|
236 |
+
footer_samples[i] = block + footer_samples[i]
|
237 |
+
footer_starts[i] = missing_footer_starts[i]
|
238 |
+
|
239 |
+
# Calculate required byte ranges for each path
|
240 |
+
for i, path in enumerate(paths):
|
241 |
+
|
242 |
+
# Deal with small-file case.
|
243 |
+
# Just include all remaining bytes of the file
|
244 |
+
# in a single range.
|
245 |
+
if file_sizes[i] < max_block:
|
246 |
+
if footer_starts[i] > 0:
|
247 |
+
# Only need to transfer the data if the
|
248 |
+
# footer sample isn't already the whole file
|
249 |
+
data_paths.append(path)
|
250 |
+
data_starts.append(0)
|
251 |
+
data_ends.append(footer_starts[i])
|
252 |
+
continue
|
253 |
+
|
254 |
+
# Use "engine" to collect data byte ranges
|
255 |
+
path_data_starts, path_data_ends = engine._parquet_byte_ranges(
|
256 |
+
columns,
|
257 |
+
row_groups=row_groups,
|
258 |
+
footer=footer_samples[i],
|
259 |
+
footer_start=footer_starts[i],
|
260 |
+
)
|
261 |
+
|
262 |
+
data_paths += [path] * len(path_data_starts)
|
263 |
+
data_starts += path_data_starts
|
264 |
+
data_ends += path_data_ends
|
265 |
+
|
266 |
+
# Merge adjacent offset ranges
|
267 |
+
data_paths, data_starts, data_ends = merge_offset_ranges(
|
268 |
+
data_paths,
|
269 |
+
data_starts,
|
270 |
+
data_ends,
|
271 |
+
max_gap=max_gap,
|
272 |
+
max_block=max_block,
|
273 |
+
sort=False, # Should already be sorted
|
274 |
+
)
|
275 |
+
|
276 |
+
# Start by populating `result` with footer samples
|
277 |
+
for i, path in enumerate(paths):
|
278 |
+
result[path] = {(footer_starts[i], footer_ends[i]): footer_samples[i]}
|
279 |
+
|
280 |
+
# Transfer the data byte-ranges into local memory
|
281 |
+
_transfer_ranges(fs, result, data_paths, data_starts, data_ends)
|
282 |
+
|
283 |
+
# Add b"PAR1" to header if necessary
|
284 |
+
if add_header_magic:
|
285 |
+
_add_header_magic(result)
|
286 |
+
|
287 |
+
return result
|
288 |
+
|
289 |
+
|
290 |
+
def _get_parquet_byte_ranges_from_metadata(
|
291 |
+
metadata,
|
292 |
+
fs,
|
293 |
+
engine,
|
294 |
+
columns=None,
|
295 |
+
row_groups=None,
|
296 |
+
max_gap=64_000,
|
297 |
+
max_block=256_000_000,
|
298 |
+
):
|
299 |
+
"""Simplified version of `_get_parquet_byte_ranges` for
|
300 |
+
the case that an engine-specific `metadata` object is
|
301 |
+
provided, and the remote footer metadata does not need to
|
302 |
+
be transferred before calculating the required byte ranges.
|
303 |
+
"""
|
304 |
+
|
305 |
+
# Use "engine" to collect data byte ranges
|
306 |
+
data_paths, data_starts, data_ends = engine._parquet_byte_ranges(
|
307 |
+
columns,
|
308 |
+
row_groups=row_groups,
|
309 |
+
metadata=metadata,
|
310 |
+
)
|
311 |
+
|
312 |
+
# Merge adjacent offset ranges
|
313 |
+
data_paths, data_starts, data_ends = merge_offset_ranges(
|
314 |
+
data_paths,
|
315 |
+
data_starts,
|
316 |
+
data_ends,
|
317 |
+
max_gap=max_gap,
|
318 |
+
max_block=max_block,
|
319 |
+
sort=False, # Should be sorted
|
320 |
+
)
|
321 |
+
|
322 |
+
# Transfer the data byte-ranges into local memory
|
323 |
+
result = {fn: {} for fn in list(set(data_paths))}
|
324 |
+
_transfer_ranges(fs, result, data_paths, data_starts, data_ends)
|
325 |
+
|
326 |
+
# Add b"PAR1" to header
|
327 |
+
_add_header_magic(result)
|
328 |
+
|
329 |
+
return result
|
330 |
+
|
331 |
+
|
332 |
+
def _transfer_ranges(fs, blocks, paths, starts, ends):
|
333 |
+
# Use cat_ranges to gather the data byte_ranges
|
334 |
+
ranges = (paths, starts, ends)
|
335 |
+
for path, start, stop, data in zip(*ranges, fs.cat_ranges(*ranges)):
|
336 |
+
blocks[path][(start, stop)] = data
|
337 |
+
|
338 |
+
|
339 |
+
def _add_header_magic(data):
|
340 |
+
# Add b"PAR1" to file headers
|
341 |
+
for i, path in enumerate(list(data.keys())):
|
342 |
+
add_magic = True
|
343 |
+
for k in data[path].keys():
|
344 |
+
if k[0] == 0 and k[1] >= 4:
|
345 |
+
add_magic = False
|
346 |
+
break
|
347 |
+
if add_magic:
|
348 |
+
data[path][(0, 4)] = b"PAR1"
|
349 |
+
|
350 |
+
|
351 |
+
def _set_engine(engine_str):
|
352 |
+
|
353 |
+
# Define a list of parquet engines to try
|
354 |
+
if engine_str == "auto":
|
355 |
+
try_engines = ("fastparquet", "pyarrow")
|
356 |
+
elif not isinstance(engine_str, str):
|
357 |
+
raise ValueError(
|
358 |
+
"Failed to set parquet engine! "
|
359 |
+
"Please pass 'fastparquet', 'pyarrow', or 'auto'"
|
360 |
+
)
|
361 |
+
elif engine_str not in ("fastparquet", "pyarrow"):
|
362 |
+
raise ValueError(f"{engine_str} engine not supported by `fsspec.parquet`")
|
363 |
+
else:
|
364 |
+
try_engines = [engine_str]
|
365 |
+
|
366 |
+
# Try importing the engines in `try_engines`,
|
367 |
+
# and choose the first one that succeeds
|
368 |
+
for engine in try_engines:
|
369 |
+
try:
|
370 |
+
if engine == "fastparquet":
|
371 |
+
return FastparquetEngine()
|
372 |
+
elif engine == "pyarrow":
|
373 |
+
return PyarrowEngine()
|
374 |
+
except ImportError:
|
375 |
+
pass
|
376 |
+
|
377 |
+
# Raise an error if a supported parquet engine
|
378 |
+
# was not found
|
379 |
+
raise ImportError(
    f"The following parquet engines are not installed "
    f"in your python environment: {try_engines}. "
    f"Please install 'fastparquet' or 'pyarrow' to "
    f"utilize the `fsspec.parquet` module."
)
|
385 |
+
|
386 |
+
|
387 |
+
class FastparquetEngine:
|
388 |
+
|
389 |
+
# The purpose of the FastparquetEngine class is
|
390 |
+
# to check if fastparquet can be imported (on initialization)
|
391 |
+
# and to define a `_parquet_byte_ranges` method. In the
|
392 |
+
# future, this class may also be used to define other
|
393 |
+
# methods/logic that are specific to fastparquet.
|
394 |
+
|
395 |
+
def __init__(self):
|
396 |
+
import fastparquet as fp
|
397 |
+
|
398 |
+
self.fp = fp
|
399 |
+
|
400 |
+
def _row_group_filename(self, row_group, pf):
|
401 |
+
return pf.row_group_filename(row_group)
|
402 |
+
|
403 |
+
def _parquet_byte_ranges(
|
404 |
+
self,
|
405 |
+
columns,
|
406 |
+
row_groups=None,
|
407 |
+
metadata=None,
|
408 |
+
footer=None,
|
409 |
+
footer_start=None,
|
410 |
+
):
|
411 |
+
|
412 |
+
# Initialize offset ranges and define ParquetFile metadata
|
413 |
+
pf = metadata
|
414 |
+
data_paths, data_starts, data_ends = [], [], []
|
415 |
+
if pf is None:
|
416 |
+
pf = self.fp.ParquetFile(io.BytesIO(footer))
|
417 |
+
|
418 |
+
# Convert columns to a set and add any index columns
|
419 |
+
# specified in the pandas metadata (just in case)
|
420 |
+
column_set = None if columns is None else set(columns)
|
421 |
+
if column_set is not None and hasattr(pf, "pandas_metadata"):
|
422 |
+
md_index = [
|
423 |
+
ind
|
424 |
+
for ind in pf.pandas_metadata.get("index_columns", [])
|
425 |
+
# Ignore RangeIndex information
|
426 |
+
if not isinstance(ind, dict)
|
427 |
+
]
|
428 |
+
column_set |= set(md_index)
|
429 |
+
|
430 |
+
# Check if row_groups is a list of integers
|
431 |
+
# or a list of row-group metadata
|
432 |
+
if row_groups and not isinstance(row_groups[0], int):
|
433 |
+
# Input row_groups contains row-group metadata
|
434 |
+
row_group_indices = None
|
435 |
+
else:
|
436 |
+
# Input row_groups contains row-group indices
|
437 |
+
row_group_indices = row_groups
|
438 |
+
row_groups = pf.row_groups
|
439 |
+
|
440 |
+
# Loop through column chunks to add required byte ranges
|
441 |
+
for r, row_group in enumerate(row_groups):
|
442 |
+
# Skip this row-group if we are targeting
|
443 |
+
# specific row-groups
|
444 |
+
if row_group_indices is None or r in row_group_indices:
|
445 |
+
|
446 |
+
# Find the target parquet-file path for `row_group`
|
447 |
+
fn = self._row_group_filename(row_group, pf)
|
448 |
+
|
449 |
+
for column in row_group.columns:
|
450 |
+
name = column.meta_data.path_in_schema[0]
|
451 |
+
# Skip this column if we are targeting a
|
452 |
+
# specific set of columns
|
453 |
+
if column_set is None or name in column_set:
|
454 |
+
file_offset0 = column.meta_data.dictionary_page_offset
|
455 |
+
if file_offset0 is None:
|
456 |
+
file_offset0 = column.meta_data.data_page_offset
|
457 |
+
num_bytes = column.meta_data.total_compressed_size
|
458 |
+
if footer_start is None or file_offset0 < footer_start:
|
459 |
+
data_paths.append(fn)
|
460 |
+
data_starts.append(file_offset0)
|
461 |
+
data_ends.append(
|
462 |
+
min(
|
463 |
+
file_offset0 + num_bytes,
|
464 |
+
footer_start or (file_offset0 + num_bytes),
|
465 |
+
)
|
466 |
+
)
|
467 |
+
|
468 |
+
if metadata:
|
469 |
+
# The metadata in this call may map to multiple
|
470 |
+
# file paths. Need to include `data_paths`
|
471 |
+
return data_paths, data_starts, data_ends
|
472 |
+
return data_starts, data_ends
|
473 |
+
|
474 |
+
|
475 |
+
class PyarrowEngine:
|
476 |
+
|
477 |
+
# The purpose of the PyarrowEngine class is
|
478 |
+
# to check if pyarrow can be imported (on initialization)
|
479 |
+
# and to define a `_parquet_byte_ranges` method. In the
|
480 |
+
# future, this class may also be used to define other
|
481 |
+
# methods/logic that are specific to pyarrow.
|
482 |
+
|
483 |
+
def __init__(self):
|
484 |
+
import pyarrow.parquet as pq
|
485 |
+
|
486 |
+
self.pq = pq
|
487 |
+
|
488 |
+
def _row_group_filename(self, row_group, metadata):
|
489 |
+
raise NotImplementedError
|
490 |
+
|
491 |
+
def _parquet_byte_ranges(
|
492 |
+
self,
|
493 |
+
columns,
|
494 |
+
row_groups=None,
|
495 |
+
metadata=None,
|
496 |
+
footer=None,
|
497 |
+
footer_start=None,
|
498 |
+
):
|
499 |
+
|
500 |
+
if metadata is not None:
|
501 |
+
raise ValueError("metadata input not supported for PyarrowEngine")
|
502 |
+
|
503 |
+
data_starts, data_ends = [], []
|
504 |
+
md = self.pq.ParquetFile(io.BytesIO(footer)).metadata
|
505 |
+
|
506 |
+
# Convert columns to a set and add any index columns
|
507 |
+
# specified in the pandas metadata (just in case)
|
508 |
+
column_set = None if columns is None else set(columns)
|
509 |
+
if column_set is not None:
|
510 |
+
schema = md.schema.to_arrow_schema()
|
511 |
+
has_pandas_metadata = (
|
512 |
+
schema.metadata is not None and b"pandas" in schema.metadata
|
513 |
+
)
|
514 |
+
if has_pandas_metadata:
|
515 |
+
md_index = [
|
516 |
+
ind
|
517 |
+
for ind in json.loads(
|
518 |
+
schema.metadata[b"pandas"].decode("utf8")
|
519 |
+
).get("index_columns", [])
|
520 |
+
# Ignore RangeIndex information
|
521 |
+
if not isinstance(ind, dict)
|
522 |
+
]
|
523 |
+
column_set |= set(md_index)
|
524 |
+
|
525 |
+
# Loop through column chunks to add required byte ranges
|
526 |
+
for r in range(md.num_row_groups):
|
527 |
+
# Skip this row-group if we are targeting
|
528 |
+
# specific row-groups
|
529 |
+
if row_groups is None or r in row_groups:
|
530 |
+
row_group = md.row_group(r)
|
531 |
+
for c in range(row_group.num_columns):
|
532 |
+
column = row_group.column(c)
|
533 |
+
name = column.path_in_schema
|
534 |
+
# Skip this column if we are targeting a
|
535 |
+
# specific set of columns
|
536 |
+
split_name = name.split(".")[0]
|
537 |
+
if (
|
538 |
+
column_set is None
|
539 |
+
or name in column_set
|
540 |
+
or split_name in column_set
|
541 |
+
):
|
542 |
+
file_offset0 = column.dictionary_page_offset
|
543 |
+
if file_offset0 is None:
|
544 |
+
file_offset0 = column.data_page_offset
|
545 |
+
num_bytes = column.total_compressed_size
|
546 |
+
if file_offset0 < footer_start:
|
547 |
+
data_starts.append(file_offset0)
|
548 |
+
data_ends.append(
|
549 |
+
min(file_offset0 + num_bytes, footer_start)
|
550 |
+
)
|
551 |
+
return data_starts, data_ends
|
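For context on how the `open_parquet_file` helper defined above is meant to be called, a minimal, hedged usage sketch follows. It is not part of the diff; the remote URL, column names, and storage options are hypothetical placeholders, and a parquet engine plus the relevant fsspec backend (s3fs here) must be installed.

```python
import pandas as pd

from fsspec.parquet import open_parquet_file

url = "s3://example-bucket/dataset/part.0.parquet"   # hypothetical placeholder

# Only the footer metadata and the byte ranges covering the requested columns
# are fetched; the file object uses the "parts" (KnownPartsOfAFile) cache.
with open_parquet_file(
    url,
    columns=["x", "y"],                  # hypothetical column names
    storage_options={"anon": True},      # passed through to the backend FS
) as f:
    df = pd.read_parquet(f, columns=["x", "y"])

print(df.head())
```

As the docstring above notes, this mainly pays off for remote filesystems; on local storage it is unlikely to beat a plain `fs.open()`.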
lib/python3.11/site-packages/fsspec/registry.py
ADDED
@@ -0,0 +1,299 @@
1 |
+
from __future__ import annotations
|
2 |
+
|
3 |
+
import importlib
|
4 |
+
import types
|
5 |
+
import warnings
|
6 |
+
|
7 |
+
__all__ = ["registry", "get_filesystem_class", "default"]
|
8 |
+
|
9 |
+
# internal, mutable
|
10 |
+
_registry: dict[str, type] = {}
|
11 |
+
|
12 |
+
# external, immutable
|
13 |
+
registry = types.MappingProxyType(_registry)
|
14 |
+
default = "file"
|
15 |
+
|
16 |
+
|
17 |
+
def register_implementation(name, cls, clobber=False, errtxt=None):
|
18 |
+
"""Add implementation class to the registry
|
19 |
+
|
20 |
+
Parameters
|
21 |
+
----------
|
22 |
+
name: str
|
23 |
+
Protocol name to associate with the class
|
24 |
+
cls: class or str
|
25 |
+
if a class: fsspec-compliant implementation class (normally inherits from
|
26 |
+
``fsspec.AbstractFileSystem``, gets added straight to the registry. If a
|
27 |
+
str, the full path to an implementation class like package.module.class,
|
28 |
+
which gets added to known_implementations,
|
29 |
+
so the import is deferred until the filesystem is actually used.
|
30 |
+
clobber: bool (optional)
|
31 |
+
Whether to overwrite a protocol with the same name; if False, will raise
|
32 |
+
instead.
|
33 |
+
errtxt: str (optional)
|
34 |
+
If given, then a failure to import the given class will result in this
|
35 |
+
text being given.
|
36 |
+
"""
|
37 |
+
if isinstance(cls, str):
|
38 |
+
if name in known_implementations and clobber is False:
|
39 |
+
if cls != known_implementations[name]["class"]:
|
40 |
+
raise ValueError(
|
41 |
+
f"Name ({name}) already in the known_implementations and clobber "
|
42 |
+
f"is False"
|
43 |
+
)
|
44 |
+
else:
|
45 |
+
known_implementations[name] = {
|
46 |
+
"class": cls,
|
47 |
+
"err": errtxt or f"{cls} import failed for protocol {name}",
|
48 |
+
}
|
49 |
+
|
50 |
+
else:
|
51 |
+
if name in registry and clobber is False:
|
52 |
+
if _registry[name] is not cls:
|
53 |
+
raise ValueError(
|
54 |
+
f"Name ({name}) already in the registry and clobber is False"
|
55 |
+
)
|
56 |
+
else:
|
57 |
+
_registry[name] = cls
|
58 |
+
|
59 |
+
|
60 |
+
# protocols mapped to the class which implements them. This dict can be
|
61 |
+
# updated with register_implementation
|
62 |
+
known_implementations = {
|
63 |
+
"data": {"class": "fsspec.implementations.data.DataFileSystem"},
|
64 |
+
"file": {"class": "fsspec.implementations.local.LocalFileSystem"},
|
65 |
+
"local": {"class": "fsspec.implementations.local.LocalFileSystem"},
|
66 |
+
"memory": {"class": "fsspec.implementations.memory.MemoryFileSystem"},
|
67 |
+
"dropbox": {
|
68 |
+
"class": "dropboxdrivefs.DropboxDriveFileSystem",
|
69 |
+
"err": (
|
70 |
+
'DropboxFileSystem requires "dropboxdrivefs",'
|
71 |
+
'"requests" and "dropbox" to be installed'
|
72 |
+
),
|
73 |
+
},
|
74 |
+
"http": {
|
75 |
+
"class": "fsspec.implementations.http.HTTPFileSystem",
|
76 |
+
"err": 'HTTPFileSystem requires "requests" and "aiohttp" to be installed',
|
77 |
+
},
|
78 |
+
"https": {
|
79 |
+
"class": "fsspec.implementations.http.HTTPFileSystem",
|
80 |
+
"err": 'HTTPFileSystem requires "requests" and "aiohttp" to be installed',
|
81 |
+
},
|
82 |
+
"zip": {"class": "fsspec.implementations.zip.ZipFileSystem"},
|
83 |
+
"tar": {"class": "fsspec.implementations.tar.TarFileSystem"},
|
84 |
+
"gcs": {
|
85 |
+
"class": "gcsfs.GCSFileSystem",
|
86 |
+
"err": "Please install gcsfs to access Google Storage",
|
87 |
+
},
|
88 |
+
"gs": {
|
89 |
+
"class": "gcsfs.GCSFileSystem",
|
90 |
+
"err": "Please install gcsfs to access Google Storage",
|
91 |
+
},
|
92 |
+
"gdrive": {
|
93 |
+
"class": "gdrivefs.GoogleDriveFileSystem",
|
94 |
+
"err": "Please install gdrivefs for access to Google Drive",
|
95 |
+
},
|
96 |
+
"sftp": {
|
97 |
+
"class": "fsspec.implementations.sftp.SFTPFileSystem",
|
98 |
+
"err": 'SFTPFileSystem requires "paramiko" to be installed',
|
99 |
+
},
|
100 |
+
"ssh": {
|
101 |
+
"class": "fsspec.implementations.sftp.SFTPFileSystem",
|
102 |
+
"err": 'SFTPFileSystem requires "paramiko" to be installed',
|
103 |
+
},
|
104 |
+
"ftp": {"class": "fsspec.implementations.ftp.FTPFileSystem"},
|
105 |
+
"hdfs": {
|
106 |
+
"class": "fsspec.implementations.arrow.HadoopFileSystem",
|
107 |
+
"err": "pyarrow and local java libraries required for HDFS",
|
108 |
+
},
|
109 |
+
"arrow_hdfs": {
|
110 |
+
"class": "fsspec.implementations.arrow.HadoopFileSystem",
|
111 |
+
"err": "pyarrow and local java libraries required for HDFS",
|
112 |
+
},
|
113 |
+
"webhdfs": {
|
114 |
+
"class": "fsspec.implementations.webhdfs.WebHDFS",
|
115 |
+
"err": 'webHDFS access requires "requests" to be installed',
|
116 |
+
},
|
117 |
+
"s3": {"class": "s3fs.S3FileSystem", "err": "Install s3fs to access S3"},
|
118 |
+
"s3a": {"class": "s3fs.S3FileSystem", "err": "Install s3fs to access S3"},
|
119 |
+
"wandb": {"class": "wandbfs.WandbFS", "err": "Install wandbfs to access wandb"},
|
120 |
+
"oci": {
|
121 |
+
"class": "ocifs.OCIFileSystem",
|
122 |
+
"err": "Install ocifs to access OCI Object Storage",
|
123 |
+
},
|
124 |
+
"ocilake": {
|
125 |
+
"class": "ocifs.OCIFileSystem",
|
126 |
+
"err": "Install ocifs to access OCI Data Lake",
|
127 |
+
},
|
128 |
+
"asynclocal": {
|
129 |
+
"class": "morefs.asyn_local.AsyncLocalFileSystem",
        "err": "Install 'morefs[asynclocalfs]' to use AsyncLocalFileSystem",
    },
    "adl": {
        "class": "adlfs.AzureDatalakeFileSystem",
        "err": "Install adlfs to access Azure Datalake Gen1",
    },
    "abfs": {
        "class": "adlfs.AzureBlobFileSystem",
        "err": "Install adlfs to access Azure Datalake Gen2 and Azure Blob Storage",
    },
    "az": {
        "class": "adlfs.AzureBlobFileSystem",
        "err": "Install adlfs to access Azure Datalake Gen2 and Azure Blob Storage",
    },
    "cached": {"class": "fsspec.implementations.cached.CachingFileSystem"},
    "blockcache": {"class": "fsspec.implementations.cached.CachingFileSystem"},
    "filecache": {"class": "fsspec.implementations.cached.WholeFileCacheFileSystem"},
    "simplecache": {"class": "fsspec.implementations.cached.SimpleCacheFileSystem"},
    "dask": {
        "class": "fsspec.implementations.dask.DaskWorkerFileSystem",
        "err": "Install dask distributed to access worker file system",
    },
    "dbfs": {
        "class": "fsspec.implementations.dbfs.DatabricksFileSystem",
        "err": "Install the requests package to use the DatabricksFileSystem",
    },
    "github": {
        "class": "fsspec.implementations.github.GithubFileSystem",
        "err": "Install the requests package to use the github FS",
    },
    "git": {
        "class": "fsspec.implementations.git.GitFileSystem",
        "err": "Install pygit2 to browse local git repos",
    },
    "smb": {
        "class": "fsspec.implementations.smb.SMBFileSystem",
        "err": 'SMB requires "smbprotocol" or "smbprotocol[kerberos]" installed',
    },
    "jupyter": {
        "class": "fsspec.implementations.jupyter.JupyterFileSystem",
        "err": "Jupyter FS requires requests to be installed",
    },
    "jlab": {
        "class": "fsspec.implementations.jupyter.JupyterFileSystem",
        "err": "Jupyter FS requires requests to be installed",
    },
    "libarchive": {
        "class": "fsspec.implementations.libarchive.LibArchiveFileSystem",
        "err": "LibArchive requires to be installed",
    },
    "reference": {"class": "fsspec.implementations.reference.ReferenceFileSystem"},
    "generic": {"class": "fsspec.generic.GenericFileSystem"},
    "oss": {
        "class": "ossfs.OSSFileSystem",
        "err": "Install ossfs to access Alibaba Object Storage System",
    },
    "webdav": {
        "class": "webdav4.fsspec.WebdavFileSystem",
        "err": "Install webdav4 to access WebDAV",
    },
    "dvc": {
        "class": "dvc.api.DVCFileSystem",
        "err": "Install dvc to access DVCFileSystem",
    },
    "hf": {
        "class": "huggingface_hub.HfFileSystem",
        "err": "Install huggingface_hub to access HfFileSystem",
    },
    "root": {
        "class": "fsspec_xrootd.XRootDFileSystem",
        "err": "Install fsspec-xrootd to access xrootd storage system."
        + " Note: 'root' is the protocol name for xrootd storage systems,"
        + " not referring to root directories",
    },
    "dir": {"class": "fsspec.implementations.dirfs.DirFileSystem"},
    "box": {
        "class": "boxfs.BoxFileSystem",
        "err": "Please install boxfs to access BoxFileSystem",
    },
    "lakefs": {
        "class": "lakefs_spec.LakeFSFileSystem",
        "err": "Please install lakefs-spec to access LakeFSFileSystem",
    },
}


def get_filesystem_class(protocol):
    """Fetch named protocol implementation from the registry

    The dict ``known_implementations`` maps protocol names to the locations
    of classes implementing the corresponding file-system. When used for the
    first time, appropriate imports will happen and the class will be placed in
    the registry. All subsequent calls will fetch directly from the registry.

    Some protocol implementations require additional dependencies, and so the
    import may fail. In this case, the string in the "err" field of the
    ``known_implementations`` will be given as the error message.
    """
    if not protocol:
        protocol = default

    if protocol not in registry:
        if protocol not in known_implementations:
            raise ValueError(f"Protocol not known: {protocol}")
        bit = known_implementations[protocol]
        try:
            register_implementation(protocol, _import_class(bit["class"]))
        except ImportError as e:
            raise ImportError(bit["err"]) from e
    cls = registry[protocol]
    if getattr(cls, "protocol", None) in ("abstract", None):
        cls.protocol = protocol

    return cls


s3_msg = """Your installed version of s3fs is very old and known to cause
severe performance issues, see also https://github.com/dask/dask/issues/10276

To fix, you should specify a lower version bound on s3fs, or
update the current installation.
"""


def _import_class(cls, minv=None):
    """Take a string FQP and return the imported class or identifier

    ``cls`` is of the form "package.module.klass" or "package.module:subobject.klass"
    """
    if ":" in cls:
        mod, name = cls.rsplit(":", 1)
        s3 = mod == "s3fs"
        mod = importlib.import_module(mod)
        if s3 and mod.__version__.split(".") < ["0", "5"]:
            warnings.warn(s3_msg)
        for part in name.split("."):
            mod = getattr(mod, part)
        return mod
    else:
        mod, name = cls.rsplit(".", 1)
        s3 = mod == "s3fs"
        mod = importlib.import_module(mod)
        if s3 and mod.__version__.split(".") < ["0", "5"]:
            warnings.warn(s3_msg)
        return getattr(mod, name)


def filesystem(protocol, **storage_options):
    """Instantiate filesystems for given protocol and arguments

    ``storage_options`` are specific to the protocol being chosen, and are
    passed directly to the class.
    """
    if protocol == "arrow_hdfs":
        warnings.warn(
            "The 'arrow_hdfs' protocol has been deprecated and will be "
            "removed in the future. Specify it as 'hdfs'.",
            DeprecationWarning,
        )

    cls = get_filesystem_class(protocol)
    return cls(**storage_options)


def available_protocols():
    """Return a list of the implemented protocols.

    Note that any given protocol may require extra packages to be importable.
    """
    return list(known_implementations)
lib/python3.11/site-packages/fsspec/spec.py
ADDED
@@ -0,0 +1,1963 @@
from __future__ import annotations

import io
import logging
import os
import threading
import warnings
import weakref
from errno import ESPIPE
from glob import has_magic
from hashlib import sha256
from typing import ClassVar

from .callbacks import _DEFAULT_CALLBACK
from .config import apply_config, conf
from .dircache import DirCache
from .transaction import Transaction
from .utils import (
    _unstrip_protocol,
    glob_translate,
    isfilelike,
    other_paths,
    read_block,
    stringify_path,
    tokenize,
)

logger = logging.getLogger("fsspec")


def make_instance(cls, args, kwargs):
    return cls(*args, **kwargs)


class _Cached(type):
    """
    Metaclass for caching file system instances.

    Notes
    -----
    Instances are cached according to

    * The values of the class attributes listed in `_extra_tokenize_attributes`
    * The arguments passed to ``__init__``.

    This creates an additional reference to the filesystem, which prevents the
    filesystem from being garbage collected when all *user* references go away.
    A call to the :meth:`AbstractFileSystem.clear_instance_cache` must *also*
    be made for a filesystem instance to be garbage collected.
    """

    def __init__(cls, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Note: we intentionally create a reference here, to avoid garbage
        # collecting instances when all other references are gone. To really
        # delete a FileSystem, the cache must be cleared.
        if conf.get("weakref_instance_cache"):  # pragma: no cover
            # debug option for analysing fork/spawn conditions
            cls._cache = weakref.WeakValueDictionary()
        else:
            cls._cache = {}
        cls._pid = os.getpid()

    def __call__(cls, *args, **kwargs):
        kwargs = apply_config(cls, kwargs)
        extra_tokens = tuple(
            getattr(cls, attr, None) for attr in cls._extra_tokenize_attributes
        )
        token = tokenize(
            cls, cls._pid, threading.get_ident(), *args, *extra_tokens, **kwargs
        )
        skip = kwargs.pop("skip_instance_cache", False)
        if os.getpid() != cls._pid:
            cls._cache.clear()
            cls._pid = os.getpid()
        if not skip and cls.cachable and token in cls._cache:
            cls._latest = token
            return cls._cache[token]
        else:
            obj = super().__call__(*args, **kwargs)
            # Setting _fs_token here causes some static linters to complain.
            obj._fs_token_ = token
            obj.storage_args = args
            obj.storage_options = kwargs
            if obj.async_impl and obj.mirror_sync_methods:
                from .asyn import mirror_sync_methods

                mirror_sync_methods(obj)

            if cls.cachable and not skip:
                cls._latest = token
                cls._cache[token] = obj
            return obj
|
97 |
+
"""
|
98 |
+
An abstract super-class for pythonic file-systems
|
99 |
+
|
100 |
+
Implementations are expected to be compatible with or, better, subclass
|
101 |
+
from here.
|
102 |
+
"""
|
103 |
+
|
104 |
+
cachable = True # this class can be cached, instances reused
|
105 |
+
_cached = False
|
106 |
+
blocksize = 2**22
|
107 |
+
sep = "/"
|
108 |
+
protocol: ClassVar[str | tuple[str, ...]] = "abstract"
|
109 |
+
_latest = None
|
110 |
+
async_impl = False
|
111 |
+
mirror_sync_methods = False
|
112 |
+
root_marker = "" # For some FSs, may require leading '/' or other character
|
113 |
+
transaction_type = Transaction
|
114 |
+
|
115 |
+
#: Extra *class attributes* that should be considered when hashing.
|
116 |
+
_extra_tokenize_attributes = ()
|
117 |
+
|
118 |
+
def __init__(self, *args, **storage_options):
|
119 |
+
"""Create and configure file-system instance
|
120 |
+
|
121 |
+
Instances may be cachable, so if similar enough arguments are seen
|
122 |
+
a new instance is not required. The token attribute exists to allow
|
123 |
+
implementations to cache instances if they wish.
|
124 |
+
|
125 |
+
A reasonable default should be provided if there are no arguments.
|
126 |
+
|
127 |
+
Subclasses should call this method.
|
128 |
+
|
129 |
+
Parameters
|
130 |
+
----------
|
131 |
+
use_listings_cache, listings_expiry_time, max_paths:
|
132 |
+
passed to ``DirCache``, if the implementation supports
|
133 |
+
directory listing caching. Pass use_listings_cache=False
|
134 |
+
to disable such caching.
|
135 |
+
skip_instance_cache: bool
|
136 |
+
If this is a cachable implementation, pass True here to force
|
137 |
+
creating a new instance even if a matching instance exists, and prevent
|
138 |
+
storing this instance.
|
139 |
+
asynchronous: bool
|
140 |
+
loop: asyncio-compatible IOLoop or None
|
141 |
+
"""
|
142 |
+
if self._cached:
|
143 |
+
# reusing instance, don't change
|
144 |
+
return
|
145 |
+
self._cached = True
|
146 |
+
self._intrans = False
|
147 |
+
self._transaction = None
|
148 |
+
self._invalidated_caches_in_transaction = []
|
149 |
+
self.dircache = DirCache(**storage_options)
|
150 |
+
|
151 |
+
if storage_options.pop("add_docs", None):
|
152 |
+
warnings.warn("add_docs is no longer supported.", FutureWarning)
|
153 |
+
|
154 |
+
if storage_options.pop("add_aliases", None):
|
155 |
+
warnings.warn("add_aliases has been removed.", FutureWarning)
|
156 |
+
# This is set in _Cached
|
157 |
+
self._fs_token_ = None
|
158 |
+
|
159 |
+
@property
|
160 |
+
def fsid(self):
|
161 |
+
"""Persistent filesystem id that can be used to compare filesystems
|
162 |
+
across sessions.
|
163 |
+
"""
|
164 |
+
raise NotImplementedError
|
165 |
+
|
166 |
+
@property
|
167 |
+
def _fs_token(self):
|
168 |
+
return self._fs_token_
|
169 |
+
|
170 |
+
def __dask_tokenize__(self):
|
171 |
+
return self._fs_token
|
172 |
+
|
173 |
+
def __hash__(self):
|
174 |
+
return int(self._fs_token, 16)
|
175 |
+
|
176 |
+
def __eq__(self, other):
|
177 |
+
return isinstance(other, type(self)) and self._fs_token == other._fs_token
|
178 |
+
|
179 |
+
def __reduce__(self):
|
180 |
+
return make_instance, (type(self), self.storage_args, self.storage_options)
|
181 |
+
|
182 |
+
@classmethod
|
183 |
+
def _strip_protocol(cls, path):
|
184 |
+
"""Turn path from fully-qualified to file-system-specific
|
185 |
+
|
186 |
+
May require FS-specific handling, e.g., for relative paths or links.
|
187 |
+
"""
|
188 |
+
if isinstance(path, list):
|
189 |
+
return [cls._strip_protocol(p) for p in path]
|
190 |
+
path = stringify_path(path)
|
191 |
+
protos = (cls.protocol,) if isinstance(cls.protocol, str) else cls.protocol
|
192 |
+
for protocol in protos:
|
193 |
+
if path.startswith(protocol + "://"):
|
194 |
+
path = path[len(protocol) + 3 :]
|
195 |
+
elif path.startswith(protocol + "::"):
|
196 |
+
path = path[len(protocol) + 2 :]
|
197 |
+
path = path.rstrip("/")
|
198 |
+
# use of root_marker to make minimum required path, e.g., "/"
|
199 |
+
return path or cls.root_marker
|
200 |
+
|
201 |
+
def unstrip_protocol(self, name: str) -> str:
|
202 |
+
"""Format FS-specific path to generic, including protocol"""
|
203 |
+
protos = (self.protocol,) if isinstance(self.protocol, str) else self.protocol
|
204 |
+
for protocol in protos:
|
205 |
+
if name.startswith(f"{protocol}://"):
|
206 |
+
return name
|
207 |
+
return f"{protos[0]}://{name}"
|
208 |
+
|
209 |
+
@staticmethod
|
210 |
+
def _get_kwargs_from_urls(path):
|
211 |
+
"""If kwargs can be encoded in the paths, extract them here
|
212 |
+
|
213 |
+
This should happen before instantiation of the class; incoming paths
|
214 |
+
then should be amended to strip the options in methods.
|
215 |
+
|
216 |
+
Examples may look like an sftp path "sftp://user@host:/my/path", where
|
217 |
+
the user and host should become kwargs and later get stripped.
|
218 |
+
"""
|
219 |
+
# by default, nothing happens
|
220 |
+
return {}
|
221 |
+
|
222 |
+
@classmethod
|
223 |
+
def current(cls):
|
224 |
+
"""Return the most recently instantiated FileSystem
|
225 |
+
|
226 |
+
If no instance has been created, then create one with defaults
|
227 |
+
"""
|
228 |
+
if cls._latest in cls._cache:
|
229 |
+
return cls._cache[cls._latest]
|
230 |
+
return cls()
|
231 |
+
|
232 |
+
@property
|
233 |
+
def transaction(self):
|
234 |
+
"""A context within which files are committed together upon exit
|
235 |
+
|
236 |
+
Requires the file class to implement `.commit()` and `.discard()`
|
237 |
+
for the normal and exception cases.
|
238 |
+
"""
|
239 |
+
if self._transaction is None:
|
240 |
+
self._transaction = self.transaction_type(self)
|
241 |
+
return self._transaction
|
242 |
+
|
243 |
+
def start_transaction(self):
|
244 |
+
"""Begin write transaction for deferring files, non-context version"""
|
245 |
+
self._intrans = True
|
246 |
+
self._transaction = self.transaction_type(self)
|
247 |
+
return self.transaction
|
248 |
+
|
249 |
+
def end_transaction(self):
|
250 |
+
"""Finish write transaction, non-context version"""
|
251 |
+
self.transaction.complete()
|
252 |
+
self._transaction = None
|
253 |
+
# The invalid cache must be cleared after the transaction is completed.
|
254 |
+
for path in self._invalidated_caches_in_transaction:
|
255 |
+
self.invalidate_cache(path)
|
256 |
+
self._invalidated_caches_in_transaction.clear()
|
257 |
+
|
258 |
+
def invalidate_cache(self, path=None):
|
259 |
+
"""
|
260 |
+
Discard any cached directory information
|
261 |
+
|
262 |
+
Parameters
|
263 |
+
----------
|
264 |
+
path: string or None
|
265 |
+
If None, clear all listings cached else listings at or under given
|
266 |
+
path.
|
267 |
+
"""
|
268 |
+
# Not necessary to implement invalidation mechanism, may have no cache.
|
269 |
+
# But if have, you should call this method of parent class from your
|
270 |
+
# subclass to ensure expiring caches after transacations correctly.
|
271 |
+
# See the implementation of FTPFileSystem in ftp.py
|
272 |
+
if self._intrans:
|
273 |
+
self._invalidated_caches_in_transaction.append(path)
|
274 |
+
|
275 |
+
def mkdir(self, path, create_parents=True, **kwargs):
|
276 |
+
"""
|
277 |
+
Create directory entry at path
|
278 |
+
|
279 |
+
For systems that don't have true directories, may create an for
|
280 |
+
this instance only and not touch the real filesystem
|
281 |
+
|
282 |
+
Parameters
|
283 |
+
----------
|
284 |
+
path: str
|
285 |
+
location
|
286 |
+
create_parents: bool
|
287 |
+
if True, this is equivalent to ``makedirs``
|
288 |
+
kwargs:
|
289 |
+
may be permissions, etc.
|
290 |
+
"""
|
291 |
+
pass # not necessary to implement, may not have directories
|
292 |
+
|
293 |
+
def makedirs(self, path, exist_ok=False):
|
294 |
+
"""Recursively make directories
|
295 |
+
|
296 |
+
Creates directory at path and any intervening required directories.
|
297 |
+
Raises exception if, for instance, the path already exists but is a
|
298 |
+
file.
|
299 |
+
|
300 |
+
Parameters
|
301 |
+
----------
|
302 |
+
path: str
|
303 |
+
leaf directory name
|
304 |
+
exist_ok: bool (False)
|
305 |
+
If False, will error if the target already exists
|
306 |
+
"""
|
307 |
+
pass # not necessary to implement, may not have directories
|
308 |
+
|
309 |
+
def rmdir(self, path):
|
310 |
+
"""Remove a directory, if empty"""
|
311 |
+
pass # not necessary to implement, may not have directories
|
312 |
+
|
313 |
+
def ls(self, path, detail=True, **kwargs):
|
314 |
+
"""List objects at path.
|
315 |
+
|
316 |
+
This should include subdirectories and files at that location. The
|
317 |
+
difference between a file and a directory must be clear when details
|
318 |
+
are requested.
|
319 |
+
|
320 |
+
The specific keys, or perhaps a FileInfo class, or similar, is TBD,
|
321 |
+
but must be consistent across implementations.
|
322 |
+
Must include:
|
323 |
+
|
324 |
+
- full path to the entry (without protocol)
|
325 |
+
- size of the entry, in bytes. If the value cannot be determined, will
|
326 |
+
be ``None``.
|
327 |
+
- type of entry, "file", "directory" or other
|
328 |
+
|
329 |
+
Additional information
|
330 |
+
may be present, appropriate to the file-system, e.g., generation,
|
331 |
+
checksum, etc.
|
332 |
+
|
333 |
+
May use refresh=True|False to allow use of self._ls_from_cache to
|
334 |
+
check for a saved listing and avoid calling the backend. This would be
|
335 |
+
common where listing may be expensive.
|
336 |
+
|
337 |
+
Parameters
|
338 |
+
----------
|
339 |
+
path: str
|
340 |
+
detail: bool
|
341 |
+
if True, gives a list of dictionaries, where each is the same as
|
342 |
+
the result of ``info(path)``. If False, gives a list of paths
|
343 |
+
(str).
|
344 |
+
kwargs: may have additional backend-specific options, such as version
|
345 |
+
information
|
346 |
+
|
347 |
+
Returns
|
348 |
+
-------
|
349 |
+
List of strings if detail is False, or list of directory information
|
350 |
+
dicts if detail is True.
|
351 |
+
"""
|
352 |
+
raise NotImplementedError
|
353 |
+
|
354 |
+
def _ls_from_cache(self, path):
|
355 |
+
"""Check cache for listing
|
356 |
+
|
357 |
+
Returns listing, if found (may be empty list for a directly that exists
|
358 |
+
but contains nothing), None if not in cache.
|
359 |
+
"""
|
360 |
+
parent = self._parent(path)
|
361 |
+
if path.rstrip("/") in self.dircache:
|
362 |
+
return self.dircache[path.rstrip("/")]
|
363 |
+
try:
|
364 |
+
files = [
|
365 |
+
f
|
366 |
+
for f in self.dircache[parent]
|
367 |
+
if f["name"] == path
|
368 |
+
or (f["name"] == path.rstrip("/") and f["type"] == "directory")
|
369 |
+
]
|
370 |
+
if len(files) == 0:
|
371 |
+
# parent dir was listed but did not contain this file
|
372 |
+
raise FileNotFoundError(path)
|
373 |
+
return files
|
374 |
+
except KeyError:
|
375 |
+
pass
|
376 |
+
|
377 |
+
    def walk(self, path, maxdepth=None, topdown=True, on_error="omit", **kwargs):
        """Return all files below path

        List all files, recursing into subdirectories; output is iterator-style,
        like ``os.walk()``. For a simple list of files, ``find()`` is available.

        When topdown is True, the caller can modify the dirnames list in-place (perhaps
        using del or slice assignment), and walk() will
        only recurse into the subdirectories whose names remain in dirnames;
        this can be used to prune the search, impose a specific order of visiting,
        or even to inform walk() about directories the caller creates or renames before
        it resumes walk() again.
        Modifying dirnames when topdown is False has no effect. (see os.walk)

        Note that the "files" outputted will include anything that is not
        a directory, such as links.

        Parameters
        ----------
        path: str
            Root to recurse into
        maxdepth: int
            Maximum recursion depth. None means limitless, but not recommended
            on link-based file-systems.
        topdown: bool (True)
            Whether to walk the directory tree from the top downwards or from
            the bottom upwards.
        on_error: "omit", "raise", a callable
            if omit (default), path with exception will simply be empty;
            If raise, an underlying exception will be raised;
            if callable, it will be called with a single OSError instance as argument
        kwargs: passed to ``ls``
        """
        if maxdepth is not None and maxdepth < 1:
            raise ValueError("maxdepth must be at least 1")

        path = self._strip_protocol(path)
        full_dirs = {}
        dirs = {}
        files = {}

        detail = kwargs.pop("detail", False)
        try:
            listing = self.ls(path, detail=True, **kwargs)
        except (FileNotFoundError, OSError) as e:
            if on_error == "raise":
                raise
            elif callable(on_error):
                on_error(e)
            if detail:
                return path, {}, {}
            return path, [], []

        for info in listing:
            # each info name must be at least [path]/part , but here
            # we check also for names like [path]/part/
            pathname = info["name"].rstrip("/")
            name = pathname.rsplit("/", 1)[-1]
            if info["type"] == "directory" and pathname != path:
                # do not include "self" path
                full_dirs[name] = pathname
                dirs[name] = info
            elif pathname == path:
                # file-like with same name as given path
                files[""] = info
            else:
                files[name] = info

        if not detail:
            dirs = list(dirs)
            files = list(files)

        if topdown:
            # Yield before recursion if walking top down
            yield path, dirs, files

        if maxdepth is not None:
            maxdepth -= 1
            if maxdepth < 1:
                if not topdown:
                    yield path, dirs, files
                return

        for d in dirs:
            yield from self.walk(
                full_dirs[d],
                maxdepth=maxdepth,
                detail=detail,
                topdown=topdown,
                **kwargs,
            )

        if not topdown:
            # Yield after recursion if walking bottom up
            yield path, dirs, files

    def find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs):
        """List all files below path.

        Like posix ``find`` command without conditions

        Parameters
        ----------
        path : str
        maxdepth: int or None
            If not None, the maximum number of levels to descend
        withdirs: bool
            Whether to include directory paths in the output. This is True
            when used by glob, but users usually only want files.
        kwargs are passed to ``ls``.
        """
        # TODO: allow equivalent of -name parameter
        path = self._strip_protocol(path)
        out = {}

        # Add the root directory if withdirs is requested
        # This is needed for posix glob compliance
        if withdirs and path != "" and self.isdir(path):
            out[path] = self.info(path)

        for _, dirs, files in self.walk(path, maxdepth, detail=True, **kwargs):
            if withdirs:
                files.update(dirs)
            out.update({info["name"]: info for name, info in files.items()})
        if not out and self.isfile(path):
            # walk works on directories, but find should also return [path]
            # when path happens to be a file
            out[path] = {}
        names = sorted(out)
        if not detail:
            return names
        else:
            return {name: out[name] for name in names}

    def du(self, path, total=True, maxdepth=None, withdirs=False, **kwargs):
        """Space used by files and optionally directories within a path

        Directory size does not include the size of its contents.

        Parameters
        ----------
        path: str
        total: bool
            Whether to sum all the file sizes
        maxdepth: int or None
            Maximum number of directory levels to descend, None for unlimited.
        withdirs: bool
            Whether to include directory paths in the output.
        kwargs: passed to ``find``

        Returns
        -------
        Dict of {path: size} if total=False, or int otherwise, where numbers
        refer to bytes used.
        """
        sizes = {}
        if withdirs and self.isdir(path):
            # Include top-level directory in output
            info = self.info(path)
            sizes[info["name"]] = info["size"]
        for f in self.find(path, maxdepth=maxdepth, withdirs=withdirs, **kwargs):
            info = self.info(f)
            sizes[info["name"]] = info["size"]
        if total:
            return sum(sizes.values())
        else:
            return sizes

    def glob(self, path, maxdepth=None, **kwargs):
        """
        Find files by glob-matching.

        If the path ends with '/', only folders are returned.

        We support ``"**"``,
        ``"?"`` and ``"[..]"``. We do not support ^ for pattern negation.

        The `maxdepth` option is applied on the first `**` found in the path.

        kwargs are passed to ``ls``.
        """
        if maxdepth is not None and maxdepth < 1:
            raise ValueError("maxdepth must be at least 1")

        import re

        seps = (os.path.sep, os.path.altsep) if os.path.altsep else (os.path.sep,)
        ends_with_sep = path.endswith(seps)  # _strip_protocol strips trailing slash
        path = self._strip_protocol(path)
        append_slash_to_dirname = ends_with_sep or path.endswith(
            tuple(sep + "**" for sep in seps)
        )
        idx_star = path.find("*") if path.find("*") >= 0 else len(path)
        idx_qmark = path.find("?") if path.find("?") >= 0 else len(path)
        idx_brace = path.find("[") if path.find("[") >= 0 else len(path)

        min_idx = min(idx_star, idx_qmark, idx_brace)

        detail = kwargs.pop("detail", False)

        if not has_magic(path):
            if self.exists(path, **kwargs):
                if not detail:
                    return [path]
                else:
                    return {path: self.info(path, **kwargs)}
            else:
                if not detail:
                    return []  # glob of non-existent returns empty
                else:
                    return {}
        elif "/" in path[:min_idx]:
            min_idx = path[:min_idx].rindex("/")
            root = path[: min_idx + 1]
            depth = path[min_idx + 1 :].count("/") + 1
        else:
            root = ""
            depth = path[min_idx + 1 :].count("/") + 1

        if "**" in path:
            if maxdepth is not None:
                idx_double_stars = path.find("**")
                depth_double_stars = path[idx_double_stars:].count("/") + 1
                depth = depth - depth_double_stars + maxdepth
            else:
                depth = None

        allpaths = self.find(root, maxdepth=depth, withdirs=True, detail=True, **kwargs)

        pattern = glob_translate(path + ("/" if ends_with_sep else ""))
        pattern = re.compile(pattern)

        out = {
            p: info
            for p, info in sorted(allpaths.items())
            if pattern.match(
                (
                    p + "/"
                    if append_slash_to_dirname and info["type"] == "directory"
                    else p
                )
            )
        }

        if detail:
            return out
        else:
            return list(out)
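    # Illustrative sketch (assuming the built-in "memory" implementation) of how
    # find, glob and walk relate:
    #
    #     fs = fsspec.filesystem("memory")
    #     fs.pipe({"/data/a.csv": b"x", "/data/sub/b.csv": b"y"})
    #     fs.find("/data")                 # every file below /data, recursively
    #     fs.glob("/data/*.csv")           # shallow match: only /data/a.csv
    #     for root, dirs, files in fs.walk("/data"):
    #         ...                          # os.walk-style traversal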
    def exists(self, path, **kwargs):
        """Is there a file at the given path"""
        try:
            self.info(path, **kwargs)
            return True
        except:  # noqa: E722
            # any exception allowed bar FileNotFoundError?
            return False

    def lexists(self, path, **kwargs):
        """If there is a file at the given path (including
        broken links)"""
        return self.exists(path)

    def info(self, path, **kwargs):
        """Give details of entry at path

        Returns a single dictionary, with exactly the same information as ``ls``
        would with ``detail=True``.

        The default implementation calls ls and could be overridden by a
        shortcut. kwargs are passed on to ``ls()``.

        Some file systems might not be able to measure the file's size, in
        which case, the returned dict will include ``'size': None``.

        Returns
        -------
        dict with keys: name (full path in the FS), size (in bytes), type (file,
        directory, or something else) and other FS-specific keys.
        """
        path = self._strip_protocol(path)
        out = self.ls(self._parent(path), detail=True, **kwargs)
        out = [o for o in out if o["name"].rstrip("/") == path]
        if out:
            return out[0]
        out = self.ls(path, detail=True, **kwargs)
        path = path.rstrip("/")
        out1 = [o for o in out if o["name"].rstrip("/") == path]
        if len(out1) == 1:
            if "size" not in out1[0]:
                out1[0]["size"] = None
            return out1[0]
        elif len(out1) > 1 or out:
            return {"name": path, "size": 0, "type": "directory"}
        else:
            raise FileNotFoundError(path)

    def checksum(self, path):
        """Unique value for current version of file

        If the checksum is the same from one moment to another, the contents
        are guaranteed to be the same. If the checksum changes, the contents
        *might* have changed.

        This should normally be overridden; default will probably capture
        creation/modification timestamp (which would be good) or maybe
        access timestamp (which would be bad)
        """
        return int(tokenize(self.info(path)), 16)

    def size(self, path):
        """Size in bytes of file"""
        return self.info(path).get("size", None)

    def sizes(self, paths):
        """Size in bytes of each file in a list of paths"""
        return [self.size(p) for p in paths]

    def isdir(self, path):
        """Is this entry directory-like?"""
        try:
            return self.info(path)["type"] == "directory"
        except OSError:
            return False

    def isfile(self, path):
        """Is this entry file-like?"""
        try:
            return self.info(path)["type"] == "file"
        except:  # noqa: E722
            return False

    def read_text(self, path, encoding=None, errors=None, newline=None, **kwargs):
        """Get the contents of the file as a string.

        Parameters
        ----------
        path: str
            URL of file on this filesystems
        encoding, errors, newline: same as `open`.
        """
        with self.open(
            path,
            mode="r",
            encoding=encoding,
            errors=errors,
            newline=newline,
            **kwargs,
        ) as f:
            return f.read()

    def write_text(
        self, path, value, encoding=None, errors=None, newline=None, **kwargs
    ):
        """Write the text to the given file.

        An existing file will be overwritten.

        Parameters
        ----------
        path: str
            URL of file on this filesystems
        value: str
            Text to write.
        encoding, errors, newline: same as `open`.
        """
        with self.open(
            path,
            mode="w",
            encoding=encoding,
            errors=errors,
            newline=newline,
            **kwargs,
        ) as f:
            return f.write(value)

    def cat_file(self, path, start=None, end=None, **kwargs):
        """Get the content of a file

        Parameters
        ----------
        path: URL of file on this filesystems
        start, end: int
            Bytes limits of the read. If negative, backwards from end,
            like usual python slices. Either can be None for start or
            end of file, respectively
        kwargs: passed to ``open()``.
        """
        # explicitly set buffering off?
        with self.open(path, "rb", **kwargs) as f:
            if start is not None:
                if start >= 0:
                    f.seek(start)
                else:
                    f.seek(max(0, f.size + start))
            if end is not None:
                if end < 0:
                    end = f.size + end
                return f.read(end - f.tell())
            return f.read()

    def pipe_file(self, path, value, **kwargs):
        """Set the bytes of given file"""
        with self.open(path, "wb", **kwargs) as f:
            f.write(value)

    def pipe(self, path, value=None, **kwargs):
        """Put value into path

        (counterpart to ``cat``)

        Parameters
        ----------
        path: string or dict(str, bytes)
            If a string, a single remote location to put ``value`` bytes; if a dict,
            a mapping of {path: bytesvalue}.
        value: bytes, optional
            If using a single path, these are the bytes to put there. Ignored if
            ``path`` is a dict
        """
        if isinstance(path, str):
            self.pipe_file(self._strip_protocol(path), value, **kwargs)
        elif isinstance(path, dict):
            for k, v in path.items():
                self.pipe_file(self._strip_protocol(k), v, **kwargs)
        else:
            raise ValueError("path must be str or dict")

    def cat_ranges(
        self, paths, starts, ends, max_gap=None, on_error="return", **kwargs
    ):
        """Get the contents of byte ranges from one or more files

        Parameters
        ----------
        paths: list
            A list of filepaths on this filesystems
        starts, ends: int or list
            Bytes limits of the read. If using a single int, the same value will be
            used to read all the specified files.
        """
        if max_gap is not None:
            raise NotImplementedError
        if not isinstance(paths, list):
            raise TypeError
        if not isinstance(starts, list):
            starts = [starts] * len(paths)
        if not isinstance(ends, list):
            ends = [ends] * len(paths)
        if len(starts) != len(paths) or len(ends) != len(paths):
            raise ValueError
        out = []
        for p, s, e in zip(paths, starts, ends):
            try:
                out.append(self.cat_file(p, s, e))
            except Exception as e:
                if on_error == "return":
                    out.append(e)
                else:
                    raise
        return out

    def cat(self, path, recursive=False, on_error="raise", **kwargs):
        """Fetch (potentially multiple) paths' contents

        Parameters
        ----------
        recursive: bool
            If True, assume the path(s) are directories, and get all the
            contained files
        on_error : "raise", "omit", "return"
            If raise, an underlying exception will be raised (converted to KeyError
            if the type is in self.missing_exceptions); if omit, keys with exception
            will simply not be included in the output; if "return", all keys are
            included in the output, but the value will be bytes or an exception
            instance.
        kwargs: passed to cat_file

        Returns
        -------
        dict of {path: contents} if there are multiple paths
        or the path has been otherwise expanded
        """
        paths = self.expand_path(path, recursive=recursive)
        if (
            len(paths) > 1
            or isinstance(path, list)
            or paths[0] != self._strip_protocol(path)
        ):
            out = {}
            for path in paths:
                try:
                    out[path] = self.cat_file(path, **kwargs)
                except Exception as e:
                    if on_error == "raise":
                        raise
                    if on_error == "return":
                        out[path] = e
            return out
        else:
            return self.cat_file(paths[0], **kwargs)
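    # Illustrative sketch (assuming the "memory" implementation): ``pipe`` and
    # ``cat`` are byte-oriented counterparts, and ``cat_file`` accepts byte ranges.
    #
    #     fs = fsspec.filesystem("memory")
    #     fs.pipe("/tmp/hello.bin", b"hello world")
    #     fs.cat_file("/tmp/hello.bin", 0, 5)    # b"hello"
    #     fs.cat("/tmp/hello.bin")               # b"hello world"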
    def get_file(
        self, rpath, lpath, callback=_DEFAULT_CALLBACK, outfile=None, **kwargs
    ):
        """Copy single remote file to local"""
        from .implementations.local import LocalFileSystem

        if isfilelike(lpath):
            outfile = lpath
        elif self.isdir(rpath):
            os.makedirs(lpath, exist_ok=True)
            return None

        fs = LocalFileSystem(auto_mkdir=True)
        fs.makedirs(fs._parent(lpath), exist_ok=True)

        with self.open(rpath, "rb", **kwargs) as f1:
            if outfile is None:
                outfile = open(lpath, "wb")

            try:
                callback.set_size(getattr(f1, "size", None))
                data = True
                while data:
                    data = f1.read(self.blocksize)
                    segment_len = outfile.write(data)
                    if segment_len is None:
                        segment_len = len(data)
                    callback.relative_update(segment_len)
            finally:
                if not isfilelike(lpath):
                    outfile.close()

    def get(
        self,
        rpath,
        lpath,
        recursive=False,
        callback=_DEFAULT_CALLBACK,
        maxdepth=None,
        **kwargs,
    ):
        """Copy file(s) to local.

        Copies a specific file or tree of files (if recursive=True). If lpath
        ends with a "/", it will be assumed to be a directory, and target files
        will go within. Can submit a list of paths, which may be glob-patterns
        and will be expanded.

        Calls get_file for each source.
        """
        if isinstance(lpath, list) and isinstance(rpath, list):
            # No need to expand paths when both source and destination
            # are provided as lists
            rpaths = rpath
            lpaths = lpath
        else:
            from .implementations.local import (
                LocalFileSystem,
                make_path_posix,
                trailing_sep,
            )

            source_is_str = isinstance(rpath, str)
            rpaths = self.expand_path(rpath, recursive=recursive, maxdepth=maxdepth)
            if source_is_str and (not recursive or maxdepth is not None):
                # Non-recursive glob does not copy directories
                rpaths = [p for p in rpaths if not (trailing_sep(p) or self.isdir(p))]
                if not rpaths:
                    return

            if isinstance(lpath, str):
                lpath = make_path_posix(lpath)

            source_is_file = len(rpaths) == 1
            dest_is_dir = isinstance(lpath, str) and (
                trailing_sep(lpath) or LocalFileSystem().isdir(lpath)
            )

            exists = source_is_str and (
                (has_magic(rpath) and source_is_file)
                or (not has_magic(rpath) and dest_is_dir and not trailing_sep(rpath))
            )
            lpaths = other_paths(
                rpaths,
                lpath,
                exists=exists,
                flatten=not source_is_str,
            )

        callback.set_size(len(lpaths))
        for lpath, rpath in callback.wrap(zip(lpaths, rpaths)):
            callback.branch(rpath, lpath, kwargs)
            self.get_file(rpath, lpath, **kwargs)

    def put_file(self, lpath, rpath, callback=_DEFAULT_CALLBACK, **kwargs):
        """Copy single file to remote"""
        if os.path.isdir(lpath):
            self.makedirs(rpath, exist_ok=True)
            return None

        with open(lpath, "rb") as f1:
            size = f1.seek(0, 2)
            callback.set_size(size)
            f1.seek(0)

            self.mkdirs(self._parent(os.fspath(rpath)), exist_ok=True)
            with self.open(rpath, "wb", **kwargs) as f2:
                while f1.tell() < size:
                    data = f1.read(self.blocksize)
                    segment_len = f2.write(data)
                    if segment_len is None:
                        segment_len = len(data)
                    callback.relative_update(segment_len)

    def put(
        self,
        lpath,
        rpath,
        recursive=False,
        callback=_DEFAULT_CALLBACK,
        maxdepth=None,
        **kwargs,
    ):
        """Copy file(s) from local.

        Copies a specific file or tree of files (if recursive=True). If rpath
        ends with a "/", it will be assumed to be a directory, and target files
        will go within.

        Calls put_file for each source.
        """
        if isinstance(lpath, list) and isinstance(rpath, list):
            # No need to expand paths when both source and destination
            # are provided as lists
            rpaths = rpath
            lpaths = lpath
        else:
            from .implementations.local import (
                LocalFileSystem,
                make_path_posix,
                trailing_sep,
            )

            source_is_str = isinstance(lpath, str)
            if source_is_str:
                lpath = make_path_posix(lpath)
            fs = LocalFileSystem()
            lpaths = fs.expand_path(lpath, recursive=recursive, maxdepth=maxdepth)
            if source_is_str and (not recursive or maxdepth is not None):
                # Non-recursive glob does not copy directories
                lpaths = [p for p in lpaths if not (trailing_sep(p) or fs.isdir(p))]
                if not lpaths:
                    return

            source_is_file = len(lpaths) == 1
            dest_is_dir = isinstance(rpath, str) and (
                trailing_sep(rpath) or self.isdir(rpath)
            )

            rpath = (
                self._strip_protocol(rpath)
                if isinstance(rpath, str)
                else [self._strip_protocol(p) for p in rpath]
            )
            exists = source_is_str and (
                (has_magic(lpath) and source_is_file)
                or (not has_magic(lpath) and dest_is_dir and not trailing_sep(lpath))
            )
            rpaths = other_paths(
                lpaths,
                rpath,
                exists=exists,
                flatten=not source_is_str,
            )

        callback.set_size(len(rpaths))
        for lpath, rpath in callback.wrap(zip(lpaths, rpaths)):
            callback.branch(lpath, rpath, kwargs)
            self.put_file(lpath, rpath, **kwargs)
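    # Illustrative sketch (assuming the "memory" implementation and a POSIX-style
    # local path): a trailing "/" marks the destination as a directory.
    #
    #     fs = fsspec.filesystem("memory")
    #     fs.pipe("/remote/report.txt", b"contents")
    #     fs.get("/remote/report.txt", "/tmp/downloads/")          # download
    #     fs.put("/tmp/downloads/report.txt", "/remote/copy.txt")  # upload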
def head(self, path, size=1024):
|
1060 |
+
"""Get the first ``size`` bytes from file"""
|
1061 |
+
with self.open(path, "rb") as f:
|
1062 |
+
return f.read(size)
|
1063 |
+
|
1064 |
+
def tail(self, path, size=1024):
|
1065 |
+
"""Get the last ``size`` bytes from file"""
|
1066 |
+
with self.open(path, "rb") as f:
|
1067 |
+
f.seek(max(-size, -f.size), 2)
|
1068 |
+
return f.read()
|
1069 |
+
|
1070 |
+
def cp_file(self, path1, path2, **kwargs):
|
1071 |
+
raise NotImplementedError
|
1072 |
+
|
1073 |
+
def copy(
|
1074 |
+
self, path1, path2, recursive=False, maxdepth=None, on_error=None, **kwargs
|
1075 |
+
):
|
1076 |
+
"""Copy within two locations in the filesystem
|
1077 |
+
|
1078 |
+
on_error : "raise", "ignore"
|
1079 |
+
If raise, any not-found exceptions will be raised; if ignore any
|
1080 |
+
not-found exceptions will cause the path to be skipped; defaults to
|
1081 |
+
raise unless recursive is true, where the default is ignore
|
1082 |
+
"""
|
1083 |
+
if on_error is None and recursive:
|
1084 |
+
on_error = "ignore"
|
1085 |
+
elif on_error is None:
|
1086 |
+
on_error = "raise"
|
1087 |
+
|
1088 |
+
if isinstance(path1, list) and isinstance(path2, list):
|
1089 |
+
# No need to expand paths when both source and destination
|
1090 |
+
# are provided as lists
|
1091 |
+
paths1 = path1
|
1092 |
+
paths2 = path2
|
1093 |
+
else:
|
1094 |
+
from .implementations.local import trailing_sep
|
1095 |
+
|
1096 |
+
source_is_str = isinstance(path1, str)
|
1097 |
+
paths1 = self.expand_path(path1, recursive=recursive, maxdepth=maxdepth)
|
1098 |
+
if source_is_str and (not recursive or maxdepth is not None):
|
1099 |
+
# Non-recursive glob does not copy directories
|
1100 |
+
paths1 = [p for p in paths1 if not (trailing_sep(p) or self.isdir(p))]
|
1101 |
+
if not paths1:
|
1102 |
+
return
|
1103 |
+
|
1104 |
+
source_is_file = len(paths1) == 1
|
1105 |
+
dest_is_dir = isinstance(path2, str) and (
|
1106 |
+
trailing_sep(path2) or self.isdir(path2)
|
1107 |
+
)
|
1108 |
+
|
1109 |
+
exists = source_is_str and (
|
1110 |
+
(has_magic(path1) and source_is_file)
|
1111 |
+
or (not has_magic(path1) and dest_is_dir and not trailing_sep(path1))
|
1112 |
+
)
|
1113 |
+
paths2 = other_paths(
|
1114 |
+
paths1,
|
1115 |
+
path2,
|
1116 |
+
exists=exists,
|
1117 |
+
flatten=not source_is_str,
|
1118 |
+
)
|
1119 |
+
|
1120 |
+
for p1, p2 in zip(paths1, paths2):
|
1121 |
+
try:
|
1122 |
+
self.cp_file(p1, p2, **kwargs)
|
1123 |
+
except FileNotFoundError:
|
1124 |
+
if on_error == "raise":
|
1125 |
+
raise
|
1126 |
+
|
1127 |
+
def expand_path(self, path, recursive=False, maxdepth=None, **kwargs):
|
1128 |
+
"""Turn one or more globs or directories into a list of all matching paths
|
1129 |
+
to files or directories.
|
1130 |
+
|
1131 |
+
kwargs are passed to ``glob`` or ``find``, which may in turn call ``ls``
|
1132 |
+
"""
|
1133 |
+
|
1134 |
+
if maxdepth is not None and maxdepth < 1:
|
1135 |
+
raise ValueError("maxdepth must be at least 1")
|
1136 |
+
|
1137 |
+
if isinstance(path, str):
|
1138 |
+
out = self.expand_path([path], recursive, maxdepth)
|
1139 |
+
else:
|
1140 |
+
out = set()
|
1141 |
+
path = [self._strip_protocol(p) for p in path]
|
1142 |
+
for p in path:
|
1143 |
+
if has_magic(p):
|
1144 |
+
bit = set(self.glob(p, maxdepth=maxdepth, **kwargs))
|
1145 |
+
out |= bit
|
1146 |
+
if recursive:
|
1147 |
+
# glob call above expanded one depth so if maxdepth is defined
|
1148 |
+
# then decrement it in expand_path call below. If it is zero
|
1149 |
+
# after decrementing then avoid expand_path call.
|
1150 |
+
if maxdepth is not None and maxdepth <= 1:
|
1151 |
+
continue
|
1152 |
+
out |= set(
|
1153 |
+
self.expand_path(
|
1154 |
+
list(bit),
|
1155 |
+
recursive=recursive,
|
1156 |
+
maxdepth=maxdepth - 1 if maxdepth is not None else None,
|
1157 |
+
**kwargs,
|
1158 |
+
)
|
1159 |
+
)
|
1160 |
+
continue
|
1161 |
+
elif recursive:
|
1162 |
+
rec = set(
|
1163 |
+
self.find(
|
1164 |
+
p, maxdepth=maxdepth, withdirs=True, detail=False, **kwargs
|
1165 |
+
)
|
1166 |
+
)
|
1167 |
+
out |= rec
|
1168 |
+
if p not in out and (recursive is False or self.exists(p)):
|
1169 |
+
# should only check once, for the root
|
1170 |
+
out.add(p)
|
1171 |
+
if not out:
|
1172 |
+
raise FileNotFoundError(path)
|
1173 |
+
return sorted(out)
|
1174 |
+
|
1175 |
+
def mv(self, path1, path2, recursive=False, maxdepth=None, **kwargs):
|
1176 |
+
"""Move file(s) from one location to another"""
|
1177 |
+
if path1 == path2:
|
1178 |
+
logger.debug("%s mv: The paths are the same, so no files were moved.", self)
|
1179 |
+
else:
|
1180 |
+
self.copy(path1, path2, recursive=recursive, maxdepth=maxdepth)
|
1181 |
+
self.rm(path1, recursive=recursive)
|
1182 |
+
|
1183 |
+
def rm_file(self, path):
|
1184 |
+
"""Delete a file"""
|
1185 |
+
self._rm(path)
|
1186 |
+
|
1187 |
+
def _rm(self, path):
|
1188 |
+
"""Delete one file"""
|
1189 |
+
# this is the old name for the method, prefer rm_file
|
1190 |
+
raise NotImplementedError
|
1191 |
+
|
1192 |
+
def rm(self, path, recursive=False, maxdepth=None):
|
1193 |
+
"""Delete files.
|
1194 |
+
|
1195 |
+
Parameters
|
1196 |
+
----------
|
1197 |
+
path: str or list of str
|
1198 |
+
File(s) to delete.
|
1199 |
+
recursive: bool
|
1200 |
+
If file(s) are directories, recursively delete contents and then
|
1201 |
+
also remove the directory
|
1202 |
+
maxdepth: int or None
|
1203 |
+
Depth to pass to walk for finding files to delete, if recursive.
|
1204 |
+
If None, there will be no limit and infinite recursion may be
|
1205 |
+
possible.
|
1206 |
+
"""
|
1207 |
+
path = self.expand_path(path, recursive=recursive, maxdepth=maxdepth)
|
1208 |
+
for p in reversed(path):
|
1209 |
+
self.rm_file(p)
|
1210 |
+
|
1211 |
+
@classmethod
|
1212 |
+
def _parent(cls, path):
|
1213 |
+
path = cls._strip_protocol(path)
|
1214 |
+
if "/" in path:
|
1215 |
+
parent = path.rsplit("/", 1)[0].lstrip(cls.root_marker)
|
1216 |
+
return cls.root_marker + parent
|
1217 |
+
else:
|
1218 |
+
return cls.root_marker
|
1219 |
+
|
1220 |
+
def _open(
|
1221 |
+
self,
|
1222 |
+
path,
|
1223 |
+
mode="rb",
|
1224 |
+
block_size=None,
|
1225 |
+
autocommit=True,
|
1226 |
+
cache_options=None,
|
1227 |
+
**kwargs,
|
1228 |
+
):
|
1229 |
+
"""Return raw bytes-mode file-like from the file-system"""
|
1230 |
+
return AbstractBufferedFile(
|
1231 |
+
self,
|
1232 |
+
path,
|
1233 |
+
mode,
|
1234 |
+
block_size,
|
1235 |
+
autocommit,
|
1236 |
+
cache_options=cache_options,
|
1237 |
+
**kwargs,
|
1238 |
+
)
|
1239 |
+
|
1240 |
+
def open(
|
1241 |
+
self,
|
1242 |
+
path,
|
1243 |
+
mode="rb",
|
1244 |
+
block_size=None,
|
1245 |
+
cache_options=None,
|
1246 |
+
compression=None,
|
1247 |
+
**kwargs,
|
1248 |
+
):
|
1249 |
+
"""
|
1250 |
+
Return a file-like object from the filesystem
|
1251 |
+
|
1252 |
+
The resultant instance must function correctly in a context ``with``
|
1253 |
+
block.
|
1254 |
+
|
1255 |
+
Parameters
|
1256 |
+
----------
|
1257 |
+
path: str
|
1258 |
+
Target file
|
1259 |
+
mode: str like 'rb', 'w'
|
1260 |
+
See builtin ``open()``
|
1261 |
+
block_size: int
|
1262 |
+
Some indication of buffering - this is a value in bytes
|
1263 |
+
cache_options : dict, optional
|
1264 |
+
Extra arguments to pass through to the cache.
|
1265 |
+
compression: string or None
|
1266 |
+
If given, open file using compression codec. Can either be a compression
|
1267 |
+
name (a key in ``fsspec.compression.compr``) or "infer" to guess the
|
1268 |
+
compression from the filename suffix.
|
1269 |
+
encoding, errors, newline: passed on to TextIOWrapper for text mode
|
1270 |
+
"""
|
1271 |
+
import io
|
1272 |
+
|
1273 |
+
path = self._strip_protocol(path)
|
1274 |
+
if "b" not in mode:
|
1275 |
+
mode = mode.replace("t", "") + "b"
|
1276 |
+
|
1277 |
+
text_kwargs = {
|
1278 |
+
k: kwargs.pop(k)
|
1279 |
+
for k in ["encoding", "errors", "newline"]
|
1280 |
+
if k in kwargs
|
1281 |
+
}
|
1282 |
+
return io.TextIOWrapper(
|
1283 |
+
self.open(
|
1284 |
+
path,
|
1285 |
+
mode,
|
1286 |
+
block_size=block_size,
|
1287 |
+
cache_options=cache_options,
|
1288 |
+
compression=compression,
|
1289 |
+
**kwargs,
|
1290 |
+
),
|
1291 |
+
**text_kwargs,
|
1292 |
+
)
|
1293 |
+
else:
|
1294 |
+
ac = kwargs.pop("autocommit", not self._intrans)
|
1295 |
+
f = self._open(
|
1296 |
+
path,
|
1297 |
+
mode=mode,
|
1298 |
+
block_size=block_size,
|
1299 |
+
autocommit=ac,
|
1300 |
+
cache_options=cache_options,
|
1301 |
+
**kwargs,
|
1302 |
+
)
|
1303 |
+
if compression is not None:
|
1304 |
+
from fsspec.compression import compr
|
1305 |
+
from fsspec.core import get_compression
|
1306 |
+
|
1307 |
+
compression = get_compression(path, compression)
|
1308 |
+
compress = compr[compression]
|
1309 |
+
f = compress(f, mode=mode[0])
|
1310 |
+
|
1311 |
+
if not ac and "r" not in mode:
|
1312 |
+
self.transaction.files.append(f)
|
1313 |
+
return f
|
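# Usage sketch (illustrative, not part of spec.py): text mode and compression are
# layered on top of the raw binary file returned by _open(), so a gzipped CSV can
# be read transparently from any concrete filesystem instance `fs`:
#
#     with fs.open("data/file.csv.gz", mode="rt", compression="infer",
#                  encoding="utf-8") as f:
#         header = f.readline()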
1314 |
+
|
1315 |
+
def touch(self, path, truncate=True, **kwargs):
|
1316 |
+
"""Create empty file, or update timestamp
|
1317 |
+
|
1318 |
+
Parameters
|
1319 |
+
----------
|
1320 |
+
path: str
|
1321 |
+
file location
|
1322 |
+
truncate: bool
|
1323 |
+
If True, always set file size to 0; if False, update timestamp and
|
1324 |
+
leave file unchanged, if backend allows this
|
1325 |
+
"""
|
1326 |
+
if truncate or not self.exists(path):
|
1327 |
+
with self.open(path, "wb", **kwargs):
|
1328 |
+
pass
|
1329 |
+
else:
|
1330 |
+
raise NotImplementedError # update timestamp, if possible
|
1331 |
+
|
1332 |
+
def ukey(self, path):
|
1333 |
+
"""Hash of file properties, to tell if it has changed"""
|
1334 |
+
return sha256(str(self.info(path)).encode()).hexdigest()
|
1335 |
+
|
1336 |
+
def read_block(self, fn, offset, length, delimiter=None):
|
1337 |
+
"""Read a block of bytes from
|
1338 |
+
|
1339 |
+
Starting at ``offset`` of the file, read ``length`` bytes. If
|
1340 |
+
``delimiter`` is set then we ensure that the read starts and stops at
|
1341 |
+
delimiter boundaries that follow the locations ``offset`` and ``offset
|
1342 |
+
+ length``. If ``offset`` is zero then we start at zero. The
|
1343 |
+
bytestring returned WILL include the end delimiter string.
|
1344 |
+
|
1345 |
+
If offset+length is beyond the eof, reads to eof.
|
1346 |
+
|
1347 |
+
Parameters
|
1348 |
+
----------
|
1349 |
+
fn: string
|
1350 |
+
Path to filename
|
1351 |
+
offset: int
|
1352 |
+
Byte offset to start read
|
1353 |
+
length: int
|
1354 |
+
Number of bytes to read. If None, read to end.
|
1355 |
+
delimiter: bytes (optional)
|
1356 |
+
Ensure reading starts and stops at delimiter bytestring
|
1357 |
+
|
1358 |
+
Examples
|
1359 |
+
--------
|
1360 |
+
>>> fs.read_block('data/file.csv', 0, 13) # doctest: +SKIP
|
1361 |
+
b'Alice, 100\\nBo'
|
1362 |
+
>>> fs.read_block('data/file.csv', 0, 13, delimiter=b'\\n') # doctest: +SKIP
|
1363 |
+
b'Alice, 100\\nBob, 200\\n'
|
1364 |
+
|
1365 |
+
Use ``length=None`` to read to the end of the file.
|
1366 |
+
>>> fs.read_block('data/file.csv', 0, None, delimiter=b'\\n') # doctest: +SKIP
|
1367 |
+
b'Alice, 100\\nBob, 200\\nCharlie, 300'
|
1368 |
+
|
1369 |
+
See Also
|
1370 |
+
--------
|
1371 |
+
:func:`fsspec.utils.read_block`
|
1372 |
+
"""
|
1373 |
+
with self.open(fn, "rb") as f:
|
1374 |
+
size = f.size
|
1375 |
+
if length is None:
|
1376 |
+
length = size
|
1377 |
+
if size is not None and offset + length > size:
|
1378 |
+
length = size - offset
|
1379 |
+
return read_block(f, offset, length, delimiter)
|
1380 |
+
|
1381 |
+
def to_json(self):
|
1382 |
+
"""
|
1383 |
+
JSON representation of this filesystem instance
|
1384 |
+
|
1385 |
+
Returns
|
1386 |
+
-------
|
1387 |
+
str: JSON structure with keys cls (the python location of this class),
|
1388 |
+
protocol (text name of this class's protocol, first one in case of
|
1389 |
+
multiple), args (positional args, usually empty), and all other
|
1390 |
+
kwargs as their own keys.
|
1391 |
+
"""
|
1392 |
+
import json
|
1393 |
+
|
1394 |
+
cls = type(self)
|
1395 |
+
cls = ".".join((cls.__module__, cls.__name__))
|
1396 |
+
proto = (
|
1397 |
+
self.protocol[0]
|
1398 |
+
if isinstance(self.protocol, (tuple, list))
|
1399 |
+
else self.protocol
|
1400 |
+
)
|
1401 |
+
return json.dumps(
|
1402 |
+
dict(
|
1403 |
+
**{"cls": cls, "protocol": proto, "args": self.storage_args},
|
1404 |
+
**self.storage_options,
|
1405 |
+
)
|
1406 |
+
)
|
1407 |
+
|
1408 |
+
@staticmethod
|
1409 |
+
def from_json(blob):
|
1410 |
+
"""
|
1411 |
+
Recreate a filesystem instance from JSON representation
|
1412 |
+
|
1413 |
+
See ``.to_json()`` for the expected structure of the input
|
1414 |
+
|
1415 |
+
Parameters
|
1416 |
+
----------
|
1417 |
+
blob: str
|
1418 |
+
|
1419 |
+
Returns
|
1420 |
+
-------
|
1421 |
+
file system instance, not necessarily of this particular class.
|
1422 |
+
"""
|
1423 |
+
import json
|
1424 |
+
|
1425 |
+
from .registry import _import_class, get_filesystem_class
|
1426 |
+
|
1427 |
+
dic = json.loads(blob)
|
1428 |
+
protocol = dic.pop("protocol")
|
1429 |
+
try:
|
1430 |
+
cls = _import_class(dic.pop("cls"))
|
1431 |
+
except (ImportError, ValueError, RuntimeError, KeyError):
|
1432 |
+
cls = get_filesystem_class(protocol)
|
1433 |
+
return cls(*dic.pop("args", ()), **dic)
|
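# Round-trip sketch (illustrative): to_json() records cls, protocol, args and the
# storage options, and from_json() rebuilds an equivalent instance from that blob;
# with instance caching enabled this normally hands back the same cached object.
#
#     import fsspec
#     fs = fsspec.filesystem("memory")
#     blob = fs.to_json()
#     same_fs = type(fs).from_json(blob)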
1434 |
+
|
1435 |
+
def _get_pyarrow_filesystem(self):
|
1436 |
+
"""
|
1437 |
+
Make a version of the FS instance which will be acceptable to pyarrow
|
1438 |
+
"""
|
1439 |
+
# all instances already also derive from pyarrow
|
1440 |
+
return self
|
1441 |
+
|
1442 |
+
def get_mapper(self, root="", check=False, create=False, missing_exceptions=None):
|
1443 |
+
"""Create key/value store based on this file-system
|
1444 |
+
|
1445 |
+
Makes a MutableMapping interface to the FS at the given root path.
|
1446 |
+
See ``fsspec.mapping.FSMap`` for further details.
|
1447 |
+
"""
|
1448 |
+
from .mapping import FSMap
|
1449 |
+
|
1450 |
+
return FSMap(
|
1451 |
+
root,
|
1452 |
+
self,
|
1453 |
+
check=check,
|
1454 |
+
create=create,
|
1455 |
+
missing_exceptions=missing_exceptions,
|
1456 |
+
)
|
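# Usage sketch (illustrative): the FSMap returned by get_mapper() behaves as a
# MutableMapping keyed by path, which is the interface expected by key/value
# consumers such as zarr.
#
#     m = fs.get_mapper("bucket/prefix")
#     m["a/b"] = b"payload"
#     assert m["a/b"] == b"payload"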
1457 |
+
|
1458 |
+
@classmethod
|
1459 |
+
def clear_instance_cache(cls):
|
1460 |
+
"""
|
1461 |
+
Clear the cache of filesystem instances.
|
1462 |
+
|
1463 |
+
Notes
|
1464 |
+
-----
|
1465 |
+
Unless overridden by setting the ``cachable`` class attribute to False,
|
1466 |
+
the filesystem class stores a reference to newly created instances. This
|
1467 |
+
prevents Python's normal rules around garbage collection from working,
|
1468 |
+
since the instance's refcount will not drop to zero until
|
1469 |
+
``clear_instance_cache`` is called.
|
1470 |
+
"""
|
1471 |
+
cls._cache.clear()
|
1472 |
+
|
1473 |
+
def created(self, path):
|
1474 |
+
"""Return the created timestamp of a file as a datetime.datetime"""
|
1475 |
+
raise NotImplementedError
|
1476 |
+
|
1477 |
+
def modified(self, path):
|
1478 |
+
"""Return the modified timestamp of a file as a datetime.datetime"""
|
1479 |
+
raise NotImplementedError
|
1480 |
+
|
1481 |
+
# ------------------------------------------------------------------------
|
1482 |
+
# Aliases
|
1483 |
+
|
1484 |
+
def read_bytes(self, path, start=None, end=None, **kwargs):
|
1485 |
+
"""Alias of `AbstractFileSystem.cat_file`."""
|
1486 |
+
return self.cat_file(path, start=start, end=end, **kwargs)
|
1487 |
+
|
1488 |
+
def write_bytes(self, path, value, **kwargs):
|
1489 |
+
"""Alias of `AbstractFileSystem.pipe_file`."""
|
1490 |
+
self.pipe_file(path, value, **kwargs)
|
1491 |
+
|
1492 |
+
def makedir(self, path, create_parents=True, **kwargs):
|
1493 |
+
"""Alias of `AbstractFileSystem.mkdir`."""
|
1494 |
+
return self.mkdir(path, create_parents=create_parents, **kwargs)
|
1495 |
+
|
1496 |
+
def mkdirs(self, path, exist_ok=False):
|
1497 |
+
"""Alias of `AbstractFileSystem.makedirs`."""
|
1498 |
+
return self.makedirs(path, exist_ok=exist_ok)
|
1499 |
+
|
1500 |
+
def listdir(self, path, detail=True, **kwargs):
|
1501 |
+
"""Alias of `AbstractFileSystem.ls`."""
|
1502 |
+
return self.ls(path, detail=detail, **kwargs)
|
1503 |
+
|
1504 |
+
def cp(self, path1, path2, **kwargs):
|
1505 |
+
"""Alias of `AbstractFileSystem.copy`."""
|
1506 |
+
return self.copy(path1, path2, **kwargs)
|
1507 |
+
|
1508 |
+
def move(self, path1, path2, **kwargs):
|
1509 |
+
"""Alias of `AbstractFileSystem.mv`."""
|
1510 |
+
return self.mv(path1, path2, **kwargs)
|
1511 |
+
|
1512 |
+
def stat(self, path, **kwargs):
|
1513 |
+
"""Alias of `AbstractFileSystem.info`."""
|
1514 |
+
return self.info(path, **kwargs)
|
1515 |
+
|
1516 |
+
def disk_usage(self, path, total=True, maxdepth=None, **kwargs):
|
1517 |
+
"""Alias of `AbstractFileSystem.du`."""
|
1518 |
+
return self.du(path, total=total, maxdepth=maxdepth, **kwargs)
|
1519 |
+
|
1520 |
+
def rename(self, path1, path2, **kwargs):
|
1521 |
+
"""Alias of `AbstractFileSystem.mv`."""
|
1522 |
+
return self.mv(path1, path2, **kwargs)
|
1523 |
+
|
1524 |
+
def delete(self, path, recursive=False, maxdepth=None):
|
1525 |
+
"""Alias of `AbstractFileSystem.rm`."""
|
1526 |
+
return self.rm(path, recursive=recursive, maxdepth=maxdepth)
|
1527 |
+
|
1528 |
+
def upload(self, lpath, rpath, recursive=False, **kwargs):
|
1529 |
+
"""Alias of `AbstractFileSystem.put`."""
|
1530 |
+
return self.put(lpath, rpath, recursive=recursive, **kwargs)
|
1531 |
+
|
1532 |
+
def download(self, rpath, lpath, recursive=False, **kwargs):
|
1533 |
+
"""Alias of `AbstractFileSystem.get`."""
|
1534 |
+
return self.get(rpath, lpath, recursive=recursive, **kwargs)
|
1535 |
+
|
1536 |
+
def sign(self, path, expiration=100, **kwargs):
|
1537 |
+
"""Create a signed URL representing the given path
|
1538 |
+
|
1539 |
+
Some implementations allow temporary URLs to be generated, as a
|
1540 |
+
way of delegating credentials.
|
1541 |
+
|
1542 |
+
Parameters
|
1543 |
+
----------
|
1544 |
+
path : str
|
1545 |
+
The path on the filesystem
|
1546 |
+
expiration : int
|
1547 |
+
Number of seconds to enable the URL for (if supported)
|
1548 |
+
|
1549 |
+
Returns
|
1550 |
+
-------
|
1551 |
+
URL : str
|
1552 |
+
The signed URL
|
1553 |
+
|
1554 |
+
Raises
|
1555 |
+
------
|
1556 |
+
NotImplementedError : if method is not implemented for a filesystem
|
1557 |
+
"""
|
1558 |
+
raise NotImplementedError("Sign is not implemented for this filesystem")
|
1559 |
+
|
1560 |
+
def _isfilestore(self):
|
1561 |
+
# Originally inherited from pyarrow DaskFileSystem. Keeping this
|
1562 |
+
# here for backwards compatibility as long as pyarrow uses its
|
1563 |
+
# legacy fsspec-compatible filesystems and thus accepts fsspec
|
1564 |
+
# filesystems as well
|
1565 |
+
return False
|
1566 |
+
|
1567 |
+
|
1568 |
+
class AbstractBufferedFile(io.IOBase):
|
1569 |
+
"""Convenient class to derive from to provide buffering
|
1570 |
+
|
1571 |
+
In the case that the backend does not provide a pythonic file-like object
|
1572 |
+
already, this class contains much of the logic to build one. The only
|
1573 |
+
methods that need to be overridden are ``_upload_chunk``,
|
1574 |
+
``_initiate_upload`` and ``_fetch_range``.
|
1575 |
+
"""
|
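# Minimal subclass sketch (illustrative, not part of fsspec): for a read-only
# backend only _fetch_range needs real logic; the _download_range helper used
# here is hypothetical.
#
#     class ReadOnlyBufferedFile(AbstractBufferedFile):
#         def _fetch_range(self, start, end):
#             # return the bytes [start, end) from the remote store
#             return self.fs._download_range(self.path, start, end)
#
#         def _initiate_upload(self):
#             raise NotImplementedError("read-only backend")
#
#         def _upload_chunk(self, final=False):
#             raise NotImplementedError("read-only backend")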
1576 |
+
|
1577 |
+
DEFAULT_BLOCK_SIZE = 5 * 2**20
|
1578 |
+
_details = None
|
1579 |
+
|
1580 |
+
def __init__(
|
1581 |
+
self,
|
1582 |
+
fs,
|
1583 |
+
path,
|
1584 |
+
mode="rb",
|
1585 |
+
block_size="default",
|
1586 |
+
autocommit=True,
|
1587 |
+
cache_type="readahead",
|
1588 |
+
cache_options=None,
|
1589 |
+
size=None,
|
1590 |
+
**kwargs,
|
1591 |
+
):
|
1592 |
+
"""
|
1593 |
+
Template for files with buffered reading and writing
|
1594 |
+
|
1595 |
+
Parameters
|
1596 |
+
----------
|
1597 |
+
fs: instance of FileSystem
|
1598 |
+
path: str
|
1599 |
+
location in file-system
|
1600 |
+
mode: str
|
1601 |
+
Normal file modes. Currently only 'wb', 'ab' or 'rb'. Some file
|
1602 |
+
systems may be read-only, and some may not support append.
|
1603 |
+
block_size: int
|
1604 |
+
Buffer size for reading or writing, 'default' for class default
|
1605 |
+
autocommit: bool
|
1606 |
+
Whether to write to final destination; may only impact what
|
1607 |
+
happens when file is being closed.
|
1608 |
+
cache_type: {"readahead", "none", "mmap", "bytes"}, default "readahead"
|
1609 |
+
Caching policy in read mode. See the definitions in ``core``.
|
1610 |
+
cache_options : dict
|
1611 |
+
Additional options passed to the constructor for the cache specified
|
1612 |
+
by `cache_type`.
|
1613 |
+
size: int
|
1614 |
+
If given and in read mode, suppresses having to look up the file size
|
1615 |
+
kwargs:
|
1616 |
+
Gets stored as self.kwargs
|
1617 |
+
"""
|
1618 |
+
from .core import caches
|
1619 |
+
|
1620 |
+
self.path = path
|
1621 |
+
self.fs = fs
|
1622 |
+
self.mode = mode
|
1623 |
+
self.blocksize = (
|
1624 |
+
self.DEFAULT_BLOCK_SIZE if block_size in ["default", None] else block_size
|
1625 |
+
)
|
1626 |
+
self.loc = 0
|
1627 |
+
self.autocommit = autocommit
|
1628 |
+
self.end = None
|
1629 |
+
self.start = None
|
1630 |
+
self.closed = False
|
1631 |
+
|
1632 |
+
if cache_options is None:
|
1633 |
+
cache_options = {}
|
1634 |
+
|
1635 |
+
if "trim" in kwargs:
|
1636 |
+
warnings.warn(
|
1637 |
+
"Passing 'trim' to control the cache behavior has been deprecated. "
|
1638 |
+
"Specify it within the 'cache_options' argument instead.",
|
1639 |
+
FutureWarning,
|
1640 |
+
)
|
1641 |
+
cache_options["trim"] = kwargs.pop("trim")
|
1642 |
+
|
1643 |
+
self.kwargs = kwargs
|
1644 |
+
|
1645 |
+
if mode not in {"ab", "rb", "wb"}:
|
1646 |
+
raise NotImplementedError("File mode not supported")
|
1647 |
+
if mode == "rb":
|
1648 |
+
if size is not None:
|
1649 |
+
self.size = size
|
1650 |
+
else:
|
1651 |
+
self.size = self.details["size"]
|
1652 |
+
self.cache = caches[cache_type](
|
1653 |
+
self.blocksize, self._fetch_range, self.size, **cache_options
|
1654 |
+
)
|
1655 |
+
else:
|
1656 |
+
self.buffer = io.BytesIO()
|
1657 |
+
self.offset = None
|
1658 |
+
self.forced = False
|
1659 |
+
self.location = None
|
1660 |
+
|
1661 |
+
@property
|
1662 |
+
def details(self):
|
1663 |
+
if self._details is None:
|
1664 |
+
self._details = self.fs.info(self.path)
|
1665 |
+
return self._details
|
1666 |
+
|
1667 |
+
@details.setter
|
1668 |
+
def details(self, value):
|
1669 |
+
self._details = value
|
1670 |
+
self.size = value["size"]
|
1671 |
+
|
1672 |
+
@property
|
1673 |
+
def full_name(self):
|
1674 |
+
return _unstrip_protocol(self.path, self.fs)
|
1675 |
+
|
1676 |
+
@property
|
1677 |
+
def closed(self):
|
1678 |
+
# get around this attr being read-only in IOBase
|
1679 |
+
# use getattr here, since this can be called during del
|
1680 |
+
return getattr(self, "_closed", True)
|
1681 |
+
|
1682 |
+
@closed.setter
|
1683 |
+
def closed(self, c):
|
1684 |
+
self._closed = c
|
1685 |
+
|
1686 |
+
def __hash__(self):
|
1687 |
+
if "w" in self.mode:
|
1688 |
+
return id(self)
|
1689 |
+
else:
|
1690 |
+
return int(tokenize(self.details), 16)
|
1691 |
+
|
1692 |
+
def __eq__(self, other):
|
1693 |
+
"""Files are equal if they have the same checksum, only in read mode"""
|
1694 |
+
return self.mode == "rb" and other.mode == "rb" and hash(self) == hash(other)
|
1695 |
+
|
1696 |
+
def commit(self):
|
1697 |
+
"""Move from temp to final destination"""
|
1698 |
+
|
1699 |
+
def discard(self):
|
1700 |
+
"""Throw away temporary file"""
|
1701 |
+
|
1702 |
+
def info(self):
|
1703 |
+
"""File information about this path"""
|
1704 |
+
if "r" in self.mode:
|
1705 |
+
return self.details
|
1706 |
+
else:
|
1707 |
+
raise ValueError("Info not available while writing")
|
1708 |
+
|
1709 |
+
def tell(self):
|
1710 |
+
"""Current file location"""
|
1711 |
+
return self.loc
|
1712 |
+
|
1713 |
+
def seek(self, loc, whence=0):
|
1714 |
+
"""Set current file location
|
1715 |
+
|
1716 |
+
Parameters
|
1717 |
+
----------
|
1718 |
+
loc: int
|
1719 |
+
byte location
|
1720 |
+
whence: {0, 1, 2}
|
1721 |
+
from start of file, current location or end of file, resp.
|
1722 |
+
"""
|
1723 |
+
loc = int(loc)
|
1724 |
+
if not self.mode == "rb":
|
1725 |
+
raise OSError(ESPIPE, "Seek only available in read mode")
|
1726 |
+
if whence == 0:
|
1727 |
+
nloc = loc
|
1728 |
+
elif whence == 1:
|
1729 |
+
nloc = self.loc + loc
|
1730 |
+
elif whence == 2:
|
1731 |
+
nloc = self.size + loc
|
1732 |
+
else:
|
1733 |
+
raise ValueError(f"invalid whence ({whence}, should be 0, 1 or 2)")
|
1734 |
+
if nloc < 0:
|
1735 |
+
raise ValueError("Seek before start of file")
|
1736 |
+
self.loc = nloc
|
1737 |
+
return self.loc
|
1738 |
+
|
1739 |
+
def write(self, data):
|
1740 |
+
"""
|
1741 |
+
Write data to buffer.
|
1742 |
+
|
1743 |
+
Buffer only sent on flush() or if buffer is greater than
|
1744 |
+
or equal to blocksize.
|
1745 |
+
|
1746 |
+
Parameters
|
1747 |
+
----------
|
1748 |
+
data: bytes
|
1749 |
+
Set of bytes to be written.
|
1750 |
+
"""
|
1751 |
+
if self.mode not in {"wb", "ab"}:
|
1752 |
+
raise ValueError("File not in write mode")
|
1753 |
+
if self.closed:
|
1754 |
+
raise ValueError("I/O operation on closed file.")
|
1755 |
+
if self.forced:
|
1756 |
+
raise ValueError("This file has been force-flushed, can only close")
|
1757 |
+
out = self.buffer.write(data)
|
1758 |
+
self.loc += out
|
1759 |
+
if self.buffer.tell() >= self.blocksize:
|
1760 |
+
self.flush()
|
1761 |
+
return out
|
1762 |
+
|
1763 |
+
def flush(self, force=False):
|
1764 |
+
"""
|
1765 |
+
Write buffered data to backend store.
|
1766 |
+
|
1767 |
+
Writes the current buffer, if it is larger than the block-size, or if
|
1768 |
+
the file is being closed.
|
1769 |
+
|
1770 |
+
Parameters
|
1771 |
+
----------
|
1772 |
+
force: bool
|
1773 |
+
When closing, write the last block even if it is smaller than
|
1774 |
+
blocks are allowed to be. Disallows further writing to this file.
|
1775 |
+
"""
|
1776 |
+
|
1777 |
+
if self.closed:
|
1778 |
+
raise ValueError("Flush on closed file")
|
1779 |
+
if force and self.forced:
|
1780 |
+
raise ValueError("Force flush cannot be called more than once")
|
1781 |
+
if force:
|
1782 |
+
self.forced = True
|
1783 |
+
|
1784 |
+
if self.mode not in {"wb", "ab"}:
|
1785 |
+
# no-op to flush on read-mode
|
1786 |
+
return
|
1787 |
+
|
1788 |
+
if not force and self.buffer.tell() < self.blocksize:
|
1789 |
+
# Defer write on small block
|
1790 |
+
return
|
1791 |
+
|
1792 |
+
if self.offset is None:
|
1793 |
+
# Initialize a multipart upload
|
1794 |
+
self.offset = 0
|
1795 |
+
try:
|
1796 |
+
self._initiate_upload()
|
1797 |
+
except: # noqa: E722
|
1798 |
+
self.closed = True
|
1799 |
+
raise
|
1800 |
+
|
1801 |
+
if self._upload_chunk(final=force) is not False:
|
1802 |
+
self.offset += self.buffer.seek(0, 2)
|
1803 |
+
self.buffer = io.BytesIO()
|
1804 |
+
|
1805 |
+
def _upload_chunk(self, final=False):
|
1806 |
+
"""Write one part of a multi-block file upload
|
1807 |
+
|
1808 |
+
Parameters
|
1809 |
+
==========
|
1810 |
+
final: bool
|
1811 |
+
This is the last block, so should complete file, if
|
1812 |
+
self.autocommit is True.
|
1813 |
+
"""
|
1814 |
+
# may not yet have been initialized, may need to call _initialize_upload
|
1815 |
+
|
1816 |
+
def _initiate_upload(self):
|
1817 |
+
"""Create remote file/upload"""
|
1818 |
+
pass
|
1819 |
+
|
1820 |
+
def _fetch_range(self, start, end):
|
1821 |
+
"""Get the specified set of bytes from remote"""
|
1822 |
+
raise NotImplementedError
|
1823 |
+
|
1824 |
+
def read(self, length=-1):
|
1825 |
+
"""
|
1826 |
+
Return data from cache, or fetch pieces as necessary
|
1827 |
+
|
1828 |
+
Parameters
|
1829 |
+
----------
|
1830 |
+
length: int (-1)
|
1831 |
+
Number of bytes to read; if <0, all remaining bytes.
|
1832 |
+
"""
|
1833 |
+
length = -1 if length is None else int(length)
|
1834 |
+
if self.mode != "rb":
|
1835 |
+
raise ValueError("File not in read mode")
|
1836 |
+
if length < 0:
|
1837 |
+
length = self.size - self.loc
|
1838 |
+
if self.closed:
|
1839 |
+
raise ValueError("I/O operation on closed file.")
|
1840 |
+
logger.debug("%s read: %i - %i", self, self.loc, self.loc + length)
|
1841 |
+
if length == 0:
|
1842 |
+
# don't even bother calling fetch
|
1843 |
+
return b""
|
1844 |
+
out = self.cache._fetch(self.loc, self.loc + length)
|
1845 |
+
self.loc += len(out)
|
1846 |
+
return out
|
1847 |
+
|
1848 |
+
def readinto(self, b):
|
1849 |
+
"""mirrors builtin file's readinto method
|
1850 |
+
|
1851 |
+
https://docs.python.org/3/library/io.html#io.RawIOBase.readinto
|
1852 |
+
"""
|
1853 |
+
out = memoryview(b).cast("B")
|
1854 |
+
data = self.read(out.nbytes)
|
1855 |
+
out[: len(data)] = data
|
1856 |
+
return len(data)
|
1857 |
+
|
1858 |
+
def readuntil(self, char=b"\n", blocks=None):
|
1859 |
+
"""Return data between current position and first occurrence of char
|
1860 |
+
|
1861 |
+
char is included in the output, except if the end of the file is
|
1862 |
+
encountered first.
|
1863 |
+
|
1864 |
+
Parameters
|
1865 |
+
----------
|
1866 |
+
char: bytes
|
1867 |
+
Thing to find
|
1868 |
+
blocks: None or int
|
1869 |
+
How much to read in each go. Defaults to file blocksize - which may
|
1870 |
+
mean a new read on every call.
|
1871 |
+
"""
|
1872 |
+
out = []
|
1873 |
+
while True:
|
1874 |
+
start = self.tell()
|
1875 |
+
part = self.read(blocks or self.blocksize)
|
1876 |
+
if len(part) == 0:
|
1877 |
+
break
|
1878 |
+
found = part.find(char)
|
1879 |
+
if found > -1:
|
1880 |
+
out.append(part[: found + len(char)])
|
1881 |
+
self.seek(start + found + len(char))
|
1882 |
+
break
|
1883 |
+
out.append(part)
|
1884 |
+
return b"".join(out)
|
1885 |
+
|
1886 |
+
def readline(self):
|
1887 |
+
"""Read until first occurrence of newline character
|
1888 |
+
|
1889 |
+
Note that, because of character encoding, this is not necessarily a
|
1890 |
+
true line ending.
|
1891 |
+
"""
|
1892 |
+
return self.readuntil(b"\n")
|
1893 |
+
|
1894 |
+
def __next__(self):
|
1895 |
+
out = self.readline()
|
1896 |
+
if out:
|
1897 |
+
return out
|
1898 |
+
raise StopIteration
|
1899 |
+
|
1900 |
+
def __iter__(self):
|
1901 |
+
return self
|
1902 |
+
|
1903 |
+
def readlines(self):
|
1904 |
+
"""Return all data, split by the newline character"""
|
1905 |
+
data = self.read()
|
1906 |
+
lines = data.split(b"\n")
|
1907 |
+
out = [l + b"\n" for l in lines[:-1]]
|
1908 |
+
if data.endswith(b"\n"):
|
1909 |
+
return out
|
1910 |
+
else:
|
1911 |
+
return out + [lines[-1]]
|
1912 |
+
# return list(self) ???
|
1913 |
+
|
1914 |
+
def readinto1(self, b):
|
1915 |
+
return self.readinto(b)
|
1916 |
+
|
1917 |
+
def close(self):
|
1918 |
+
"""Close file
|
1919 |
+
|
1920 |
+
Finalizes writes, discards cache
|
1921 |
+
"""
|
1922 |
+
if getattr(self, "_unclosable", False):
|
1923 |
+
return
|
1924 |
+
if self.closed:
|
1925 |
+
return
|
1926 |
+
if self.mode == "rb":
|
1927 |
+
self.cache = None
|
1928 |
+
else:
|
1929 |
+
if not self.forced:
|
1930 |
+
self.flush(force=True)
|
1931 |
+
|
1932 |
+
if self.fs is not None:
|
1933 |
+
self.fs.invalidate_cache(self.path)
|
1934 |
+
self.fs.invalidate_cache(self.fs._parent(self.path))
|
1935 |
+
|
1936 |
+
self.closed = True
|
1937 |
+
|
1938 |
+
def readable(self):
|
1939 |
+
"""Whether opened for reading"""
|
1940 |
+
return self.mode == "rb" and not self.closed
|
1941 |
+
|
1942 |
+
def seekable(self):
|
1943 |
+
"""Whether is seekable (only in read mode)"""
|
1944 |
+
return self.readable()
|
1945 |
+
|
1946 |
+
def writable(self):
|
1947 |
+
"""Whether opened for writing"""
|
1948 |
+
return self.mode in {"wb", "ab"} and not self.closed
|
1949 |
+
|
1950 |
+
def __del__(self):
|
1951 |
+
if not self.closed:
|
1952 |
+
self.close()
|
1953 |
+
|
1954 |
+
def __str__(self):
|
1955 |
+
return f"<File-like object {type(self.fs).__name__}, {self.path}>"
|
1956 |
+
|
1957 |
+
__repr__ = __str__
|
1958 |
+
|
1959 |
+
def __enter__(self):
|
1960 |
+
return self
|
1961 |
+
|
1962 |
+
def __exit__(self, *args):
|
1963 |
+
self.close()
|
lib/python3.11/site-packages/fsspec/tests/abstract/__init__.py
ADDED
@@ -0,0 +1,287 @@
import os
from hashlib import md5

import pytest

from fsspec.implementations.local import LocalFileSystem
from fsspec.tests.abstract.copy import AbstractCopyTests  # noqa
from fsspec.tests.abstract.get import AbstractGetTests  # noqa
from fsspec.tests.abstract.put import AbstractPutTests  # noqa


class BaseAbstractFixtures:
    """
    Abstract base class containing fixtures that are used by but never need to
    be overridden in derived filesystem-specific classes to run the abstract
    tests on such filesystems.
    """

    @pytest.fixture
    def fs_bulk_operations_scenario_0(self, fs, fs_join, fs_path):
        """
        Scenario on remote filesystem that is used for many cp/get/put tests.

        Cleans up at the end of each test in which it is used.
        """
        source = self._bulk_operations_scenario_0(fs, fs_join, fs_path)
        yield source
        fs.rm(source, recursive=True)

    @pytest.fixture
    def fs_glob_edge_cases_files(self, fs, fs_join, fs_path):
        """
        Scenario on remote filesystem that is used for glob edge cases cp/get/put tests.

        Cleans up at the end of each test in which it is used.
        """
        source = self._glob_edge_cases_files(fs, fs_join, fs_path)
        yield source
        fs.rm(source, recursive=True)

    @pytest.fixture
    def fs_dir_and_file_with_same_name_prefix(self, fs, fs_join, fs_path):
        """
        Scenario on remote filesystem that is used to check cp/get/put on directory
        and file with the same name prefixes.

        Cleans up at the end of each test in which it is used.
        """
        source = self._dir_and_file_with_same_name_prefix(fs, fs_join, fs_path)
        yield source
        fs.rm(source, recursive=True)

    @pytest.fixture
    def fs_10_files_with_hashed_names(self, fs, fs_join, fs_path):
        """
        Scenario on remote filesystem that is used to check cp/get/put files order
        when source and destination are lists.

        Cleans up at the end of each test in which it is used.
        """
        source = self._10_files_with_hashed_names(fs, fs_join, fs_path)
        yield source
        fs.rm(source, recursive=True)

    @pytest.fixture
    def fs_target(self, fs, fs_join, fs_path):
        """
        Return name of remote directory that does not yet exist to copy into.

        Cleans up at the end of each test in which it is used.
        """
        target = fs_join(fs_path, "target")
        yield target
        if fs.exists(target):
            fs.rm(target, recursive=True)

    @pytest.fixture
    def local_bulk_operations_scenario_0(self, local_fs, local_join, local_path):
        """
        Scenario on local filesystem that is used for many cp/get/put tests.

        Cleans up at the end of each test in which it is used.
        """
        source = self._bulk_operations_scenario_0(local_fs, local_join, local_path)
        yield source
        local_fs.rm(source, recursive=True)

    @pytest.fixture
    def local_glob_edge_cases_files(self, local_fs, local_join, local_path):
        """
        Scenario on local filesystem that is used for glob edge cases cp/get/put tests.

        Cleans up at the end of each test in which it is used.
        """
        source = self._glob_edge_cases_files(local_fs, local_join, local_path)
        yield source
        local_fs.rm(source, recursive=True)

    @pytest.fixture
    def local_dir_and_file_with_same_name_prefix(
        self, local_fs, local_join, local_path
    ):
        """
        Scenario on local filesystem that is used to check cp/get/put on directory
        and file with the same name prefixes.

        Cleans up at the end of each test in which it is used.
        """
        source = self._dir_and_file_with_same_name_prefix(
            local_fs, local_join, local_path
        )
        yield source
        local_fs.rm(source, recursive=True)

    @pytest.fixture
    def local_10_files_with_hashed_names(self, local_fs, local_join, local_path):
        """
        Scenario on local filesystem that is used to check cp/get/put files order
        when source and destination are lists.

        Cleans up at the end of each test in which it is used.
        """
        source = self._10_files_with_hashed_names(local_fs, local_join, local_path)
        yield source
        local_fs.rm(source, recursive=True)

    @pytest.fixture
    def local_target(self, local_fs, local_join, local_path):
        """
        Return name of local directory that does not yet exist to copy into.

        Cleans up at the end of each test in which it is used.
        """
        target = local_join(local_path, "target")
        yield target
        if local_fs.exists(target):
            local_fs.rm(target, recursive=True)

    def _glob_edge_cases_files(self, some_fs, some_join, some_path):
        """
        Scenario that is used for glob edge cases cp/get/put tests.
        Creates the following directory and file structure:

        📁 source
        ├── 📄 file1
        ├── 📄 file2
        ├── 📁 subdir0
        │   ├── 📄 subfile1
        │   ├── 📄 subfile2
        │   └── 📁 nesteddir
        │       └── 📄 nestedfile
        └── 📁 subdir1
            ├── 📄 subfile1
            ├── 📄 subfile2
            └── 📁 nesteddir
                └── 📄 nestedfile
        """
        source = some_join(some_path, "source")
        some_fs.touch(some_join(source, "file1"))
        some_fs.touch(some_join(source, "file2"))

        for subdir_idx in range(2):
            subdir = some_join(source, f"subdir{subdir_idx}")
            nesteddir = some_join(subdir, "nesteddir")
            some_fs.makedirs(nesteddir)
            some_fs.touch(some_join(subdir, "subfile1"))
            some_fs.touch(some_join(subdir, "subfile2"))
            some_fs.touch(some_join(nesteddir, "nestedfile"))

        return source

    def _bulk_operations_scenario_0(self, some_fs, some_join, some_path):
        """
        Scenario that is used for many cp/get/put tests. Creates the following
        directory and file structure:

        📁 source
        ├── 📄 file1
        ├── 📄 file2
        └── 📁 subdir
            ├── 📄 subfile1
            ├── 📄 subfile2
            └── 📁 nesteddir
                └── 📄 nestedfile
        """
        source = some_join(some_path, "source")
        subdir = some_join(source, "subdir")
        nesteddir = some_join(subdir, "nesteddir")
        some_fs.makedirs(nesteddir)
        some_fs.touch(some_join(source, "file1"))
        some_fs.touch(some_join(source, "file2"))
        some_fs.touch(some_join(subdir, "subfile1"))
        some_fs.touch(some_join(subdir, "subfile2"))
        some_fs.touch(some_join(nesteddir, "nestedfile"))
        return source

    def _dir_and_file_with_same_name_prefix(self, some_fs, some_join, some_path):
        """
        Scenario that is used to check cp/get/put on directory and file with
        the same name prefixes. Creates the following directory and file structure:

        📁 source
        ├── 📄 subdir.txt
        └── 📁 subdir
            └── 📄 subfile.txt
        """
        source = some_join(some_path, "source")
        subdir = some_join(source, "subdir")
        file = some_join(source, "subdir.txt")
        subfile = some_join(subdir, "subfile.txt")
        some_fs.makedirs(subdir)
        some_fs.touch(file)
        some_fs.touch(subfile)
        return source

    def _10_files_with_hashed_names(self, some_fs, some_join, some_path):
        """
        Scenario that is used to check cp/get/put files order when source and
        destination are lists. Creates the following directory and file structure:

        📁 source
        └── 📄 {hashed([0-9])}.txt
        """
        source = some_join(some_path, "source")
        for i in range(10):
            hashed_i = md5(str(i).encode("utf-8")).hexdigest()
            path = some_join(source, f"{hashed_i}.txt")
            some_fs.pipe(path=path, value=f"{i}".encode("utf-8"))
        return source


class AbstractFixtures(BaseAbstractFixtures):
    """
    Abstract base class containing fixtures that may be overridden in derived
    filesystem-specific classes to run the abstract tests on such filesystems.

    For any particular filesystem some of these fixtures must be overridden,
    such as ``fs`` and ``fs_path``, and others may be overridden if the
    default functions here are not appropriate, such as ``fs_join``.
    """

    @pytest.fixture
    def fs(self):
        raise NotImplementedError("This function must be overridden in derived classes")

    @pytest.fixture
    def fs_join(self):
        """
        Return a function that joins its arguments together into a path.

        Most fsspec implementations join paths in a platform-dependent way,
        but some will override this to always use a forward slash.
        """
        return os.path.join

    @pytest.fixture
    def fs_path(self):
        raise NotImplementedError("This function must be overridden in derived classes")

    @pytest.fixture(scope="class")
    def local_fs(self):
        # Maybe need an option for auto_mkdir=False?  This is only relevant
        # for certain implementations.
        return LocalFileSystem(auto_mkdir=True)

    @pytest.fixture
    def local_join(self):
        """
        Return a function that joins its arguments together into a path, on
        the local filesystem.
        """
        return os.path.join

    @pytest.fixture
    def local_path(self, tmpdir):
        return tmpdir

    @pytest.fixture
    def supports_empty_directories(self):
        """
        Return whether this implementation supports empty directories.
        """
        return True

    @pytest.fixture
    def fs_sanitize_path(self):
        return lambda x: x
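A filesystem-specific test suite is expected to combine these fixtures with the abstract test classes imported above. A minimal sketch, assuming the built-in in-memory filesystem as the backend under test (the class names and the fs_path value are illustrative, not part of this file):

    import pytest

    import fsspec
    import fsspec.tests.abstract as abstract


    class MemoryFixtures(abstract.AbstractFixtures):
        @pytest.fixture
        def fs(self):
            # backend under test; "memory" is just a convenient built-in choice
            return fsspec.filesystem("memory")

        @pytest.fixture
        def fs_path(self):
            return "/abstract-test-root"  # illustrative scratch location


    class TestMemoryCopy(MemoryFixtures, abstract.AbstractCopyTests):
        pass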
lib/python3.11/site-packages/fsspec/tests/abstract/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (15 kB).
lib/python3.11/site-packages/fsspec/tests/abstract/__pycache__/common.cpython-311.pyc
ADDED
Binary file (2.33 kB).
lib/python3.11/site-packages/fsspec/tests/abstract/__pycache__/copy.cpython-311.pyc
ADDED
Binary file (26.5 kB).
lib/python3.11/site-packages/fsspec/tests/abstract/__pycache__/get.cpython-311.pyc
ADDED
Binary file (26.3 kB).
lib/python3.11/site-packages/fsspec/tests/abstract/__pycache__/put.cpython-311.pyc
ADDED
Binary file (27.7 kB).
lib/python3.11/site-packages/fsspec/tests/abstract/common.py
ADDED
@@ -0,0 +1,175 @@
GLOB_EDGE_CASES_TESTS = {
    "argnames": ("path", "recursive", "maxdepth", "expected"),
    "argvalues": [
        ("fil?1", False, None, ["file1"]),
        ("fil?1", True, None, ["file1"]),
        ("file[1-2]", False, None, ["file1", "file2"]),
        ("file[1-2]", True, None, ["file1", "file2"]),
        ("*", False, None, ["file1", "file2"]),
        (
            "*",
            True,
            None,
            [
                "file1",
                "file2",
                "subdir0/subfile1",
                "subdir0/subfile2",
                "subdir0/nesteddir/nestedfile",
                "subdir1/subfile1",
                "subdir1/subfile2",
                "subdir1/nesteddir/nestedfile",
            ],
        ),
        ("*", True, 1, ["file1", "file2"]),
        (
            "*",
            True,
            2,
            [
                "file1",
                "file2",
                "subdir0/subfile1",
                "subdir0/subfile2",
                "subdir1/subfile1",
                "subdir1/subfile2",
            ],
        ),
        ("*1", False, None, ["file1"]),
        (
            "*1",
            True,
            None,
            [
                "file1",
                "subdir1/subfile1",
                "subdir1/subfile2",
                "subdir1/nesteddir/nestedfile",
            ],
        ),
        ("*1", True, 2, ["file1", "subdir1/subfile1", "subdir1/subfile2"]),
        (
            "**",
            False,
            None,
            [
                "file1",
                "file2",
                "subdir0/subfile1",
                "subdir0/subfile2",
                "subdir0/nesteddir/nestedfile",
                "subdir1/subfile1",
                "subdir1/subfile2",
                "subdir1/nesteddir/nestedfile",
            ],
        ),
        (
            "**",
            True,
            None,
            [
                "file1",
                "file2",
                "subdir0/subfile1",
                "subdir0/subfile2",
                "subdir0/nesteddir/nestedfile",
                "subdir1/subfile1",
                "subdir1/subfile2",
                "subdir1/nesteddir/nestedfile",
            ],
        ),
        ("**", True, 1, ["file1", "file2"]),
        (
            "**",
            True,
            2,
            [
                "file1",
                "file2",
                "subdir0/subfile1",
                "subdir0/subfile2",
                "subdir0/nesteddir/nestedfile",
                "subdir1/subfile1",
                "subdir1/subfile2",
                "subdir1/nesteddir/nestedfile",
            ],
        ),
        (
            "**",
            False,
            2,
            [
                "file1",
                "file2",
                "subdir0/subfile1",
                "subdir0/subfile2",
                "subdir1/subfile1",
                "subdir1/subfile2",
            ],
        ),
        ("**/*1", False, None, ["file1", "subdir0/subfile1", "subdir1/subfile1"]),
        (
            "**/*1",
            True,
            None,
            [
                "file1",
                "subdir0/subfile1",
                "subdir1/subfile1",
                "subdir1/subfile2",
                "subdir1/nesteddir/nestedfile",
            ],
        ),
        ("**/*1", True, 1, ["file1"]),
        (
            "**/*1",
            True,
            2,
            ["file1", "subdir0/subfile1", "subdir1/subfile1", "subdir1/subfile2"],
        ),
        ("**/*1", False, 2, ["file1", "subdir0/subfile1", "subdir1/subfile1"]),
        ("**/subdir0", False, None, []),
        ("**/subdir0", True, None, ["subfile1", "subfile2", "nesteddir/nestedfile"]),
        ("**/subdir0/nested*", False, 2, []),
        ("**/subdir0/nested*", True, 2, ["nestedfile"]),
        ("subdir[1-2]", False, None, []),
        ("subdir[1-2]", True, None, ["subfile1", "subfile2", "nesteddir/nestedfile"]),
        ("subdir[1-2]", True, 2, ["subfile1", "subfile2"]),
        ("subdir[0-1]", False, None, []),
        (
            "subdir[0-1]",
            True,
            None,
            [
                "subdir0/subfile1",
                "subdir0/subfile2",
                "subdir0/nesteddir/nestedfile",
                "subdir1/subfile1",
                "subdir1/subfile2",
                "subdir1/nesteddir/nestedfile",
            ],
        ),
        (
            "subdir[0-1]/*fil[e]*",
            False,
            None,
            [
                "subdir0/subfile1",
                "subdir0/subfile2",
                "subdir1/subfile1",
                "subdir1/subfile2",
            ],
        ),
        (
            "subdir[0-1]/*fil[e]*",
            True,
            None,
            [
                "subdir0/subfile1",
                "subdir0/subfile2",
                "subdir1/subfile1",
                "subdir1/subfile2",
            ],
        ),
    ],
}
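The table above is consumed by the copy/get/put test classes through ``pytest.mark.parametrize``; a minimal sketch of that hookup, mirroring the decorator used in copy.py further down (the test body here is illustrative only):

    import pytest

    from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS


    @pytest.mark.parametrize(
        GLOB_EDGE_CASES_TESTS["argnames"],
        GLOB_EDGE_CASES_TESTS["argvalues"],
    )
    def test_glob_edge_case(path, recursive, maxdepth, expected):
        # each case pairs a glob pattern and copy options with the expected
        # relative paths that should appear at the destination
        ...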
lib/python3.11/site-packages/fsspec/tests/abstract/copy.py
ADDED
@@ -0,0 +1,543 @@
1 |
+
from hashlib import md5
|
2 |
+
from itertools import product
|
3 |
+
|
4 |
+
import pytest
|
5 |
+
|
6 |
+
from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS
|
7 |
+
|
8 |
+
|
9 |
+
class AbstractCopyTests:
|
10 |
+
def test_copy_file_to_existing_directory(
|
11 |
+
self,
|
12 |
+
fs,
|
13 |
+
fs_join,
|
14 |
+
fs_bulk_operations_scenario_0,
|
15 |
+
fs_target,
|
16 |
+
supports_empty_directories,
|
17 |
+
):
|
18 |
+
# Copy scenario 1a
|
19 |
+
source = fs_bulk_operations_scenario_0
|
20 |
+
|
21 |
+
target = fs_target
|
22 |
+
fs.mkdir(target)
|
23 |
+
if not supports_empty_directories:
|
24 |
+
# Force target directory to exist by adding a dummy file
|
25 |
+
fs.touch(fs_join(target, "dummy"))
|
26 |
+
assert fs.isdir(target)
|
27 |
+
|
28 |
+
target_file2 = fs_join(target, "file2")
|
29 |
+
target_subfile1 = fs_join(target, "subfile1")
|
30 |
+
|
31 |
+
# Copy from source directory
|
32 |
+
fs.cp(fs_join(source, "file2"), target)
|
33 |
+
assert fs.isfile(target_file2)
|
34 |
+
|
35 |
+
# Copy from sub directory
|
36 |
+
fs.cp(fs_join(source, "subdir", "subfile1"), target)
|
37 |
+
assert fs.isfile(target_subfile1)
|
38 |
+
|
39 |
+
# Remove copied files
|
40 |
+
fs.rm([target_file2, target_subfile1])
|
41 |
+
assert not fs.exists(target_file2)
|
42 |
+
assert not fs.exists(target_subfile1)
|
43 |
+
|
44 |
+
# Repeat with trailing slash on target
|
45 |
+
fs.cp(fs_join(source, "file2"), target + "/")
|
46 |
+
assert fs.isdir(target)
|
47 |
+
assert fs.isfile(target_file2)
|
48 |
+
|
49 |
+
fs.cp(fs_join(source, "subdir", "subfile1"), target + "/")
|
50 |
+
assert fs.isfile(target_subfile1)
|
51 |
+
|
52 |
+
def test_copy_file_to_new_directory(
|
53 |
+
self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
|
54 |
+
):
|
55 |
+
# Copy scenario 1b
|
56 |
+
source = fs_bulk_operations_scenario_0
|
57 |
+
|
58 |
+
target = fs_target
|
59 |
+
fs.mkdir(target)
|
60 |
+
|
61 |
+
fs.cp(
|
62 |
+
fs_join(source, "subdir", "subfile1"), fs_join(target, "newdir/")
|
63 |
+
) # Note trailing slash
|
64 |
+
assert fs.isdir(target)
|
65 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
66 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
67 |
+
|
68 |
+
def test_copy_file_to_file_in_existing_directory(
|
69 |
+
self,
|
70 |
+
fs,
|
71 |
+
fs_join,
|
72 |
+
fs_bulk_operations_scenario_0,
|
73 |
+
fs_target,
|
74 |
+
supports_empty_directories,
|
75 |
+
):
|
76 |
+
# Copy scenario 1c
|
77 |
+
source = fs_bulk_operations_scenario_0
|
78 |
+
|
79 |
+
target = fs_target
|
80 |
+
fs.mkdir(target)
|
81 |
+
if not supports_empty_directories:
|
82 |
+
# Force target directory to exist by adding a dummy file
|
83 |
+
fs.touch(fs_join(target, "dummy"))
|
84 |
+
assert fs.isdir(target)
|
85 |
+
|
86 |
+
fs.cp(fs_join(source, "subdir", "subfile1"), fs_join(target, "newfile"))
|
87 |
+
assert fs.isfile(fs_join(target, "newfile"))
|
88 |
+
|
89 |
+
def test_copy_file_to_file_in_new_directory(
|
90 |
+
self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
|
91 |
+
):
|
92 |
+
# Copy scenario 1d
|
93 |
+
source = fs_bulk_operations_scenario_0
|
94 |
+
|
95 |
+
target = fs_target
|
96 |
+
fs.mkdir(target)
|
97 |
+
|
98 |
+
fs.cp(
|
99 |
+
fs_join(source, "subdir", "subfile1"), fs_join(target, "newdir", "newfile")
|
100 |
+
)
|
101 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
102 |
+
assert fs.isfile(fs_join(target, "newdir", "newfile"))
|
103 |
+
|
104 |
+
def test_copy_directory_to_existing_directory(
|
105 |
+
self,
|
106 |
+
fs,
|
107 |
+
fs_join,
|
108 |
+
fs_bulk_operations_scenario_0,
|
109 |
+
fs_target,
|
110 |
+
supports_empty_directories,
|
111 |
+
):
|
112 |
+
# Copy scenario 1e
|
113 |
+
source = fs_bulk_operations_scenario_0
|
114 |
+
|
115 |
+
target = fs_target
|
116 |
+
fs.mkdir(target)
|
117 |
+
if not supports_empty_directories:
|
118 |
+
# Force target directory to exist by adding a dummy file
|
119 |
+
dummy = fs_join(target, "dummy")
|
120 |
+
fs.touch(dummy)
|
121 |
+
assert fs.isdir(target)
|
122 |
+
|
123 |
+
for source_slash, target_slash in zip([False, True], [False, True]):
|
124 |
+
s = fs_join(source, "subdir")
|
125 |
+
if source_slash:
|
126 |
+
s += "/"
|
127 |
+
t = target + "/" if target_slash else target
|
128 |
+
|
129 |
+
# Without recursive does nothing
|
130 |
+
fs.cp(s, t)
|
131 |
+
assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
|
132 |
+
|
133 |
+
# With recursive
|
134 |
+
fs.cp(s, t, recursive=True)
|
135 |
+
if source_slash:
|
136 |
+
assert fs.isfile(fs_join(target, "subfile1"))
|
137 |
+
assert fs.isfile(fs_join(target, "subfile2"))
|
138 |
+
assert fs.isdir(fs_join(target, "nesteddir"))
|
139 |
+
assert fs.isfile(fs_join(target, "nesteddir", "nestedfile"))
|
140 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
141 |
+
|
142 |
+
fs.rm(
|
143 |
+
[
|
144 |
+
fs_join(target, "subfile1"),
|
145 |
+
fs_join(target, "subfile2"),
|
146 |
+
fs_join(target, "nesteddir"),
|
147 |
+
],
|
148 |
+
recursive=True,
|
149 |
+
)
|
150 |
+
else:
|
151 |
+
assert fs.isdir(fs_join(target, "subdir"))
|
152 |
+
assert fs.isfile(fs_join(target, "subdir", "subfile1"))
|
153 |
+
assert fs.isfile(fs_join(target, "subdir", "subfile2"))
|
154 |
+
assert fs.isdir(fs_join(target, "subdir", "nesteddir"))
|
155 |
+
assert fs.isfile(fs_join(target, "subdir", "nesteddir", "nestedfile"))
|
156 |
+
|
157 |
+
fs.rm(fs_join(target, "subdir"), recursive=True)
|
158 |
+
assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
|
159 |
+
|
160 |
+
# Limit recursive by maxdepth
|
161 |
+
fs.cp(s, t, recursive=True, maxdepth=1)
|
162 |
+
if source_slash:
|
163 |
+
assert fs.isfile(fs_join(target, "subfile1"))
|
164 |
+
assert fs.isfile(fs_join(target, "subfile2"))
|
165 |
+
assert not fs.exists(fs_join(target, "nesteddir"))
|
166 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
167 |
+
|
168 |
+
fs.rm(
|
169 |
+
[
|
170 |
+
fs_join(target, "subfile1"),
|
171 |
+
fs_join(target, "subfile2"),
|
172 |
+
],
|
173 |
+
recursive=True,
|
174 |
+
)
|
175 |
+
else:
|
176 |
+
assert fs.isdir(fs_join(target, "subdir"))
|
177 |
+
assert fs.isfile(fs_join(target, "subdir", "subfile1"))
|
178 |
+
assert fs.isfile(fs_join(target, "subdir", "subfile2"))
|
179 |
+
assert not fs.exists(fs_join(target, "subdir", "nesteddir"))
|
180 |
+
|
181 |
+
fs.rm(fs_join(target, "subdir"), recursive=True)
|
182 |
+
assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
|
183 |
+
|
184 |
+
def test_copy_directory_to_new_directory(
|
185 |
+
self,
|
186 |
+
fs,
|
187 |
+
fs_join,
|
188 |
+
fs_bulk_operations_scenario_0,
|
189 |
+
fs_target,
|
190 |
+
supports_empty_directories,
|
191 |
+
):
|
192 |
+
# Copy scenario 1f
|
193 |
+
source = fs_bulk_operations_scenario_0
|
194 |
+
|
195 |
+
target = fs_target
|
196 |
+
fs.mkdir(target)
|
197 |
+
|
198 |
+
for source_slash, target_slash in zip([False, True], [False, True]):
|
199 |
+
s = fs_join(source, "subdir")
|
200 |
+
if source_slash:
|
201 |
+
s += "/"
|
202 |
+
t = fs_join(target, "newdir")
|
203 |
+
if target_slash:
|
204 |
+
t += "/"
|
205 |
+
|
206 |
+
# Without recursive does nothing
|
207 |
+
fs.cp(s, t)
|
208 |
+
if supports_empty_directories:
|
209 |
+
assert fs.ls(target) == []
|
210 |
+
else:
|
211 |
+
with pytest.raises(FileNotFoundError):
|
212 |
+
fs.ls(target)
|
213 |
+
|
214 |
+
# With recursive
|
215 |
+
fs.cp(s, t, recursive=True)
|
216 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
217 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
218 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile2"))
|
219 |
+
assert fs.isdir(fs_join(target, "newdir", "nesteddir"))
|
220 |
+
assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile"))
|
221 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
222 |
+
|
223 |
+
fs.rm(fs_join(target, "newdir"), recursive=True)
|
224 |
+
assert not fs.exists(fs_join(target, "newdir"))
|
225 |
+
|
226 |
+
# Limit recursive by maxdepth
|
227 |
+
fs.cp(s, t, recursive=True, maxdepth=1)
|
228 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
229 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
230 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile2"))
|
231 |
+
assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
|
232 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
233 |
+
|
234 |
+
fs.rm(fs_join(target, "newdir"), recursive=True)
|
235 |
+
assert not fs.exists(fs_join(target, "newdir"))
|
236 |
+
|
237 |
+
def test_copy_glob_to_existing_directory(
|
238 |
+
self,
|
239 |
+
fs,
|
240 |
+
fs_join,
|
241 |
+
fs_bulk_operations_scenario_0,
|
242 |
+
fs_target,
|
243 |
+
supports_empty_directories,
|
244 |
+
):
|
245 |
+
# Copy scenario 1g
|
246 |
+
source = fs_bulk_operations_scenario_0
|
247 |
+
|
248 |
+
target = fs_target
|
249 |
+
fs.mkdir(target)
|
250 |
+
if not supports_empty_directories:
|
251 |
+
# Force target directory to exist by adding a dummy file
|
252 |
+
dummy = fs_join(target, "dummy")
|
253 |
+
fs.touch(dummy)
|
254 |
+
assert fs.isdir(target)
|
255 |
+
|
256 |
+
for target_slash in [False, True]:
|
257 |
+
t = target + "/" if target_slash else target
|
258 |
+
|
259 |
+
# Without recursive
|
260 |
+
fs.cp(fs_join(source, "subdir", "*"), t)
|
261 |
+
assert fs.isfile(fs_join(target, "subfile1"))
|
262 |
+
assert fs.isfile(fs_join(target, "subfile2"))
|
263 |
+
assert not fs.isdir(fs_join(target, "nesteddir"))
|
264 |
+
assert not fs.exists(fs_join(target, "nesteddir", "nestedfile"))
|
265 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
266 |
+
|
267 |
+
fs.rm(
|
268 |
+
[
|
269 |
+
fs_join(target, "subfile1"),
|
270 |
+
fs_join(target, "subfile2"),
|
271 |
+
],
|
272 |
+
recursive=True,
|
273 |
+
)
|
274 |
+
assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
|
275 |
+
|
276 |
+
# With recursive
|
277 |
+
for glob, recursive in zip(["*", "**"], [True, False]):
|
278 |
+
fs.cp(fs_join(source, "subdir", glob), t, recursive=recursive)
|
279 |
+
assert fs.isfile(fs_join(target, "subfile1"))
|
280 |
+
assert fs.isfile(fs_join(target, "subfile2"))
|
281 |
+
assert fs.isdir(fs_join(target, "nesteddir"))
|
282 |
+
assert fs.isfile(fs_join(target, "nesteddir", "nestedfile"))
|
283 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
284 |
+
|
285 |
+
fs.rm(
|
286 |
+
[
|
287 |
+
fs_join(target, "subfile1"),
|
288 |
+
fs_join(target, "subfile2"),
|
289 |
+
fs_join(target, "nesteddir"),
|
290 |
+
],
|
291 |
+
recursive=True,
|
292 |
+
)
|
293 |
+
assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
|
294 |
+
|
295 |
+
# Limit recursive by maxdepth
|
296 |
+
fs.cp(
|
297 |
+
fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1
|
298 |
+
)
|
299 |
+
assert fs.isfile(fs_join(target, "subfile1"))
|
300 |
+
assert fs.isfile(fs_join(target, "subfile2"))
|
301 |
+
assert not fs.exists(fs_join(target, "nesteddir"))
|
302 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
303 |
+
|
304 |
+
fs.rm(
|
305 |
+
[
|
306 |
+
fs_join(target, "subfile1"),
|
307 |
+
fs_join(target, "subfile2"),
|
308 |
+
],
|
309 |
+
recursive=True,
|
310 |
+
)
|
311 |
+
assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
|
312 |
+
|
313 |
+
def test_copy_glob_to_new_directory(
|
314 |
+
self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
|
315 |
+
):
|
316 |
+
# Copy scenario 1h
|
317 |
+
source = fs_bulk_operations_scenario_0
|
318 |
+
|
319 |
+
target = fs_target
|
320 |
+
fs.mkdir(target)
|
321 |
+
|
322 |
+
for target_slash in [False, True]:
|
323 |
+
t = fs_join(target, "newdir")
|
324 |
+
if target_slash:
|
325 |
+
t += "/"
|
326 |
+
|
327 |
+
# Without recursive
|
328 |
+
fs.cp(fs_join(source, "subdir", "*"), t)
|
329 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
330 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
331 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile2"))
|
332 |
+
assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
|
333 |
+
assert not fs.exists(fs_join(target, "newdir", "nesteddir", "nestedfile"))
|
334 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
335 |
+
assert not fs.exists(fs_join(target, "newdir", "subdir"))
|
336 |
+
|
337 |
+
fs.rm(fs_join(target, "newdir"), recursive=True)
|
338 |
+
assert not fs.exists(fs_join(target, "newdir"))
|
339 |
+
|
340 |
+
# With recursive
|
341 |
+
for glob, recursive in zip(["*", "**"], [True, False]):
|
342 |
+
fs.cp(fs_join(source, "subdir", glob), t, recursive=recursive)
|
343 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
344 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
345 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile2"))
|
346 |
+
assert fs.isdir(fs_join(target, "newdir", "nesteddir"))
|
347 |
+
assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile"))
|
348 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
349 |
+
assert not fs.exists(fs_join(target, "newdir", "subdir"))
|
350 |
+
|
351 |
+
fs.rm(fs_join(target, "newdir"), recursive=True)
|
352 |
+
assert not fs.exists(fs_join(target, "newdir"))
|
353 |
+
|
354 |
+
# Limit recursive by maxdepth
|
355 |
+
fs.cp(
|
356 |
+
fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1
|
357 |
+
)
|
358 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
359 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
360 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile2"))
|
361 |
+
assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
|
362 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
363 |
+
assert not fs.exists(fs_join(target, "newdir", "subdir"))
|
364 |
+
|
365 |
+
fs.rm(fs_join(target, "newdir"), recursive=True)
|
366 |
+
assert not fs.exists(fs_join(target, "newdir"))
|
367 |
+
|
368 |
+
@pytest.mark.parametrize(
|
369 |
+
GLOB_EDGE_CASES_TESTS["argnames"],
|
370 |
+
GLOB_EDGE_CASES_TESTS["argvalues"],
|
371 |
+
)
|
372 |
+
def test_copy_glob_edge_cases(
|
373 |
+
self,
|
374 |
+
path,
|
375 |
+
recursive,
|
376 |
+
maxdepth,
|
377 |
+
expected,
|
378 |
+
fs,
|
379 |
+
fs_join,
|
380 |
+
fs_glob_edge_cases_files,
|
381 |
+
fs_target,
|
382 |
+
fs_sanitize_path,
|
383 |
+
):
|
384 |
+
# Copy scenario 1g
|
385 |
+
source = fs_glob_edge_cases_files
|
386 |
+
|
387 |
+
target = fs_target
|
388 |
+
|
389 |
+
for new_dir, target_slash in product([True, False], [True, False]):
|
390 |
+
fs.mkdir(target)
|
391 |
+
|
392 |
+
t = fs_join(target, "newdir") if new_dir else target
|
393 |
+
t = t + "/" if target_slash else t
|
394 |
+
|
395 |
+
fs.copy(fs_join(source, path), t, recursive=recursive, maxdepth=maxdepth)
|
396 |
+
|
397 |
+
output = fs.find(target)
|
398 |
+
if new_dir:
|
399 |
+
prefixed_expected = [
|
400 |
+
fs_sanitize_path(fs_join(target, "newdir", p)) for p in expected
|
401 |
+
]
|
402 |
+
else:
|
403 |
+
prefixed_expected = [
|
404 |
+
fs_sanitize_path(fs_join(target, p)) for p in expected
|
405 |
+
]
|
406 |
+
assert sorted(output) == sorted(prefixed_expected)
|
407 |
+
|
408 |
+
try:
|
409 |
+
fs.rm(target, recursive=True)
|
410 |
+
except FileNotFoundError:
|
411 |
+
pass
|
412 |
+
|
413 |
+
def test_copy_list_of_files_to_existing_directory(
|
414 |
+
self,
|
415 |
+
fs,
|
416 |
+
fs_join,
|
417 |
+
fs_bulk_operations_scenario_0,
|
418 |
+
fs_target,
|
419 |
+
supports_empty_directories,
|
420 |
+
):
|
421 |
+
# Copy scenario 2a
|
422 |
+
source = fs_bulk_operations_scenario_0
|
423 |
+
|
424 |
+
target = fs_target
|
425 |
+
fs.mkdir(target)
|
426 |
+
if not supports_empty_directories:
|
427 |
+
# Force target directory to exist by adding a dummy file
|
428 |
+
dummy = fs_join(target, "dummy")
|
429 |
+
fs.touch(dummy)
|
430 |
+
assert fs.isdir(target)
|
431 |
+
|
432 |
+
source_files = [
|
433 |
+
fs_join(source, "file1"),
|
434 |
+
fs_join(source, "file2"),
|
435 |
+
fs_join(source, "subdir", "subfile1"),
|
436 |
+
]
|
437 |
+
|
438 |
+
for target_slash in [False, True]:
|
439 |
+
t = target + "/" if target_slash else target
|
440 |
+
|
441 |
+
fs.cp(source_files, t)
|
442 |
+
assert fs.isfile(fs_join(target, "file1"))
|
443 |
+
assert fs.isfile(fs_join(target, "file2"))
|
444 |
+
assert fs.isfile(fs_join(target, "subfile1"))
|
445 |
+
|
446 |
+
fs.rm(
|
447 |
+
[
|
448 |
+
fs_join(target, "file1"),
|
449 |
+
fs_join(target, "file2"),
|
450 |
+
fs_join(target, "subfile1"),
|
451 |
+
],
|
452 |
+
recursive=True,
|
453 |
+
)
|
454 |
+
assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
|
455 |
+
|
456 |
+
def test_copy_list_of_files_to_new_directory(
|
457 |
+
self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
|
458 |
+
):
|
459 |
+
# Copy scenario 2b
|
460 |
+
source = fs_bulk_operations_scenario_0
|
461 |
+
|
462 |
+
target = fs_target
|
463 |
+
fs.mkdir(target)
|
464 |
+
|
465 |
+
source_files = [
|
466 |
+
fs_join(source, "file1"),
|
467 |
+
fs_join(source, "file2"),
|
468 |
+
fs_join(source, "subdir", "subfile1"),
|
469 |
+
]
|
470 |
+
|
471 |
+
fs.cp(source_files, fs_join(target, "newdir") + "/") # Note trailing slash
|
472 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
473 |
+
assert fs.isfile(fs_join(target, "newdir", "file1"))
|
474 |
+
assert fs.isfile(fs_join(target, "newdir", "file2"))
|
475 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
476 |
+
|
477 |
+
def test_copy_two_files_new_directory(
|
478 |
+
self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
|
479 |
+
):
|
480 |
+
# This is a duplicate of test_copy_list_of_files_to_new_directory and
|
481 |
+
# can eventually be removed.
|
482 |
+
source = fs_bulk_operations_scenario_0
|
483 |
+
|
484 |
+
target = fs_target
|
485 |
+
assert not fs.exists(target)
|
486 |
+
fs.cp([fs_join(source, "file1"), fs_join(source, "file2")], target)
|
487 |
+
|
488 |
+
assert fs.isdir(target)
|
489 |
+
assert fs.isfile(fs_join(target, "file1"))
|
490 |
+
assert fs.isfile(fs_join(target, "file2"))
|
491 |
+
|
492 |
+
def test_copy_directory_without_files_with_same_name_prefix(
|
493 |
+
self,
|
494 |
+
fs,
|
495 |
+
fs_join,
|
496 |
+
fs_target,
|
497 |
+
fs_dir_and_file_with_same_name_prefix,
|
498 |
+
supports_empty_directories,
|
499 |
+
):
|
500 |
+
# Create the test dirs
|
501 |
+
source = fs_dir_and_file_with_same_name_prefix
|
502 |
+
target = fs_target
|
503 |
+
|
504 |
+
# Test without glob
|
505 |
+
fs.cp(fs_join(source, "subdir"), target, recursive=True)
|
506 |
+
|
507 |
+
assert fs.isfile(fs_join(target, "subfile.txt"))
|
508 |
+
assert not fs.isfile(fs_join(target, "subdir.txt"))
|
509 |
+
|
510 |
+
fs.rm([fs_join(target, "subfile.txt")])
|
511 |
+
if supports_empty_directories:
|
512 |
+
assert fs.ls(target) == []
|
513 |
+
else:
|
514 |
+
assert not fs.exists(target)
|
515 |
+
|
516 |
+
# Test with glob
|
517 |
+
fs.cp(fs_join(source, "subdir*"), target, recursive=True)
|
518 |
+
|
519 |
+
assert fs.isdir(fs_join(target, "subdir"))
|
520 |
+
assert fs.isfile(fs_join(target, "subdir", "subfile.txt"))
|
521 |
+
assert fs.isfile(fs_join(target, "subdir.txt"))
|
522 |
+
|
523 |
+
def test_copy_with_source_and_destination_as_list(
|
524 |
+
self, fs, fs_target, fs_join, fs_10_files_with_hashed_names
|
525 |
+
):
|
526 |
+
# Create the test dir
|
527 |
+
source = fs_10_files_with_hashed_names
|
528 |
+
target = fs_target
|
529 |
+
|
530 |
+
# Create list of files for source and destination
|
531 |
+
source_files = []
|
532 |
+
destination_files = []
|
533 |
+
for i in range(10):
|
534 |
+
hashed_i = md5(str(i).encode("utf-8")).hexdigest()
|
535 |
+
source_files.append(fs_join(source, f"{hashed_i}.txt"))
|
536 |
+
destination_files.append(fs_join(target, f"{hashed_i}.txt"))
|
537 |
+
|
538 |
+
# Copy and assert order was kept
|
539 |
+
fs.copy(path1=source_files, path2=destination_files)
|
540 |
+
|
541 |
+
for i in range(10):
|
542 |
+
file_content = fs.cat(destination_files[i]).decode("utf-8")
|
543 |
+
assert file_content == str(i)
|
lib/python3.11/site-packages/fsspec/tests/abstract/get.py
ADDED
@@ -0,0 +1,587 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from hashlib import md5
|
2 |
+
from itertools import product
|
3 |
+
|
4 |
+
import pytest
|
5 |
+
|
6 |
+
from fsspec.implementations.local import make_path_posix
|
7 |
+
from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS
|
8 |
+
|
9 |
+
|
10 |
+
class AbstractGetTests:
|
11 |
+
def test_get_file_to_existing_directory(
|
12 |
+
self,
|
13 |
+
fs,
|
14 |
+
fs_join,
|
15 |
+
fs_bulk_operations_scenario_0,
|
16 |
+
local_fs,
|
17 |
+
local_join,
|
18 |
+
local_target,
|
19 |
+
):
|
20 |
+
# Copy scenario 1a
|
21 |
+
source = fs_bulk_operations_scenario_0
|
22 |
+
|
23 |
+
target = local_target
|
24 |
+
local_fs.mkdir(target)
|
25 |
+
assert local_fs.isdir(target)
|
26 |
+
|
27 |
+
target_file2 = local_join(target, "file2")
|
28 |
+
target_subfile1 = local_join(target, "subfile1")
|
29 |
+
|
30 |
+
# Copy from source directory
|
31 |
+
fs.get(fs_join(source, "file2"), target)
|
32 |
+
assert local_fs.isfile(target_file2)
|
33 |
+
|
34 |
+
# Copy from sub directory
|
35 |
+
fs.get(fs_join(source, "subdir", "subfile1"), target)
|
36 |
+
assert local_fs.isfile(target_subfile1)
|
37 |
+
|
38 |
+
# Remove copied files
|
39 |
+
local_fs.rm([target_file2, target_subfile1])
|
40 |
+
assert not local_fs.exists(target_file2)
|
41 |
+
assert not local_fs.exists(target_subfile1)
|
42 |
+
|
43 |
+
# Repeat with trailing slash on target
|
44 |
+
fs.get(fs_join(source, "file2"), target + "/")
|
45 |
+
assert local_fs.isdir(target)
|
46 |
+
assert local_fs.isfile(target_file2)
|
47 |
+
|
48 |
+
fs.get(fs_join(source, "subdir", "subfile1"), target + "/")
|
49 |
+
assert local_fs.isfile(target_subfile1)
|
50 |
+
|
51 |
+
def test_get_file_to_new_directory(
|
52 |
+
self,
|
53 |
+
fs,
|
54 |
+
fs_join,
|
55 |
+
fs_bulk_operations_scenario_0,
|
56 |
+
local_fs,
|
57 |
+
local_join,
|
58 |
+
local_target,
|
59 |
+
):
|
60 |
+
# Copy scenario 1b
|
61 |
+
source = fs_bulk_operations_scenario_0
|
62 |
+
|
63 |
+
target = local_target
|
64 |
+
local_fs.mkdir(target)
|
65 |
+
|
66 |
+
fs.get(
|
67 |
+
fs_join(source, "subdir", "subfile1"), local_join(target, "newdir/")
|
68 |
+
) # Note trailing slash
|
69 |
+
|
70 |
+
assert local_fs.isdir(target)
|
71 |
+
assert local_fs.isdir(local_join(target, "newdir"))
|
72 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
|
73 |
+
|
74 |
+
def test_get_file_to_file_in_existing_directory(
|
75 |
+
self,
|
76 |
+
fs,
|
77 |
+
fs_join,
|
78 |
+
fs_bulk_operations_scenario_0,
|
79 |
+
local_fs,
|
80 |
+
local_join,
|
81 |
+
local_target,
|
82 |
+
):
|
83 |
+
# Copy scenario 1c
|
84 |
+
source = fs_bulk_operations_scenario_0
|
85 |
+
|
86 |
+
target = local_target
|
87 |
+
local_fs.mkdir(target)
|
88 |
+
|
89 |
+
fs.get(fs_join(source, "subdir", "subfile1"), local_join(target, "newfile"))
|
90 |
+
assert local_fs.isfile(local_join(target, "newfile"))
|
91 |
+
|
92 |
+
def test_get_file_to_file_in_new_directory(
|
93 |
+
self,
|
94 |
+
fs,
|
95 |
+
fs_join,
|
96 |
+
fs_bulk_operations_scenario_0,
|
97 |
+
local_fs,
|
98 |
+
local_join,
|
99 |
+
local_target,
|
100 |
+
):
|
101 |
+
# Copy scenario 1d
|
102 |
+
source = fs_bulk_operations_scenario_0
|
103 |
+
|
104 |
+
target = local_target
|
105 |
+
local_fs.mkdir(target)
|
106 |
+
|
107 |
+
fs.get(
|
108 |
+
fs_join(source, "subdir", "subfile1"),
|
109 |
+
local_join(target, "newdir", "newfile"),
|
110 |
+
)
|
111 |
+
assert local_fs.isdir(local_join(target, "newdir"))
|
112 |
+
assert local_fs.isfile(local_join(target, "newdir", "newfile"))
|
113 |
+
|
114 |
+
def test_get_directory_to_existing_directory(
|
115 |
+
self,
|
116 |
+
fs,
|
117 |
+
fs_join,
|
118 |
+
fs_bulk_operations_scenario_0,
|
119 |
+
local_fs,
|
120 |
+
local_join,
|
121 |
+
local_target,
|
122 |
+
):
|
123 |
+
# Copy scenario 1e
|
124 |
+
source = fs_bulk_operations_scenario_0
|
125 |
+
|
126 |
+
target = local_target
|
127 |
+
local_fs.mkdir(target)
|
128 |
+
assert local_fs.isdir(target)
|
129 |
+
|
130 |
+
for source_slash, target_slash in zip([False, True], [False, True]):
|
131 |
+
s = fs_join(source, "subdir")
|
132 |
+
if source_slash:
|
133 |
+
s += "/"
|
134 |
+
t = target + "/" if target_slash else target
|
135 |
+
|
136 |
+
# Without recursive does nothing
|
137 |
+
fs.get(s, t)
|
138 |
+
assert local_fs.ls(target) == []
|
139 |
+
|
140 |
+
# With recursive
|
141 |
+
fs.get(s, t, recursive=True)
|
142 |
+
if source_slash:
|
143 |
+
assert local_fs.isfile(local_join(target, "subfile1"))
|
144 |
+
assert local_fs.isfile(local_join(target, "subfile2"))
|
145 |
+
assert local_fs.isdir(local_join(target, "nesteddir"))
|
146 |
+
assert local_fs.isfile(local_join(target, "nesteddir", "nestedfile"))
|
147 |
+
assert not local_fs.exists(local_join(target, "subdir"))
|
148 |
+
|
149 |
+
local_fs.rm(
|
150 |
+
[
|
151 |
+
local_join(target, "subfile1"),
|
152 |
+
local_join(target, "subfile2"),
|
153 |
+
local_join(target, "nesteddir"),
|
154 |
+
],
|
155 |
+
recursive=True,
|
156 |
+
)
|
157 |
+
else:
|
158 |
+
assert local_fs.isdir(local_join(target, "subdir"))
|
159 |
+
assert local_fs.isfile(local_join(target, "subdir", "subfile1"))
|
160 |
+
assert local_fs.isfile(local_join(target, "subdir", "subfile2"))
|
161 |
+
assert local_fs.isdir(local_join(target, "subdir", "nesteddir"))
|
162 |
+
assert local_fs.isfile(
|
163 |
+
local_join(target, "subdir", "nesteddir", "nestedfile")
|
164 |
+
)
|
165 |
+
|
166 |
+
local_fs.rm(local_join(target, "subdir"), recursive=True)
|
167 |
+
assert local_fs.ls(target) == []
|
168 |
+
|
169 |
+
# Limit recursive by maxdepth
|
170 |
+
fs.get(s, t, recursive=True, maxdepth=1)
|
171 |
+
if source_slash:
|
172 |
+
assert local_fs.isfile(local_join(target, "subfile1"))
|
173 |
+
assert local_fs.isfile(local_join(target, "subfile2"))
|
174 |
+
assert not local_fs.exists(local_join(target, "nesteddir"))
|
175 |
+
assert not local_fs.exists(local_join(target, "subdir"))
|
176 |
+
|
177 |
+
local_fs.rm(
|
178 |
+
[
|
179 |
+
local_join(target, "subfile1"),
|
180 |
+
local_join(target, "subfile2"),
|
181 |
+
],
|
182 |
+
recursive=True,
|
183 |
+
)
|
184 |
+
else:
|
185 |
+
assert local_fs.isdir(local_join(target, "subdir"))
|
186 |
+
assert local_fs.isfile(local_join(target, "subdir", "subfile1"))
|
187 |
+
assert local_fs.isfile(local_join(target, "subdir", "subfile2"))
|
188 |
+
assert not local_fs.exists(local_join(target, "subdir", "nesteddir"))
|
189 |
+
|
190 |
+
local_fs.rm(local_join(target, "subdir"), recursive=True)
|
191 |
+
assert local_fs.ls(target) == []
|
192 |
+
|
193 |
+
def test_get_directory_to_new_directory(
|
194 |
+
self,
|
195 |
+
fs,
|
196 |
+
fs_join,
|
197 |
+
fs_bulk_operations_scenario_0,
|
198 |
+
local_fs,
|
199 |
+
local_join,
|
200 |
+
local_target,
|
201 |
+
):
|
202 |
+
# Copy scenario 1f
|
203 |
+
source = fs_bulk_operations_scenario_0
|
204 |
+
|
205 |
+
target = local_target
|
206 |
+
local_fs.mkdir(target)
|
207 |
+
|
208 |
+
for source_slash, target_slash in zip([False, True], [False, True]):
|
209 |
+
s = fs_join(source, "subdir")
|
210 |
+
if source_slash:
|
211 |
+
s += "/"
|
212 |
+
t = local_join(target, "newdir")
|
213 |
+
if target_slash:
|
214 |
+
t += "/"
|
215 |
+
|
216 |
+
# Without recursive does nothing
|
217 |
+
fs.get(s, t)
|
218 |
+
assert local_fs.ls(target) == []
|
219 |
+
|
220 |
+
# With recursive
|
221 |
+
fs.get(s, t, recursive=True)
|
222 |
+
assert local_fs.isdir(local_join(target, "newdir"))
|
223 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
|
224 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
|
225 |
+
assert local_fs.isdir(local_join(target, "newdir", "nesteddir"))
|
226 |
+
assert local_fs.isfile(
|
227 |
+
local_join(target, "newdir", "nesteddir", "nestedfile")
|
228 |
+
)
|
229 |
+
assert not local_fs.exists(local_join(target, "subdir"))
|
230 |
+
|
231 |
+
local_fs.rm(local_join(target, "newdir"), recursive=True)
|
232 |
+
assert local_fs.ls(target) == []
|
233 |
+
|
234 |
+
# Limit recursive by maxdepth
|
235 |
+
fs.get(s, t, recursive=True, maxdepth=1)
|
236 |
+
assert local_fs.isdir(local_join(target, "newdir"))
|
237 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
|
238 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
|
239 |
+
assert not local_fs.exists(local_join(target, "newdir", "nesteddir"))
|
240 |
+
assert not local_fs.exists(local_join(target, "subdir"))
|
241 |
+
|
242 |
+
local_fs.rm(local_join(target, "newdir"), recursive=True)
|
243 |
+
assert not local_fs.exists(local_join(target, "newdir"))
|
244 |
+
|
245 |
+
def test_get_glob_to_existing_directory(
|
246 |
+
self,
|
247 |
+
fs,
|
248 |
+
fs_join,
|
249 |
+
fs_bulk_operations_scenario_0,
|
250 |
+
local_fs,
|
251 |
+
local_join,
|
252 |
+
local_target,
|
253 |
+
):
|
254 |
+
# Copy scenario 1g
|
255 |
+
source = fs_bulk_operations_scenario_0
|
256 |
+
|
257 |
+
target = local_target
|
258 |
+
local_fs.mkdir(target)
|
259 |
+
|
260 |
+
for target_slash in [False, True]:
|
261 |
+
t = target + "/" if target_slash else target
|
262 |
+
|
263 |
+
# Without recursive
|
264 |
+
fs.get(fs_join(source, "subdir", "*"), t)
|
265 |
+
assert local_fs.isfile(local_join(target, "subfile1"))
|
266 |
+
assert local_fs.isfile(local_join(target, "subfile2"))
|
267 |
+
assert not local_fs.isdir(local_join(target, "nesteddir"))
|
268 |
+
assert not local_fs.exists(local_join(target, "nesteddir", "nestedfile"))
|
269 |
+
assert not local_fs.exists(local_join(target, "subdir"))
|
270 |
+
|
271 |
+
local_fs.rm(
|
272 |
+
[
|
273 |
+
local_join(target, "subfile1"),
|
274 |
+
local_join(target, "subfile2"),
|
275 |
+
],
|
276 |
+
recursive=True,
|
277 |
+
)
|
278 |
+
assert local_fs.ls(target) == []
|
279 |
+
|
280 |
+
# With recursive
|
281 |
+
for glob, recursive in zip(["*", "**"], [True, False]):
|
282 |
+
fs.get(fs_join(source, "subdir", glob), t, recursive=recursive)
|
283 |
+
assert local_fs.isfile(local_join(target, "subfile1"))
|
284 |
+
assert local_fs.isfile(local_join(target, "subfile2"))
|
285 |
+
assert local_fs.isdir(local_join(target, "nesteddir"))
|
286 |
+
assert local_fs.isfile(local_join(target, "nesteddir", "nestedfile"))
|
287 |
+
assert not local_fs.exists(local_join(target, "subdir"))
|
288 |
+
|
289 |
+
local_fs.rm(
|
290 |
+
[
|
291 |
+
local_join(target, "subfile1"),
|
292 |
+
local_join(target, "subfile2"),
|
293 |
+
local_join(target, "nesteddir"),
|
294 |
+
],
|
295 |
+
recursive=True,
|
296 |
+
)
|
297 |
+
assert local_fs.ls(target) == []
|
298 |
+
|
299 |
+
# Limit recursive by maxdepth
|
300 |
+
fs.get(
|
301 |
+
fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1
|
302 |
+
)
|
303 |
+
assert local_fs.isfile(local_join(target, "subfile1"))
|
304 |
+
assert local_fs.isfile(local_join(target, "subfile2"))
|
305 |
+
assert not local_fs.exists(local_join(target, "nesteddir"))
|
306 |
+
assert not local_fs.exists(local_join(target, "subdir"))
|
307 |
+
|
308 |
+
local_fs.rm(
|
309 |
+
[
|
310 |
+
local_join(target, "subfile1"),
|
311 |
+
local_join(target, "subfile2"),
|
312 |
+
],
|
313 |
+
recursive=True,
|
314 |
+
)
|
315 |
+
assert local_fs.ls(target) == []
|
316 |
+
|
317 |
+
def test_get_glob_to_new_directory(
|
318 |
+
self,
|
319 |
+
fs,
|
320 |
+
fs_join,
|
321 |
+
fs_bulk_operations_scenario_0,
|
322 |
+
local_fs,
|
323 |
+
local_join,
|
324 |
+
local_target,
|
325 |
+
):
|
326 |
+
# Copy scenario 1h
|
327 |
+
source = fs_bulk_operations_scenario_0
|
328 |
+
|
329 |
+
target = local_target
|
330 |
+
local_fs.mkdir(target)
|
331 |
+
|
332 |
+
for target_slash in [False, True]:
|
333 |
+
t = fs_join(target, "newdir")
|
334 |
+
if target_slash:
|
335 |
+
t += "/"
|
336 |
+
|
337 |
+
# Without recursive
|
338 |
+
fs.get(fs_join(source, "subdir", "*"), t)
|
339 |
+
assert local_fs.isdir(local_join(target, "newdir"))
|
340 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
|
341 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
|
342 |
+
assert not local_fs.exists(local_join(target, "newdir", "nesteddir"))
|
343 |
+
assert not local_fs.exists(
|
344 |
+
local_join(target, "newdir", "nesteddir", "nestedfile")
|
345 |
+
)
|
346 |
+
assert not local_fs.exists(local_join(target, "subdir"))
|
347 |
+
assert not local_fs.exists(local_join(target, "newdir", "subdir"))
|
348 |
+
|
349 |
+
local_fs.rm(local_join(target, "newdir"), recursive=True)
|
350 |
+
assert local_fs.ls(target) == []
|
351 |
+
|
352 |
+
# With recursive
|
353 |
+
for glob, recursive in zip(["*", "**"], [True, False]):
|
354 |
+
fs.get(fs_join(source, "subdir", glob), t, recursive=recursive)
|
355 |
+
assert local_fs.isdir(local_join(target, "newdir"))
|
356 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
|
357 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
|
358 |
+
assert local_fs.isdir(local_join(target, "newdir", "nesteddir"))
|
359 |
+
assert local_fs.isfile(
|
360 |
+
local_join(target, "newdir", "nesteddir", "nestedfile")
|
361 |
+
)
|
362 |
+
assert not local_fs.exists(local_join(target, "subdir"))
|
363 |
+
assert not local_fs.exists(local_join(target, "newdir", "subdir"))
|
364 |
+
|
365 |
+
local_fs.rm(local_join(target, "newdir"), recursive=True)
|
366 |
+
assert not local_fs.exists(local_join(target, "newdir"))
|
367 |
+
|
368 |
+
# Limit recursive by maxdepth
|
369 |
+
fs.get(
|
370 |
+
fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1
|
371 |
+
)
|
372 |
+
assert local_fs.isdir(local_join(target, "newdir"))
|
373 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
|
374 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
|
375 |
+
assert not local_fs.exists(local_join(target, "newdir", "nesteddir"))
|
376 |
+
assert not local_fs.exists(local_join(target, "subdir"))
|
377 |
+
assert not local_fs.exists(local_join(target, "newdir", "subdir"))
|
378 |
+
|
379 |
+
local_fs.rm(local_fs.ls(target, detail=False), recursive=True)
|
380 |
+
assert not local_fs.exists(local_join(target, "newdir"))
|
381 |
+
|
382 |
+
@pytest.mark.parametrize(
|
383 |
+
GLOB_EDGE_CASES_TESTS["argnames"],
|
384 |
+
GLOB_EDGE_CASES_TESTS["argvalues"],
|
385 |
+
)
|
386 |
+
def test_get_glob_edge_cases(
|
387 |
+
self,
|
388 |
+
path,
|
389 |
+
recursive,
|
390 |
+
maxdepth,
|
391 |
+
expected,
|
392 |
+
fs,
|
393 |
+
fs_join,
|
394 |
+
fs_glob_edge_cases_files,
|
395 |
+
local_fs,
|
396 |
+
local_join,
|
397 |
+
local_target,
|
398 |
+
):
|
399 |
+
# Copy scenario 1g
|
400 |
+
source = fs_glob_edge_cases_files
|
401 |
+
|
402 |
+
target = local_target
|
403 |
+
|
404 |
+
for new_dir, target_slash in product([True, False], [True, False]):
|
405 |
+
local_fs.mkdir(target)
|
406 |
+
|
407 |
+
t = local_join(target, "newdir") if new_dir else target
|
408 |
+
t = t + "/" if target_slash else t
|
409 |
+
|
410 |
+
fs.get(fs_join(source, path), t, recursive=recursive, maxdepth=maxdepth)
|
411 |
+
|
412 |
+
output = local_fs.find(target)
|
413 |
+
if new_dir:
|
414 |
+
prefixed_expected = [
|
415 |
+
make_path_posix(local_join(target, "newdir", p)) for p in expected
|
416 |
+
]
|
417 |
+
else:
|
418 |
+
prefixed_expected = [
|
419 |
+
make_path_posix(local_join(target, p)) for p in expected
|
420 |
+
]
|
421 |
+
assert sorted(output) == sorted(prefixed_expected)
|
422 |
+
|
423 |
+
try:
|
424 |
+
local_fs.rm(target, recursive=True)
|
425 |
+
except FileNotFoundError:
|
426 |
+
pass
|
427 |
+
|
428 |
+
def test_get_list_of_files_to_existing_directory(
|
429 |
+
self,
|
430 |
+
fs,
|
431 |
+
fs_join,
|
432 |
+
fs_bulk_operations_scenario_0,
|
433 |
+
local_fs,
|
434 |
+
local_join,
|
435 |
+
local_target,
|
436 |
+
):
|
437 |
+
# Copy scenario 2a
|
438 |
+
source = fs_bulk_operations_scenario_0
|
439 |
+
|
440 |
+
target = local_target
|
441 |
+
local_fs.mkdir(target)
|
442 |
+
|
443 |
+
source_files = [
|
444 |
+
fs_join(source, "file1"),
|
445 |
+
fs_join(source, "file2"),
|
446 |
+
fs_join(source, "subdir", "subfile1"),
|
447 |
+
]
|
448 |
+
|
449 |
+
for target_slash in [False, True]:
|
450 |
+
t = target + "/" if target_slash else target
|
451 |
+
|
452 |
+
fs.get(source_files, t)
|
453 |
+
assert local_fs.isfile(local_join(target, "file1"))
|
454 |
+
assert local_fs.isfile(local_join(target, "file2"))
|
455 |
+
assert local_fs.isfile(local_join(target, "subfile1"))
|
456 |
+
|
457 |
+
local_fs.rm(
|
458 |
+
[
|
459 |
+
local_join(target, "file1"),
|
460 |
+
local_join(target, "file2"),
|
461 |
+
local_join(target, "subfile1"),
|
462 |
+
],
|
463 |
+
recursive=True,
|
464 |
+
)
|
465 |
+
assert local_fs.ls(target) == []
|
466 |
+
|
467 |
+
def test_get_list_of_files_to_new_directory(
|
468 |
+
self,
|
469 |
+
fs,
|
470 |
+
fs_join,
|
471 |
+
fs_bulk_operations_scenario_0,
|
472 |
+
local_fs,
|
473 |
+
local_join,
|
474 |
+
local_target,
|
475 |
+
):
|
476 |
+
# Copy scenario 2b
|
477 |
+
source = fs_bulk_operations_scenario_0
|
478 |
+
|
479 |
+
target = local_target
|
480 |
+
local_fs.mkdir(target)
|
481 |
+
|
482 |
+
source_files = [
|
483 |
+
fs_join(source, "file1"),
|
484 |
+
fs_join(source, "file2"),
|
485 |
+
fs_join(source, "subdir", "subfile1"),
|
486 |
+
]
|
487 |
+
|
488 |
+
fs.get(source_files, local_join(target, "newdir") + "/") # Note trailing slash
|
489 |
+
assert local_fs.isdir(local_join(target, "newdir"))
|
490 |
+
assert local_fs.isfile(local_join(target, "newdir", "file1"))
|
491 |
+
assert local_fs.isfile(local_join(target, "newdir", "file2"))
|
492 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
|
493 |
+
|
494 |
+
def test_get_directory_recursive(
|
495 |
+
self, fs, fs_join, fs_path, local_fs, local_join, local_target
|
496 |
+
):
|
497 |
+
# https://github.com/fsspec/filesystem_spec/issues/1062
|
498 |
+
# Recursive cp/get/put of source directory into non-existent target directory.
|
499 |
+
src = fs_join(fs_path, "src")
|
500 |
+
src_file = fs_join(src, "file")
|
501 |
+
fs.mkdir(src)
|
502 |
+
fs.touch(src_file)
|
503 |
+
|
504 |
+
target = local_target
|
505 |
+
|
506 |
+
# get without slash
|
507 |
+
assert not local_fs.exists(target)
|
508 |
+
for loop in range(2):
|
509 |
+
fs.get(src, target, recursive=True)
|
510 |
+
assert local_fs.isdir(target)
|
511 |
+
|
512 |
+
if loop == 0:
|
513 |
+
assert local_fs.isfile(local_join(target, "file"))
|
514 |
+
assert not local_fs.exists(local_join(target, "src"))
|
515 |
+
else:
|
516 |
+
assert local_fs.isfile(local_join(target, "file"))
|
517 |
+
assert local_fs.isdir(local_join(target, "src"))
|
518 |
+
assert local_fs.isfile(local_join(target, "src", "file"))
|
519 |
+
|
520 |
+
local_fs.rm(target, recursive=True)
|
521 |
+
|
522 |
+
# get with slash
|
523 |
+
assert not local_fs.exists(target)
|
524 |
+
for loop in range(2):
|
525 |
+
fs.get(src + "/", target, recursive=True)
|
526 |
+
assert local_fs.isdir(target)
|
527 |
+
assert local_fs.isfile(local_join(target, "file"))
|
528 |
+
assert not local_fs.exists(local_join(target, "src"))
|
529 |
+
|
530 |
+
def test_get_directory_without_files_with_same_name_prefix(
|
531 |
+
self,
|
532 |
+
fs,
|
533 |
+
fs_join,
|
534 |
+
local_fs,
|
535 |
+
local_join,
|
536 |
+
local_target,
|
537 |
+
fs_dir_and_file_with_same_name_prefix,
|
538 |
+
):
|
539 |
+
# Create the test dirs
|
540 |
+
source = fs_dir_and_file_with_same_name_prefix
|
541 |
+
target = local_target
|
542 |
+
|
543 |
+
# Test without glob
|
544 |
+
fs.get(fs_join(source, "subdir"), target, recursive=True)
|
545 |
+
|
546 |
+
assert local_fs.isfile(local_join(target, "subfile.txt"))
|
547 |
+
assert not local_fs.isfile(local_join(target, "subdir.txt"))
|
548 |
+
|
549 |
+
local_fs.rm([local_join(target, "subfile.txt")])
|
550 |
+
assert local_fs.ls(target) == []
|
551 |
+
|
552 |
+
# Test with glob
|
553 |
+
fs.get(fs_join(source, "subdir*"), target, recursive=True)
|
554 |
+
|
555 |
+
assert local_fs.isdir(local_join(target, "subdir"))
|
556 |
+
assert local_fs.isfile(local_join(target, "subdir", "subfile.txt"))
|
557 |
+
assert local_fs.isfile(local_join(target, "subdir.txt"))
|
558 |
+
|
559 |
+
def test_get_with_source_and_destination_as_list(
|
560 |
+
self,
|
561 |
+
fs,
|
562 |
+
fs_join,
|
563 |
+
local_fs,
|
564 |
+
local_join,
|
565 |
+
local_target,
|
566 |
+
fs_10_files_with_hashed_names,
|
567 |
+
):
|
568 |
+
# Create the test dir
|
569 |
+
source = fs_10_files_with_hashed_names
|
570 |
+
target = local_target
|
571 |
+
|
572 |
+
# Create list of files for source and destination
|
573 |
+
source_files = []
|
574 |
+
destination_files = []
|
575 |
+
for i in range(10):
|
576 |
+
hashed_i = md5(str(i).encode("utf-8")).hexdigest()
|
577 |
+
source_files.append(fs_join(source, f"{hashed_i}.txt"))
|
578 |
+
destination_files.append(
|
579 |
+
make_path_posix(local_join(target, f"{hashed_i}.txt"))
|
580 |
+
)
|
581 |
+
|
582 |
+
# Copy and assert order was kept
|
583 |
+
fs.get(rpath=source_files, lpath=destination_files)
|
584 |
+
|
585 |
+
for i in range(10):
|
586 |
+
file_content = local_fs.cat(destination_files[i]).decode("utf-8")
|
587 |
+
assert file_content == str(i)
|
lib/python3.11/site-packages/fsspec/tests/abstract/put.py
ADDED
@@ -0,0 +1,577 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from hashlib import md5
|
2 |
+
from itertools import product
|
3 |
+
|
4 |
+
import pytest
|
5 |
+
|
6 |
+
from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS
|
7 |
+
|
8 |
+
|
9 |
+
class AbstractPutTests:
|
10 |
+
def test_put_file_to_existing_directory(
|
11 |
+
self,
|
12 |
+
fs,
|
13 |
+
fs_join,
|
14 |
+
fs_target,
|
15 |
+
local_join,
|
16 |
+
local_bulk_operations_scenario_0,
|
17 |
+
supports_empty_directories,
|
18 |
+
):
|
19 |
+
# Copy scenario 1a
|
20 |
+
source = local_bulk_operations_scenario_0
|
21 |
+
|
22 |
+
target = fs_target
|
23 |
+
fs.mkdir(target)
|
24 |
+
if not supports_empty_directories:
|
25 |
+
# Force target directory to exist by adding a dummy file
|
26 |
+
fs.touch(fs_join(target, "dummy"))
|
27 |
+
assert fs.isdir(target)
|
28 |
+
|
29 |
+
target_file2 = fs_join(target, "file2")
|
30 |
+
target_subfile1 = fs_join(target, "subfile1")
|
31 |
+
|
32 |
+
# Copy from source directory
|
33 |
+
fs.put(local_join(source, "file2"), target)
|
34 |
+
assert fs.isfile(target_file2)
|
35 |
+
|
36 |
+
# Copy from sub directory
|
37 |
+
fs.put(local_join(source, "subdir", "subfile1"), target)
|
38 |
+
assert fs.isfile(target_subfile1)
|
39 |
+
|
40 |
+
# Remove copied files
|
41 |
+
fs.rm([target_file2, target_subfile1])
|
42 |
+
assert not fs.exists(target_file2)
|
43 |
+
assert not fs.exists(target_subfile1)
|
44 |
+
|
45 |
+
# Repeat with trailing slash on target
|
46 |
+
fs.put(local_join(source, "file2"), target + "/")
|
47 |
+
assert fs.isdir(target)
|
48 |
+
assert fs.isfile(target_file2)
|
49 |
+
|
50 |
+
fs.put(local_join(source, "subdir", "subfile1"), target + "/")
|
51 |
+
assert fs.isfile(target_subfile1)
|
52 |
+
|
53 |
+
def test_put_file_to_new_directory(
|
54 |
+
self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0
|
55 |
+
):
|
56 |
+
# Copy scenario 1b
|
57 |
+
source = local_bulk_operations_scenario_0
|
58 |
+
|
59 |
+
target = fs_target
|
60 |
+
fs.mkdir(target)
|
61 |
+
|
62 |
+
fs.put(
|
63 |
+
local_join(source, "subdir", "subfile1"), fs_join(target, "newdir/")
|
64 |
+
) # Note trailing slash
|
65 |
+
assert fs.isdir(target)
|
66 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
67 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
68 |
+
|
69 |
+
def test_put_file_to_file_in_existing_directory(
|
70 |
+
self,
|
71 |
+
fs,
|
72 |
+
fs_join,
|
73 |
+
fs_target,
|
74 |
+
local_join,
|
75 |
+
supports_empty_directories,
|
76 |
+
local_bulk_operations_scenario_0,
|
77 |
+
):
|
78 |
+
# Copy scenario 1c
|
79 |
+
source = local_bulk_operations_scenario_0
|
80 |
+
|
81 |
+
target = fs_target
|
82 |
+
fs.mkdir(target)
|
83 |
+
if not supports_empty_directories:
|
84 |
+
# Force target directory to exist by adding a dummy file
|
85 |
+
fs.touch(fs_join(target, "dummy"))
|
86 |
+
assert fs.isdir(target)
|
87 |
+
|
88 |
+
fs.put(local_join(source, "subdir", "subfile1"), fs_join(target, "newfile"))
|
89 |
+
assert fs.isfile(fs_join(target, "newfile"))
|
90 |
+
|
91 |
+
def test_put_file_to_file_in_new_directory(
|
92 |
+
self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0
|
93 |
+
):
|
94 |
+
# Copy scenario 1d
|
95 |
+
source = local_bulk_operations_scenario_0
|
96 |
+
|
97 |
+
target = fs_target
|
98 |
+
fs.mkdir(target)
|
99 |
+
|
100 |
+
fs.put(
|
101 |
+
local_join(source, "subdir", "subfile1"),
|
102 |
+
fs_join(target, "newdir", "newfile"),
|
103 |
+
)
|
104 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
105 |
+
assert fs.isfile(fs_join(target, "newdir", "newfile"))
|
106 |
+
|
107 |
+
def test_put_directory_to_existing_directory(
|
108 |
+
self,
|
109 |
+
fs,
|
110 |
+
fs_join,
|
111 |
+
fs_target,
|
112 |
+
local_bulk_operations_scenario_0,
|
113 |
+
supports_empty_directories,
|
114 |
+
):
|
115 |
+
# Copy scenario 1e
|
116 |
+
source = local_bulk_operations_scenario_0
|
117 |
+
|
118 |
+
target = fs_target
|
119 |
+
fs.mkdir(target)
|
120 |
+
if not supports_empty_directories:
|
121 |
+
# Force target directory to exist by adding a dummy file
|
122 |
+
dummy = fs_join(target, "dummy")
|
123 |
+
fs.touch(dummy)
|
124 |
+
assert fs.isdir(target)
|
125 |
+
|
126 |
+
for source_slash, target_slash in zip([False, True], [False, True]):
|
127 |
+
s = fs_join(source, "subdir")
|
128 |
+
if source_slash:
|
129 |
+
s += "/"
|
130 |
+
t = target + "/" if target_slash else target
|
131 |
+
|
132 |
+
# Without recursive does nothing
|
133 |
+
fs.put(s, t)
|
134 |
+
assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
|
135 |
+
|
136 |
+
# With recursive
|
137 |
+
fs.put(s, t, recursive=True)
|
138 |
+
if source_slash:
|
139 |
+
assert fs.isfile(fs_join(target, "subfile1"))
|
140 |
+
assert fs.isfile(fs_join(target, "subfile2"))
|
141 |
+
assert fs.isdir(fs_join(target, "nesteddir"))
|
142 |
+
assert fs.isfile(fs_join(target, "nesteddir", "nestedfile"))
|
143 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
144 |
+
|
145 |
+
fs.rm(
|
146 |
+
[
|
147 |
+
fs_join(target, "subfile1"),
|
148 |
+
fs_join(target, "subfile2"),
|
149 |
+
fs_join(target, "nesteddir"),
|
150 |
+
],
|
151 |
+
recursive=True,
|
152 |
+
)
|
153 |
+
else:
|
154 |
+
assert fs.isdir(fs_join(target, "subdir"))
|
155 |
+
assert fs.isfile(fs_join(target, "subdir", "subfile1"))
|
156 |
+
assert fs.isfile(fs_join(target, "subdir", "subfile2"))
|
157 |
+
assert fs.isdir(fs_join(target, "subdir", "nesteddir"))
|
158 |
+
assert fs.isfile(fs_join(target, "subdir", "nesteddir", "nestedfile"))
|
159 |
+
|
160 |
+
fs.rm(fs_join(target, "subdir"), recursive=True)
|
161 |
+
assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
|
162 |
+
|
163 |
+
# Limit recursive by maxdepth
|
164 |
+
fs.put(s, t, recursive=True, maxdepth=1)
|
165 |
+
if source_slash:
|
166 |
+
assert fs.isfile(fs_join(target, "subfile1"))
|
167 |
+
assert fs.isfile(fs_join(target, "subfile2"))
|
168 |
+
assert not fs.exists(fs_join(target, "nesteddir"))
|
169 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
170 |
+
|
171 |
+
fs.rm(
|
172 |
+
[
|
173 |
+
fs_join(target, "subfile1"),
|
174 |
+
fs_join(target, "subfile2"),
|
175 |
+
],
|
176 |
+
recursive=True,
|
177 |
+
)
|
178 |
+
else:
|
179 |
+
assert fs.isdir(fs_join(target, "subdir"))
|
180 |
+
assert fs.isfile(fs_join(target, "subdir", "subfile1"))
|
181 |
+
assert fs.isfile(fs_join(target, "subdir", "subfile2"))
|
182 |
+
assert not fs.exists(fs_join(target, "subdir", "nesteddir"))
|
183 |
+
|
184 |
+
fs.rm(fs_join(target, "subdir"), recursive=True)
|
185 |
+
assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
|
186 |
+
|
187 |
+
def test_put_directory_to_new_directory(
|
188 |
+
self,
|
189 |
+
fs,
|
190 |
+
fs_join,
|
191 |
+
fs_target,
|
192 |
+
local_bulk_operations_scenario_0,
|
193 |
+
supports_empty_directories,
|
194 |
+
):
|
195 |
+
# Copy scenario 1f
|
196 |
+
source = local_bulk_operations_scenario_0
|
197 |
+
|
198 |
+
target = fs_target
|
199 |
+
fs.mkdir(target)
|
200 |
+
|
201 |
+
for source_slash, target_slash in zip([False, True], [False, True]):
|
202 |
+
s = fs_join(source, "subdir")
|
203 |
+
if source_slash:
|
204 |
+
s += "/"
|
205 |
+
t = fs_join(target, "newdir")
|
206 |
+
if target_slash:
|
207 |
+
t += "/"
|
208 |
+
|
209 |
+
# Without recursive does nothing
|
210 |
+
fs.put(s, t)
|
211 |
+
if supports_empty_directories:
|
212 |
+
assert fs.ls(target) == []
|
213 |
+
else:
|
214 |
+
with pytest.raises(FileNotFoundError):
|
215 |
+
fs.ls(target)
|
216 |
+
|
217 |
+
# With recursive
|
218 |
+
fs.put(s, t, recursive=True)
|
219 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
220 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
221 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile2"))
|
222 |
+
assert fs.isdir(fs_join(target, "newdir", "nesteddir"))
|
223 |
+
assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile"))
|
224 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
225 |
+
|
226 |
+
fs.rm(fs_join(target, "newdir"), recursive=True)
|
227 |
+
assert not fs.exists(fs_join(target, "newdir"))
|
228 |
+
|
229 |
+
# Limit recursive by maxdepth
|
230 |
+
fs.put(s, t, recursive=True, maxdepth=1)
|
231 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
232 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
233 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile2"))
|
234 |
+
assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
|
235 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
236 |
+
|
237 |
+
fs.rm(fs_join(target, "newdir"), recursive=True)
|
238 |
+
assert not fs.exists(fs_join(target, "newdir"))
|
239 |
+
|
240 |
+
def test_put_glob_to_existing_directory(
|
241 |
+
self,
|
242 |
+
fs,
|
243 |
+
fs_join,
|
244 |
+
fs_target,
|
245 |
+
local_join,
|
246 |
+
supports_empty_directories,
|
247 |
+
local_bulk_operations_scenario_0,
|
248 |
+
):
|
249 |
+
# Copy scenario 1g
|
250 |
+
source = local_bulk_operations_scenario_0
|
251 |
+
|
252 |
+
target = fs_target
|
253 |
+
fs.mkdir(target)
|
254 |
+
if not supports_empty_directories:
|
255 |
+
# Force target directory to exist by adding a dummy file
|
256 |
+
dummy = fs_join(target, "dummy")
|
257 |
+
fs.touch(dummy)
|
258 |
+
assert fs.isdir(target)
|
259 |
+
|
260 |
+
for target_slash in [False, True]:
|
261 |
+
t = target + "/" if target_slash else target
|
262 |
+
|
263 |
+
# Without recursive
|
264 |
+
fs.put(local_join(source, "subdir", "*"), t)
|
265 |
+
assert fs.isfile(fs_join(target, "subfile1"))
|
266 |
+
assert fs.isfile(fs_join(target, "subfile2"))
|
267 |
+
assert not fs.isdir(fs_join(target, "nesteddir"))
|
268 |
+
assert not fs.exists(fs_join(target, "nesteddir", "nestedfile"))
|
269 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
270 |
+
|
271 |
+
fs.rm(
|
272 |
+
[
|
273 |
+
fs_join(target, "subfile1"),
|
274 |
+
fs_join(target, "subfile2"),
|
275 |
+
],
|
276 |
+
recursive=True,
|
277 |
+
)
|
278 |
+
assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
|
279 |
+
|
280 |
+
# With recursive
|
281 |
+
for glob, recursive in zip(["*", "**"], [True, False]):
|
282 |
+
fs.put(local_join(source, "subdir", glob), t, recursive=recursive)
|
283 |
+
assert fs.isfile(fs_join(target, "subfile1"))
|
284 |
+
assert fs.isfile(fs_join(target, "subfile2"))
|
285 |
+
assert fs.isdir(fs_join(target, "nesteddir"))
|
286 |
+
assert fs.isfile(fs_join(target, "nesteddir", "nestedfile"))
|
287 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
288 |
+
|
289 |
+
fs.rm(
|
290 |
+
[
|
291 |
+
fs_join(target, "subfile1"),
|
292 |
+
fs_join(target, "subfile2"),
|
293 |
+
fs_join(target, "nesteddir"),
|
294 |
+
],
|
295 |
+
recursive=True,
|
296 |
+
)
|
297 |
+
assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
|
298 |
+
|
299 |
+
# Limit recursive by maxdepth
|
300 |
+
fs.put(
|
301 |
+
local_join(source, "subdir", glob),
|
302 |
+
t,
|
303 |
+
recursive=recursive,
|
304 |
+
maxdepth=1,
|
305 |
+
)
|
306 |
+
assert fs.isfile(fs_join(target, "subfile1"))
|
307 |
+
assert fs.isfile(fs_join(target, "subfile2"))
|
308 |
+
assert not fs.exists(fs_join(target, "nesteddir"))
|
309 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
310 |
+
|
311 |
+
fs.rm(
|
312 |
+
[
|
313 |
+
fs_join(target, "subfile1"),
|
314 |
+
fs_join(target, "subfile2"),
|
315 |
+
],
|
316 |
+
recursive=True,
|
317 |
+
)
|
318 |
+
assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
|
319 |
+
|
320 |
+
def test_put_glob_to_new_directory(
|
321 |
+
self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0
|
322 |
+
):
|
323 |
+
# Copy scenario 1h
|
324 |
+
source = local_bulk_operations_scenario_0
|
325 |
+
|
326 |
+
target = fs_target
|
327 |
+
fs.mkdir(target)
|
328 |
+
|
329 |
+
for target_slash in [False, True]:
|
330 |
+
t = fs_join(target, "newdir")
|
331 |
+
if target_slash:
|
332 |
+
t += "/"
|
333 |
+
|
334 |
+
# Without recursive
|
335 |
+
fs.put(local_join(source, "subdir", "*"), t)
|
336 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
337 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
338 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile2"))
|
339 |
+
assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
|
340 |
+
assert not fs.exists(fs_join(target, "newdir", "nesteddir", "nestedfile"))
|
341 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
342 |
+
assert not fs.exists(fs_join(target, "newdir", "subdir"))
|
343 |
+
|
344 |
+
fs.rm(fs_join(target, "newdir"), recursive=True)
|
345 |
+
assert not fs.exists(fs_join(target, "newdir"))
|
346 |
+
|
347 |
+
# With recursive
|
348 |
+
for glob, recursive in zip(["*", "**"], [True, False]):
|
349 |
+
fs.put(local_join(source, "subdir", glob), t, recursive=recursive)
|
350 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
351 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
352 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile2"))
|
353 |
+
assert fs.isdir(fs_join(target, "newdir", "nesteddir"))
|
354 |
+
assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile"))
|
355 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
356 |
+
assert not fs.exists(fs_join(target, "newdir", "subdir"))
|
357 |
+
|
358 |
+
fs.rm(fs_join(target, "newdir"), recursive=True)
|
359 |
+
assert not fs.exists(fs_join(target, "newdir"))
|
360 |
+
|
361 |
+
# Limit recursive by maxdepth
|
362 |
+
fs.put(
|
363 |
+
local_join(source, "subdir", glob),
|
364 |
+
t,
|
365 |
+
recursive=recursive,
|
366 |
+
maxdepth=1,
|
367 |
+
)
|
368 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
369 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
370 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile2"))
|
371 |
+
assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
|
372 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
373 |
+
assert not fs.exists(fs_join(target, "newdir", "subdir"))
|
374 |
+
|
375 |
+
fs.rm(fs_join(target, "newdir"), recursive=True)
|
376 |
+
assert not fs.exists(fs_join(target, "newdir"))
|
377 |
+
|
378 |
+
@pytest.mark.parametrize(
|
379 |
+
GLOB_EDGE_CASES_TESTS["argnames"],
|
380 |
+
GLOB_EDGE_CASES_TESTS["argvalues"],
|
381 |
+
)
|
382 |
+
def test_put_glob_edge_cases(
|
383 |
+
self,
|
384 |
+
path,
|
385 |
+
recursive,
|
386 |
+
maxdepth,
|
387 |
+
expected,
|
388 |
+
fs,
|
389 |
+
fs_join,
|
390 |
+
fs_target,
|
391 |
+
local_glob_edge_cases_files,
|
392 |
+
local_join,
|
393 |
+
fs_sanitize_path,
|
394 |
+
):
|
395 |
+
# Copy scenario 1g
|
396 |
+
source = local_glob_edge_cases_files
|
397 |
+
|
398 |
+
target = fs_target
|
399 |
+
|
400 |
+
for new_dir, target_slash in product([True, False], [True, False]):
|
401 |
+
fs.mkdir(target)
|
402 |
+
|
403 |
+
t = fs_join(target, "newdir") if new_dir else target
|
404 |
+
t = t + "/" if target_slash else t
|
405 |
+
|
406 |
+
fs.put(local_join(source, path), t, recursive=recursive, maxdepth=maxdepth)
|
407 |
+
|
408 |
+
output = fs.find(target)
|
409 |
+
if new_dir:
|
410 |
+
prefixed_expected = [
|
411 |
+
fs_sanitize_path(fs_join(target, "newdir", p)) for p in expected
|
412 |
+
]
|
413 |
+
else:
|
414 |
+
prefixed_expected = [
|
415 |
+
fs_sanitize_path(fs_join(target, p)) for p in expected
|
416 |
+
]
|
417 |
+
assert sorted(output) == sorted(prefixed_expected)
|
418 |
+
|
419 |
+
try:
|
420 |
+
fs.rm(target, recursive=True)
|
421 |
+
except FileNotFoundError:
|
422 |
+
pass
|
423 |
+
|
424 |
+
def test_put_list_of_files_to_existing_directory(
|
425 |
+
self,
|
426 |
+
fs,
|
427 |
+
fs_join,
|
428 |
+
fs_target,
|
429 |
+
local_join,
|
430 |
+
local_bulk_operations_scenario_0,
|
431 |
+
supports_empty_directories,
|
432 |
+
):
|
433 |
+
# Copy scenario 2a
|
434 |
+
source = local_bulk_operations_scenario_0
|
435 |
+
|
436 |
+
target = fs_target
|
437 |
+
fs.mkdir(target)
|
438 |
+
if not supports_empty_directories:
|
439 |
+
# Force target directory to exist by adding a dummy file
|
440 |
+
dummy = fs_join(target, "dummy")
|
441 |
+
fs.touch(dummy)
|
442 |
+
assert fs.isdir(target)
|
443 |
+
|
444 |
+
source_files = [
|
445 |
+
local_join(source, "file1"),
|
446 |
+
local_join(source, "file2"),
|
447 |
+
local_join(source, "subdir", "subfile1"),
|
448 |
+
]
|
449 |
+
|
450 |
+
for target_slash in [False, True]:
|
451 |
+
t = target + "/" if target_slash else target
|
452 |
+
|
453 |
+
fs.put(source_files, t)
|
454 |
+
assert fs.isfile(fs_join(target, "file1"))
|
455 |
+
assert fs.isfile(fs_join(target, "file2"))
|
456 |
+
assert fs.isfile(fs_join(target, "subfile1"))
|
457 |
+
|
458 |
+
fs.rm(
|
459 |
+
[
|
460 |
+
fs_join(target, "file1"),
|
461 |
+
fs_join(target, "file2"),
|
462 |
+
fs_join(target, "subfile1"),
|
463 |
+
],
|
464 |
+
recursive=True,
|
465 |
+
)
|
466 |
+
assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
|
467 |
+
|
468 |
+
def test_put_list_of_files_to_new_directory(
|
469 |
+
self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0
|
470 |
+
):
|
471 |
+
# Copy scenario 2b
|
472 |
+
source = local_bulk_operations_scenario_0
|
473 |
+
|
474 |
+
target = fs_target
|
475 |
+
fs.mkdir(target)
|
476 |
+
|
477 |
+
source_files = [
|
478 |
+
local_join(source, "file1"),
|
479 |
+
local_join(source, "file2"),
|
480 |
+
local_join(source, "subdir", "subfile1"),
|
481 |
+
]
|
482 |
+
|
483 |
+
fs.put(source_files, fs_join(target, "newdir") + "/") # Note trailing slash
|
484 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
485 |
+
assert fs.isfile(fs_join(target, "newdir", "file1"))
|
486 |
+
assert fs.isfile(fs_join(target, "newdir", "file2"))
|
487 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
488 |
+
|
489 |
+
def test_put_directory_recursive(
|
490 |
+
self, fs, fs_join, fs_target, local_fs, local_join, local_path
|
491 |
+
):
|
492 |
+
# https://github.com/fsspec/filesystem_spec/issues/1062
|
493 |
+
# Recursive cp/get/put of source directory into non-existent target directory.
|
494 |
+
src = local_join(local_path, "src")
|
495 |
+
src_file = local_join(src, "file")
|
496 |
+
local_fs.mkdir(src)
|
497 |
+
local_fs.touch(src_file)
|
498 |
+
|
499 |
+
target = fs_target
|
500 |
+
|
501 |
+
# put without slash
|
502 |
+
assert not fs.exists(target)
|
503 |
+
for loop in range(2):
|
504 |
+
fs.put(src, target, recursive=True)
|
505 |
+
assert fs.isdir(target)
|
506 |
+
|
507 |
+
if loop == 0:
|
508 |
+
assert fs.isfile(fs_join(target, "file"))
|
509 |
+
assert not fs.exists(fs_join(target, "src"))
|
510 |
+
else:
|
511 |
+
assert fs.isfile(fs_join(target, "file"))
|
512 |
+
assert fs.isdir(fs_join(target, "src"))
|
513 |
+
assert fs.isfile(fs_join(target, "src", "file"))
|
514 |
+
|
515 |
+
fs.rm(target, recursive=True)
|
516 |
+
|
517 |
+
# put with slash
|
518 |
+
assert not fs.exists(target)
|
519 |
+
for loop in range(2):
|
520 |
+
fs.put(src + "/", target, recursive=True)
|
521 |
+
assert fs.isdir(target)
|
522 |
+
assert fs.isfile(fs_join(target, "file"))
|
523 |
+
assert not fs.exists(fs_join(target, "src"))
|
524 |
+
|
525 |
+
def test_put_directory_without_files_with_same_name_prefix(
|
526 |
+
self,
|
527 |
+
fs,
|
528 |
+
fs_join,
|
529 |
+
fs_target,
|
530 |
+
local_join,
|
531 |
+
local_dir_and_file_with_same_name_prefix,
|
532 |
+
supports_empty_directories,
|
533 |
+
):
|
534 |
+
# Create the test dirs
|
535 |
+
source = local_dir_and_file_with_same_name_prefix
|
536 |
+
target = fs_target
|
537 |
+
|
538 |
+
# Test without glob
|
539 |
+
fs.put(local_join(source, "subdir"), fs_target, recursive=True)
|
540 |
+
|
541 |
+
assert fs.isfile(fs_join(fs_target, "subfile.txt"))
|
542 |
+
assert not fs.isfile(fs_join(fs_target, "subdir.txt"))
|
543 |
+
|
544 |
+
fs.rm([fs_join(target, "subfile.txt")])
|
545 |
+
if supports_empty_directories:
|
546 |
+
assert fs.ls(target) == []
|
547 |
+
else:
|
548 |
+
assert not fs.exists(target)
|
549 |
+
|
550 |
+
# Test with glob
|
551 |
+
fs.put(local_join(source, "subdir*"), fs_target, recursive=True)
|
552 |
+
|
553 |
+
assert fs.isdir(fs_join(fs_target, "subdir"))
|
554 |
+
assert fs.isfile(fs_join(fs_target, "subdir", "subfile.txt"))
|
555 |
+
assert fs.isfile(fs_join(fs_target, "subdir.txt"))
|
556 |
+
|
557 |
+
def test_copy_with_source_and_destination_as_list(
|
558 |
+
self, fs, fs_target, fs_join, local_join, local_10_files_with_hashed_names
|
559 |
+
):
|
560 |
+
# Create the test dir
|
561 |
+
source = local_10_files_with_hashed_names
|
562 |
+
target = fs_target
|
563 |
+
|
564 |
+
# Create list of files for source and destination
|
565 |
+
source_files = []
|
566 |
+
destination_files = []
|
567 |
+
for i in range(10):
|
568 |
+
hashed_i = md5(str(i).encode("utf-8")).hexdigest()
|
569 |
+
source_files.append(local_join(source, f"{hashed_i}.txt"))
|
570 |
+
destination_files.append(fs_join(target, f"{hashed_i}.txt"))
|
571 |
+
|
572 |
+
# Copy and assert order was kept
|
573 |
+
fs.put(lpath=source_files, rpath=destination_files)
|
574 |
+
|
575 |
+
for i in range(10):
|
576 |
+
file_content = fs.cat(destination_files[i]).decode("utf-8")
|
577 |
+
assert file_content == str(i)
|
lib/python3.11/site-packages/fsspec/transaction.py
ADDED
@@ -0,0 +1,85 @@
from collections import deque


class Transaction:
    """Filesystem transaction write context

    Gathers files for deferred commit or discard, so that several write
    operations can be finalized semi-atomically. This works by having this
    instance as the ``.transaction`` attribute of the given filesystem
    """

    def __init__(self, fs):
        """
        Parameters
        ----------
        fs: FileSystem instance
        """
        self.fs = fs
        self.files = deque()

    def __enter__(self):
        self.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """End transaction and commit, if exit is not due to exception"""
        # only commit if there was no exception
        self.complete(commit=exc_type is None)
        self.fs._intrans = False
        self.fs._transaction = None

    def start(self):
        """Start a transaction on this FileSystem"""
        self.files = deque()  # clean up after previous failed completions
        self.fs._intrans = True

    def complete(self, commit=True):
        """Finish transaction: commit or discard all deferred files"""
        while self.files:
            f = self.files.popleft()
            if commit:
                f.commit()
            else:
                f.discard()
        self.fs._intrans = False


class FileActor:
    def __init__(self):
        self.files = []

    def commit(self):
        for f in self.files:
            f.commit()
        self.files.clear()

    def discard(self):
        for f in self.files:
            f.discard()
        self.files.clear()

    def append(self, f):
        self.files.append(f)


class DaskTransaction(Transaction):
    def __init__(self, fs):
        """
        Parameters
        ----------
        fs: FileSystem instance
        """
        import distributed

        super().__init__(fs)
        client = distributed.default_client()
        self.files = client.submit(FileActor, actor=True).result()

    def complete(self, commit=True):
        """Finish transaction: commit or discard all deferred files"""
        if commit:
            self.files.commit().result()
        else:
            self.files.discard().result()
        self.fs._intrans = False
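For context, a minimal sketch of how this Transaction class is typically driven through an fsspec filesystem's ``.transaction`` attribute (the memory filesystem and paths below are only illustrative assumptions):

import fsspec

fs = fsspec.filesystem("memory")
# Writes inside the block are deferred; they commit together on clean exit
# and are discarded if an exception escapes the block.
with fs.transaction:
    with fs.open("/staging/a.txt", "wb") as f:
        f.write(b"first")
    with fs.open("/staging/b.txt", "wb") as f:
        f.write(b"second")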
lib/python3.11/site-packages/fsspec/utils.py
ADDED
@@ -0,0 +1,742 @@
from __future__ import annotations

import contextlib
import logging
import math
import os
import pathlib
import re
import sys
import tempfile
from functools import partial
from hashlib import md5
from importlib.metadata import version
from typing import (
    IO,
    TYPE_CHECKING,
    Any,
    Callable,
    Iterable,
    Iterator,
    Sequence,
    TypeVar,
)
from urllib.parse import urlsplit

if TYPE_CHECKING:
    from typing_extensions import TypeGuard

    from fsspec.spec import AbstractFileSystem


DEFAULT_BLOCK_SIZE = 5 * 2**20

T = TypeVar("T")


def infer_storage_options(
    urlpath: str, inherit_storage_options: dict[str, Any] | None = None
) -> dict[str, Any]:
    """Infer storage options from URL path and merge it with existing storage
    options.

    Parameters
    ----------
    urlpath: str or unicode
        Either local absolute file path or URL (hdfs://namenode:8020/file.csv)
    inherit_storage_options: dict (optional)
        Its contents will get merged with the inferred information from the
        given path

    Returns
    -------
    Storage options dict.

    Examples
    --------
    >>> infer_storage_options('/mnt/datasets/test.csv')  # doctest: +SKIP
    {"protocol": "file", "path", "/mnt/datasets/test.csv"}
    >>> infer_storage_options(
    ...     'hdfs://username:pwd@node:123/mnt/datasets/test.csv?q=1',
    ...     inherit_storage_options={'extra': 'value'},
    ... )  # doctest: +SKIP
    {"protocol": "hdfs", "username": "username", "password": "pwd",
    "host": "node", "port": 123, "path": "/mnt/datasets/test.csv",
    "url_query": "q=1", "extra": "value"}
    """
    # Handle Windows paths including disk name in this special case
    if (
        re.match(r"^[a-zA-Z]:[\\/]", urlpath)
        or re.match(r"^[a-zA-Z0-9]+://", urlpath) is None
    ):
        return {"protocol": "file", "path": urlpath}

    parsed_path = urlsplit(urlpath)
    protocol = parsed_path.scheme or "file"
    if parsed_path.fragment:
        path = "#".join([parsed_path.path, parsed_path.fragment])
    else:
        path = parsed_path.path
    if protocol == "file":
        # Special case parsing file protocol URL on Windows according to:
        # https://msdn.microsoft.com/en-us/library/jj710207.aspx
        windows_path = re.match(r"^/([a-zA-Z])[:|]([\\/].*)$", path)
        if windows_path:
            path = "%s:%s" % windows_path.groups()

    if protocol in ["http", "https"]:
        # for HTTP, we don't want to parse, as requests will anyway
        return {"protocol": protocol, "path": urlpath}

    options: dict[str, Any] = {"protocol": protocol, "path": path}

    if parsed_path.netloc:
        # Parse `hostname` from netloc manually because `parsed_path.hostname`
        # lowercases the hostname which is not always desirable (e.g. in S3):
        # https://github.com/dask/dask/issues/1417
        options["host"] = parsed_path.netloc.rsplit("@", 1)[-1].rsplit(":", 1)[0]

        if protocol in ("s3", "s3a", "gcs", "gs"):
            options["path"] = options["host"] + options["path"]
        else:
            options["host"] = options["host"]
        if parsed_path.port:
            options["port"] = parsed_path.port
        if parsed_path.username:
            options["username"] = parsed_path.username
        if parsed_path.password:
            options["password"] = parsed_path.password

    if parsed_path.query:
        options["url_query"] = parsed_path.query
    if parsed_path.fragment:
        options["url_fragment"] = parsed_path.fragment

    if inherit_storage_options:
        update_storage_options(options, inherit_storage_options)

    return options


def update_storage_options(
    options: dict[str, Any], inherited: dict[str, Any] | None = None
) -> None:
    if not inherited:
        inherited = {}
    collisions = set(options) & set(inherited)
    if collisions:
        for collision in collisions:
            if options.get(collision) != inherited.get(collision):
                raise KeyError(
                    f"Collision between inferred and specified storage "
                    f"option:\n{collision}"
                )
    options.update(inherited)


# Compression extensions registered via fsspec.compression.register_compression
compressions: dict[str, str] = {}


def infer_compression(filename: str) -> str | None:
    """Infer compression, if available, from filename.

    Infer a named compression type, if registered and available, from filename
    extension. This includes builtin (gz, bz2, zip) compressions, as well as
    optional compressions. See fsspec.compression.register_compression.
    """
    extension = os.path.splitext(filename)[-1].strip(".").lower()
    if extension in compressions:
        return compressions[extension]
    return None


def build_name_function(max_int: float) -> Callable[[int], str]:
    """Returns a function that receives a single integer
    and returns it as a string padded by enough zero characters
    to align with maximum possible integer

    >>> name_f = build_name_function(57)

    >>> name_f(7)
    '07'
    >>> name_f(31)
    '31'
    >>> build_name_function(1000)(42)
    '0042'
    >>> build_name_function(999)(42)
    '042'
    >>> build_name_function(0)(0)
    '0'
    """
    # handle corner cases max_int is 0 or exact power of 10
    max_int += 1e-8

    pad_length = int(math.ceil(math.log10(max_int)))

    def name_function(i: int) -> str:
        return str(i).zfill(pad_length)

    return name_function


def seek_delimiter(file: IO[bytes], delimiter: bytes, blocksize: int) -> bool:
    r"""Seek current file to file start, file end, or byte after delimiter seq.

    Seeks file to next chunk delimiter, where chunks are defined on file start,
    a delimiting sequence, and file end. Use file.tell() to see location afterwards.
    Note that file start is a valid split, so must be at offset > 0 to seek for
    delimiter.

    Parameters
    ----------
    file: a file
    delimiter: bytes
        a delimiter like ``b'\n'`` or message sentinel, matching file .read() type
    blocksize: int
        Number of bytes to read from the file at once.


    Returns
    -------
    Returns True if a delimiter was found, False if at file start or end.

    """

    if file.tell() == 0:
        # beginning-of-file, return without seek
        return False

    # Interface is for binary IO, with delimiter as bytes, but initialize last
    # with result of file.read to preserve compatibility with text IO.
    last: bytes | None = None
    while True:
        current = file.read(blocksize)
        if not current:
            # end-of-file without delimiter
            return False
        full = last + current if last else current
        try:
            if delimiter in full:
                i = full.index(delimiter)
                file.seek(file.tell() - (len(full) - i) + len(delimiter))
                return True
            elif len(current) < blocksize:
                # end-of-file without delimiter
                return False
        except (OSError, ValueError):
            pass
        last = full[-len(delimiter) :]


def read_block(
    f: IO[bytes],
    offset: int,
    length: int | None,
    delimiter: bytes | None = None,
    split_before: bool = False,
) -> bytes:
    """Read a block of bytes from a file

    Parameters
    ----------
    f: File
        Open file
    offset: int
        Byte offset to start read
    length: int
        Number of bytes to read, read through end of file if None
    delimiter: bytes (optional)
        Ensure reading starts and stops at delimiter bytestring
    split_before: bool (optional)
        Start/stop read *before* delimiter bytestring.


    If using the ``delimiter=`` keyword argument we ensure that the read
    starts and stops at delimiter boundaries that follow the locations
    ``offset`` and ``offset + length``. If ``offset`` is zero then we
    start at zero, regardless of delimiter. The bytestring returned WILL
    include the terminating delimiter string.

    Examples
    --------

    >>> from io import BytesIO  # doctest: +SKIP
    >>> f = BytesIO(b'Alice, 100\\nBob, 200\\nCharlie, 300')  # doctest: +SKIP
    >>> read_block(f, 0, 13)  # doctest: +SKIP
    b'Alice, 100\\nBo'

    >>> read_block(f, 0, 13, delimiter=b'\\n')  # doctest: +SKIP
    b'Alice, 100\\nBob, 200\\n'

    >>> read_block(f, 10, 10, delimiter=b'\\n')  # doctest: +SKIP
    b'Bob, 200\\nCharlie, 300'
    """
    if delimiter:
        f.seek(offset)
        found_start_delim = seek_delimiter(f, delimiter, 2**16)
        if length is None:
            return f.read()
        start = f.tell()
        length -= start - offset

        f.seek(start + length)
        found_end_delim = seek_delimiter(f, delimiter, 2**16)
        end = f.tell()

        # Adjust split location to before delimiter iff seek found the
        # delimiter sequence, not start or end of file.
        if found_start_delim and split_before:
            start -= len(delimiter)

        if found_end_delim and split_before:
            end -= len(delimiter)

        offset = start
        length = end - start

    f.seek(offset)

    # TODO: allow length to be None and read to the end of the file?
    assert length is not None
    b = f.read(length)
    return b


def tokenize(*args: Any, **kwargs: Any) -> str:
    """Deterministic token

    (modified from dask.base)

    >>> tokenize([1, 2, '3'])
    '9d71491b50023b06fc76928e6eddb952'

    >>> tokenize('Hello') == tokenize('Hello')
    True
    """
    if kwargs:
        args += (kwargs,)
    try:
        h = md5(str(args).encode())
    except ValueError:
        # FIPS systems: https://github.com/fsspec/filesystem_spec/issues/380
        h = md5(str(args).encode(), usedforsecurity=False)
    return h.hexdigest()


def stringify_path(filepath: str | os.PathLike[str] | pathlib.Path) -> str:
    """Attempt to convert a path-like object to a string.

    Parameters
    ----------
    filepath: object to be converted

    Returns
    -------
    filepath_str: maybe a string version of the object

    Notes
    -----
    Objects supporting the fspath protocol are coerced according to its
    __fspath__ method.

    For backwards compatibility with older Python version, pathlib.Path
    objects are specially coerced.

    Any other object is passed through unchanged, which includes bytes,
    strings, buffers, or anything else that's not even path-like.
    """
    if isinstance(filepath, str):
        return filepath
    elif hasattr(filepath, "__fspath__"):
        return filepath.__fspath__()
    elif isinstance(filepath, pathlib.Path):
        return str(filepath)
    elif hasattr(filepath, "path"):
        return filepath.path
    else:
        return filepath  # type: ignore[return-value]


def make_instance(
    cls: Callable[..., T], args: Sequence[Any], kwargs: dict[str, Any]
) -> T:
    inst = cls(*args, **kwargs)
    inst._determine_worker()  # type: ignore[attr-defined]
    return inst


def common_prefix(paths: Iterable[str]) -> str:
    """For a list of paths, find the shortest prefix common to all"""
    parts = [p.split("/") for p in paths]
    lmax = min(len(p) for p in parts)
    end = 0
    for i in range(lmax):
        end = all(p[i] == parts[0][i] for p in parts)
        if not end:
            break
    i += end
    return "/".join(parts[0][:i])


def other_paths(
    paths: list[str],
    path2: str | list[str],
    exists: bool = False,
    flatten: bool = False,
) -> list[str]:
    """In bulk file operations, construct a new file tree from a list of files

    Parameters
    ----------
    paths: list of str
        The input file tree
    path2: str or list of str
        Root to construct the new list in. If this is already a list of str, we just
        assert it has the right number of elements.
    exists: bool (optional)
        For a str destination, it is already exists (and is a dir), files should
        end up inside.
    flatten: bool (optional)
        Whether to flatten the input directory tree structure so that the output files
        are in the same directory.

    Returns
    -------
    list of str
    """

    if isinstance(path2, str):
        path2 = path2.rstrip("/")

        if flatten:
            path2 = ["/".join((path2, p.split("/")[-1])) for p in paths]
        else:
            cp = common_prefix(paths)
            if exists:
                cp = cp.rsplit("/", 1)[0]
            if not cp and all(not s.startswith("/") for s in paths):
                path2 = ["/".join([path2, p]) for p in paths]
            else:
                path2 = [p.replace(cp, path2, 1) for p in paths]
    else:
        assert len(paths) == len(path2)
    return path2


def is_exception(obj: Any) -> bool:
    return isinstance(obj, BaseException)


def isfilelike(f: Any) -> TypeGuard[IO[bytes]]:
    for attr in ["read", "close", "tell"]:
        if not hasattr(f, attr):
            return False
    return True


def get_protocol(url: str) -> str:
    url = stringify_path(url)
    parts = re.split(r"(\:\:|\://)", url, 1)
    if len(parts) > 1:
        return parts[0]
    return "file"


def can_be_local(path: str) -> bool:
    """Can the given URL be used with open_local?"""
    from fsspec import get_filesystem_class

    try:
        return getattr(get_filesystem_class(get_protocol(path)), "local_file", False)
    except (ValueError, ImportError):
        # not in registry or import failed
        return False


def get_package_version_without_import(name: str) -> str | None:
    """For given package name, try to find the version without importing it

    Import and package.__version__ is still the backup here, so an import
    *might* happen.

    Returns either the version string, or None if the package
    or the version was not readily found.
    """
    if name in sys.modules:
        mod = sys.modules[name]
        if hasattr(mod, "__version__"):
            return mod.__version__
    try:
        return version(name)
    except:  # noqa: E722
        pass
    try:
        import importlib

        mod = importlib.import_module(name)
        return mod.__version__
    except (ImportError, AttributeError):
        return None


def setup_logging(
    logger: logging.Logger | None = None,
    logger_name: str | None = None,
    level: str = "DEBUG",
    clear: bool = True,
) -> logging.Logger:
    if logger is None and logger_name is None:
        raise ValueError("Provide either logger object or logger name")
    logger = logger or logging.getLogger(logger_name)
    handle = logging.StreamHandler()
    formatter = logging.Formatter(
        "%(asctime)s - %(name)s - %(levelname)s - %(funcName)s -- %(message)s"
    )
    handle.setFormatter(formatter)
    if clear:
        logger.handlers.clear()
    logger.addHandler(handle)
    logger.setLevel(level)
    return logger


def _unstrip_protocol(name: str, fs: AbstractFileSystem) -> str:
    return fs.unstrip_protocol(name)


def mirror_from(
    origin_name: str, methods: Iterable[str]
) -> Callable[[type[T]], type[T]]:
    """Mirror attributes and methods from the given
    origin_name attribute of the instance to the
    decorated class"""

    def origin_getter(method: str, self: Any) -> Any:
        origin = getattr(self, origin_name)
        return getattr(origin, method)

    def wrapper(cls: type[T]) -> type[T]:
        for method in methods:
            wrapped_method = partial(origin_getter, method)
            setattr(cls, method, property(wrapped_method))
        return cls

    return wrapper


@contextlib.contextmanager
def nullcontext(obj: T) -> Iterator[T]:
    yield obj


def merge_offset_ranges(
    paths: list[str],
    starts: list[int] | int,
    ends: list[int] | int,
    max_gap: int = 0,
    max_block: int | None = None,
    sort: bool = True,
) -> tuple[list[str], list[int], list[int]]:
    """Merge adjacent byte-offset ranges when the inter-range
    gap is <= `max_gap`, and when the merged byte range does not
    exceed `max_block` (if specified). By default, this function
    will re-order the input paths and byte ranges to ensure sorted
    order. If the user can guarantee that the inputs are already
    sorted, passing `sort=False` will skip the re-ordering.
    """
    # Check input
    if not isinstance(paths, list):
        raise TypeError
    if not isinstance(starts, list):
        starts = [starts] * len(paths)
    if not isinstance(ends, list):
        ends = [ends] * len(paths)
    if len(starts) != len(paths) or len(ends) != len(paths):
        raise ValueError

    # Early Return
    if len(starts) <= 1:
        return paths, starts, ends

    starts = [s or 0 for s in starts]
    # Sort by paths and then ranges if `sort=True`
    if sort:
        paths, starts, ends = (
            list(v)
            for v in zip(
                *sorted(
                    zip(paths, starts, ends),
                )
            )
        )

    if paths:
        # Loop through the coupled `paths`, `starts`, and
        # `ends`, and merge adjacent blocks when appropriate
        new_paths = paths[:1]
        new_starts = starts[:1]
        new_ends = ends[:1]
        for i in range(1, len(paths)):
            if paths[i] == paths[i - 1] and new_ends[-1] is None:
                continue
            elif (
                paths[i] != paths[i - 1]
                or ((starts[i] - new_ends[-1]) > max_gap)
                or (max_block is not None and (ends[i] - new_starts[-1]) > max_block)
            ):
                # Cannot merge with previous block.
                # Add new `paths`, `starts`, and `ends` elements
                new_paths.append(paths[i])
                new_starts.append(starts[i])
                new_ends.append(ends[i])
            else:
                # Merge with previous block by updating the
                # last element of `ends`
                new_ends[-1] = ends[i]
        return new_paths, new_starts, new_ends

    # `paths` is empty. Just return input lists
    return paths, starts, ends


def file_size(filelike: IO[bytes]) -> int:
    """Find length of any open read-mode file-like"""
    pos = filelike.tell()
    try:
        return filelike.seek(0, 2)
    finally:
        filelike.seek(pos)


@contextlib.contextmanager
def atomic_write(path: str, mode: str = "wb"):
    """
    A context manager that opens a temporary file next to `path` and, on exit,
    replaces `path` with the temporary file, thereby updating `path`
    atomically.
    """
    fd, fn = tempfile.mkstemp(
        dir=os.path.dirname(path), prefix=os.path.basename(path) + "-"
    )
    try:
        with open(fd, mode) as fp:
            yield fp
    except BaseException:
        with contextlib.suppress(FileNotFoundError):
            os.unlink(fn)
        raise
    else:
        os.replace(fn, path)


def _translate(pat, STAR, QUESTION_MARK):
    # Copied from: https://github.com/python/cpython/pull/106703.
    res: list[str] = []
    add = res.append
    i, n = 0, len(pat)
    while i < n:
        c = pat[i]
        i = i + 1
        if c == "*":
            # compress consecutive `*` into one
            if (not res) or res[-1] is not STAR:
                add(STAR)
        elif c == "?":
            add(QUESTION_MARK)
        elif c == "[":
            j = i
            if j < n and pat[j] == "!":
                j = j + 1
            if j < n and pat[j] == "]":
                j = j + 1
            while j < n and pat[j] != "]":
                j = j + 1
            if j >= n:
                add("\\[")
            else:
                stuff = pat[i:j]
                if "-" not in stuff:
                    stuff = stuff.replace("\\", r"\\")
                else:
                    chunks = []
                    k = i + 2 if pat[i] == "!" else i + 1
                    while True:
                        k = pat.find("-", k, j)
                        if k < 0:
                            break
                        chunks.append(pat[i:k])
                        i = k + 1
                        k = k + 3
                    chunk = pat[i:j]
                    if chunk:
                        chunks.append(chunk)
                    else:
                        chunks[-1] += "-"
                    # Remove empty ranges -- invalid in RE.
                    for k in range(len(chunks) - 1, 0, -1):
                        if chunks[k - 1][-1] > chunks[k][0]:
                            chunks[k - 1] = chunks[k - 1][:-1] + chunks[k][1:]
                            del chunks[k]
                    # Escape backslashes and hyphens for set difference (--).
                    # Hyphens that create ranges shouldn't be escaped.
                    stuff = "-".join(
                        s.replace("\\", r"\\").replace("-", r"\-") for s in chunks
                    )
                # Escape set operations (&&, ~~ and ||).
                stuff = re.sub(r"([&~|])", r"\\\1", stuff)
                i = j + 1
                if not stuff:
                    # Empty range: never match.
                    add("(?!)")
                elif stuff == "!":
                    # Negated empty range: match any character.
                    add(".")
                else:
                    if stuff[0] == "!":
                        stuff = "^" + stuff[1:]
                    elif stuff[0] in ("^", "["):
                        stuff = "\\" + stuff
                    add(f"[{stuff}]")
        else:
            add(re.escape(c))
    assert i == n
    return res


def glob_translate(pat):
    # Copied from: https://github.com/python/cpython/pull/106703.
    # The keyword parameters' values are fixed to:
    # recursive=True, include_hidden=True, seps=None
    """Translate a pathname with shell wildcards to a regular expression."""
    if os.path.altsep:
        seps = os.path.sep + os.path.altsep
    else:
        seps = os.path.sep
    escaped_seps = "".join(map(re.escape, seps))
    any_sep = f"[{escaped_seps}]" if len(seps) > 1 else escaped_seps
    not_sep = f"[^{escaped_seps}]"
    one_last_segment = f"{not_sep}+"
    one_segment = f"{one_last_segment}{any_sep}"
    any_segments = f"(?:.+{any_sep})?"
    any_last_segments = ".*"
    results = []
    parts = re.split(any_sep, pat)
    last_part_idx = len(parts) - 1
    for idx, part in enumerate(parts):
        if part == "*":
            results.append(one_segment if idx < last_part_idx else one_last_segment)
            continue
        if part == "**":
            results.append(any_segments if idx < last_part_idx else any_last_segments)
            continue
        elif "**" in part:
            raise ValueError(
                "Invalid pattern: '**' can only be an entire path component"
            )
        if part:
            results.extend(_translate(part, f"{not_sep}*", not_sep))
        if idx < last_part_idx:
            results.append(any_sep)
    res = "".join(results)
    return rf"(?s:{res})\Z"
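As a quick illustration of the range-merging helper above (a sketch; the file name and byte offsets are arbitrary examples), adjacent requests to the same path are coalesced whenever the gap between them does not exceed max_gap:

from fsspec.utils import merge_offset_ranges

paths = ["data.parquet", "data.parquet", "data.parquet"]
starts = [0, 100, 500]
ends = [90, 200, 600]
# The first two ranges are 10 bytes apart, so with max_gap=10 they merge
# into a single (0, 200) read; the third range stays separate.
print(merge_offset_ranges(paths, starts, ends, max_gap=10))
# expected: (['data.parquet', 'data.parquet'], [0, 500], [200, 600])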
lib/python3.11/site-packages/functorch/_C.cpython-311-darwin.so
ADDED
Binary file (332 kB).
lib/python3.11/site-packages/functorch/__init__.py
ADDED
@@ -0,0 +1,38 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import torch

from torch._functorch.deprecated import (
    combine_state_for_ensemble,
    functionalize,
    grad,
    grad_and_value,
    hessian,
    jacfwd,
    jacrev,
    jvp,
    make_functional,
    make_functional_with_buffers,
    vjp,
    vmap,
)

# utilities. Maybe these should go in their own namespace in the future?
from torch._functorch.make_functional import (
    FunctionalModule,
    FunctionalModuleWithBuffers,
)

# Top-level APIs. Please think carefully before adding something to the
# top-level namespace:
# - private helper functions should go into torch._functorch
# - very experimental things should go into functorch.experimental
# - compilation related things should go into functorch.compile

# Was never documented
from torch._functorch.python_key import make_fx

__version__ = torch.__version__
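These re-exports are what keep the familiar top-level calls importable; a minimal sketch of the deprecated-but-still-working API (assumes torch is installed; the loss function is only an example):

import torch
from functorch import grad, vmap

def loss(x):
    return (x ** 2).sum()

x = torch.randn(3)
g = grad(loss)(x)                        # gradient of a scalar-valued function, equals 2 * x
batched = vmap(loss)(torch.randn(5, 3))  # one scalar loss per row of the batch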
lib/python3.11/site-packages/functorch/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (947 Bytes).
lib/python3.11/site-packages/functorch/_src/__init__.py
ADDED
File without changes
lib/python3.11/site-packages/functorch/_src/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (229 Bytes).
lib/python3.11/site-packages/functorch/_src/aot_autograd/__init__.py
ADDED
@@ -0,0 +1,8 @@
# This file has moved to under torch/_functorch. It is not public API.
# If you are not a PyTorch developer and you are relying on the following
# imports, please file an issue.
from torch._functorch.aot_autograd import (
    aot_autograd_decompositions,
    KNOWN_TYPES,
    PytreeThunk,
)
lib/python3.11/site-packages/functorch/_src/aot_autograd/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (417 Bytes).
lib/python3.11/site-packages/functorch/_src/eager_transforms/__init__.py
ADDED
@@ -0,0 +1,7 @@
# This file has moved to under torch/_functorch. It is not public API.
# If you are not a PyTorch developer and you are relying on the following
# imports, please file an issue.
from torch._functorch.eager_transforms import (
    _assert_wrapped_functional,
    _unwrap_functional_tensor,
)
lib/python3.11/site-packages/functorch/_src/eager_transforms/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (406 Bytes).
lib/python3.11/site-packages/functorch/_src/make_functional/__init__.py
ADDED
@@ -0,0 +1,4 @@
# This file has moved to under torch/_functorch. It is not public API.
# If you are not a PyTorch developer and you are relying on the following
# imports, please file an issue.
from torch._functorch.make_functional import _swap_state
lib/python3.11/site-packages/functorch/_src/make_functional/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (329 Bytes).
lib/python3.11/site-packages/functorch/_src/vmap/__init__.py
ADDED
@@ -0,0 +1,16 @@
# This file has moved to under torch/_functorch. It is not public API.
# If you are not a PyTorch developer and you are relying on the following
# imports, please file an issue.
from torch._functorch.vmap import (
    _add_batch_dim,
    _broadcast_to_and_flatten,
    _create_batched_inputs,
    _get_name,
    _process_batched_inputs,
    _remove_batch_dim,
    _unwrap_batched,
    _validate_and_get_batch_size,
    Tensor,
    tree_flatten,
    tree_unflatten,
)
lib/python3.11/site-packages/functorch/_src/vmap/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (705 Bytes).
lib/python3.11/site-packages/functorch/compile/__init__.py
ADDED
@@ -0,0 +1,31 @@
from torch._functorch import config
from torch._functorch.aot_autograd import (
    aot_function,
    aot_module,
    aot_module_simplified,
    compiled_function,
    compiled_module,
    get_aot_compilation_context,
    get_aot_graph_name,
    get_graph_being_compiled,
    make_boxed_compiler,
    make_boxed_func,
)
from torch._functorch.compilers import (
    debug_compile,
    default_decompositions,
    draw_graph_compile,
    memory_efficient_fusion,
    nnc_jit,
    nop,
    print_compile,
    ts_compile,
)
from torch._functorch.fx_minifier import minifier
from torch._functorch.partitioners import (
    default_partition,
    draw_graph,
    draw_joint_graph,
    min_cut_rematerialization_partition,
)
from torch._functorch.python_key import pythonkey_decompose
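A small sketch of the ahead-of-time compilation entry point re-exported here, using the nop compiler so the traced forward and backward graphs are run unchanged (illustrative only; the function f is an assumption):

import torch
from functorch.compile import aot_function, nop

def f(a, b):
    return (a * b).sum()

# Trace forward/backward graphs once, then run them through the "nop" compiler.
aot_f = aot_function(f, fw_compiler=nop, bw_compiler=nop)
out = aot_f(torch.randn(4, requires_grad=True), torch.randn(4))
out.backward()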
lib/python3.11/site-packages/functorch/compile/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (1.47 kB).
lib/python3.11/site-packages/functorch/dim/__init__.py
ADDED
@@ -0,0 +1,179 @@
import dis
import inspect
from typing import Sequence, Union

import torch

import functorch._C
from functorch._C import dim as _C
from .tree_map import tree_flatten, tree_map
from .wrap_type import wrap_type

_C._patch_tensor_class()
dims, DimList, dimlists = _C.dims, _C.DimList, _C.dimlists


class DimensionMismatchError(Exception):
    pass


class DimensionBindError(Exception):
    pass


from . import op_properties

# use dict to avoid writing C++ bindings for set
pointwise = {t: True for t in op_properties.pointwise}

use_c = True
if not use_c:
    from . import reference


class _Tensor:
    # fast path around slow wrapping/unwrapping logic for simply queries used
    # by the implementation...

    @property
    def dims(self):
        return tuple(d for d in self._levels if isinstance(d, Dim))

    def dim(self):
        return self.ndim

    if use_c:
        __torch_function__ = classmethod(_C.__torch_function__)
        expand = _C._instancemethod(_C.expand)
    else:
        __torch_function__ = reference.__torch_function__
        expand = reference.expand

    index = _C._instancemethod(_C.index)

    def __repr__(self):
        tensor, levels, ndim = self._tensor, self._levels, self.ndim
        return f"{tensor}\nwith dims={tuple(l + ndim if isinstance(l, int) else l for l in levels)} sizes={tuple(tensor.size())}"


TensorLike = (_Tensor, torch.Tensor)


class Dim(_C.Dim, _Tensor):
    # note that _C.Dim comes before tensor because we want the Dim API for things like size to take precendence.
    # Tensor defines format, but we want to print Dims with special formatting
    __format__ = object.__format__


class Tensor(_Tensor, _C.Tensor):
    if not use_c:
        from_batched = staticmethod(_C.Tensor_from_batched)
    from_positional = staticmethod(_C.Tensor_from_positional)
    sum = _C._instancemethod(_C.Tensor_sum)


def cat(tensors, dim, new_dim):
    n = dims()
    return stack(tensors, n, dim).index([n, dim], new_dim)


if use_c:
    _wrap = _C._wrap

    def _def(name, *args, **kwargs):
        orig = getattr(torch.Tensor, name)
        setattr(_Tensor, name, _C._instancemethod(_wrap(orig, *args, **kwargs)))

    t__getitem__ = _C._instancemethod(_C.__getitem__)
    stack = _C.stack
    split = _C._instancemethod(_C.split)
else:
    _wrap, _def = reference._wrap, reference._def
    t__getitem__ = reference.t__getitem__
    stack = reference.stack
    split = reference.split

# note: there is no python reference
t__setitem__ = _C._instancemethod(_C.__setitem__)
# this is patched in the C API because otherwise torch.Tensor will
# no longer be considered a sequence and things will break
# torch.Tensor.__getitem__ = t__getitem__

_Tensor.__getitem__ = t__getitem__
# torch.Tensor.__setitem__ = t__setitem__
_Tensor.__setitem__ = t__setitem__

torch.Tensor.split = split
_Tensor.split = split
torch.Tensor.expand = _C._instancemethod(_C.expand)
torch.Tensor.index = _C._instancemethod(_C.index)
wrap_type(use_c, _Tensor, torch.Tensor, _Tensor.__torch_function__)
del _Tensor.ndim

if use_c:
    _Tensor.order = _C._instancemethod(_C.order)
else:
    _Tensor.order = reference.positional

_def("mean")
_def("sum")
_def("all")
_def("amax")
_def("amin")
_def("aminmax")
_def("any")
_def("count_nonzero")
_def("logsumexp")
_def("nanmean")
_def("nansum")
_def("prod")
_def("std", keepdim_offset=2)
_def("var", keepdim_offset=2)
_def("max", single_dim=True)
_def("min", single_dim=True)
_def("argmax", single_dim=True)
_def("argmin", single_dim=True)
_def("kthvalue", single_dim=True)
_def("median", single_dim=True)
_def("nanmedian", single_dim=True)
_def("mode", single_dim=True)
_def("sort", reduce=False)
_def("argsort", reduce=False)
_def("unbind", single_dim=True)
_def("chunk", dim_offset=1, reduce=False)
_def("cummax", single_dim=True, reduce=False)
_def("cummin", single_dim=True, reduce=False)
_def("cumprod", single_dim=True, reduce=False)
_def("cumprod_", single_dim=True, reduce=False)
_def("cumsum", single_dim=True, reduce=False)
_def("cumsum_", single_dim=True, reduce=False)
_def("logcumsumexp", single_dim=True, reduce=False)
_def("renorm", dim_offset=1, single_dim=True, reduce=False)
_def("softmax", single_dim=True, reduce=False)
softmax = _wrap(torch.nn.functional.softmax, single_dim=True, reduce=False)

# stuff to handle in the future, because they require special
# binding logic for dims
# cross
# diag_embed
# diagonal
# diagonal_scatter
# diff
# nanquantile
# quantile
# roll
# rot90
# topk (new dimes on output)
# should these all be subsumed by inplace indexing?
# index_add_
# index_add
# index_copy
# index_copy_
# index_fill
# index_fill_
# index_select
# scatter
# scatter_
# scatter_add
# scatter_add_
# scatter_reduce
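For orientation, the first-class dimension objects created by dims above are used roughly like this (a sketch under the assumption that the compiled functorch._C extension is available; tensor shapes are arbitrary examples):

import torch
from functorch.dim import dims

i, j = dims(2)
A = torch.randn(3, 4)
# Bind named dimensions to the tensor's axes, then reduce over one of them.
Ai = A[i, j]
row_sums = Ai.sum(j)       # result still carries the first-class dim `i`
plain = row_sums.order(i)  # back to an ordinary positional 3-element tensor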
lib/python3.11/site-packages/functorch/dim/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (7.99 kB).
lib/python3.11/site-packages/functorch/dim/__pycache__/batch_tensor.cpython-311.pyc
ADDED
Binary file (1.29 kB).
lib/python3.11/site-packages/functorch/dim/__pycache__/delayed_mul_tensor.cpython-311.pyc
ADDED
Binary file (5.61 kB).
lib/python3.11/site-packages/functorch/dim/__pycache__/dim.cpython-311.pyc
ADDED
Binary file (6.89 kB).
lib/python3.11/site-packages/functorch/dim/__pycache__/magic_trace.cpython-311.pyc
ADDED
Binary file (2.49 kB).
lib/python3.11/site-packages/functorch/dim/__pycache__/op_properties.cpython-311.pyc
ADDED
Binary file (12.1 kB).
lib/python3.11/site-packages/functorch/dim/__pycache__/reference.cpython-311.pyc
ADDED
Binary file (32.4 kB).
lib/python3.11/site-packages/functorch/dim/__pycache__/tree_map.cpython-311.pyc
ADDED
Binary file (803 Bytes).
lib/python3.11/site-packages/functorch/dim/__pycache__/wrap_type.cpython-311.pyc
ADDED
Binary file (2.55 kB).
lib/python3.11/site-packages/functorch/dim/batch_tensor.py
ADDED
@@ -0,0 +1,25 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
from contextlib import contextmanager

from torch._C._functorch import _vmap_add_layers, _vmap_remove_layers

_enabled = False


@contextmanager
def _enable_layers(dims):
    global _enabled
    assert not _enabled
    input = sorted((d._level, d.size) for d in dims if not isinstance(d, int))
    n = len(input)
    try:
        _vmap_add_layers(input)
        _enabled = True
        yield
    finally:
        _enabled = False
        _vmap_remove_layers(n)
lib/python3.11/site-packages/functorch/dim/delayed_mul_tensor.py
ADDED
@@ -0,0 +1,77 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import torch

from . import _Tensor, Tensor
from .reference import _dims, _enable_layers, llist, ltuple


class DelayedMulTensor(_Tensor):
    def __init__(self, lhs, rhs):
        self._lhs, self._rhs = lhs, rhs
        self._data = None
        self._levels_data = None
        self._has_device = lhs._has_device or rhs._has_device
        self._batchtensor_data = None
        self._tensor_data = None

    @property
    def _levels(self):
        if self._levels_data is None:
            levels = llist(self._lhs._levels)
            for l in self._rhs._levels:
                if l not in levels:
                    levels.append(l)
            self._levels_data = ltuple(levels)
        return self._levels_data

    @property
    def _batchtensor(self):
        if self._batchtensor_data is None:
            with _enable_layers(self._levels):
                print("bt multiply fallback")
                self._batchtensor_data = self._lhs._batchtensor * self._rhs._batchtensor
        return self._batchtensor_data

    @property
    def _tensor(self):
        if self._tensor_data is None:
            self._tensor_data = Tensor.from_batched(
                self._batchtensor, self._has_device
            )._tensor
        return self._tensor_data

    @property
    def ndim(self):
        return self._batchtensor.ndim

    @property
    def dims(self):
        return ltuple(super().dims)

    def sum(self, dim):
        dims = _dims(dim, 0, False, False)
        n = ord("a")
        all_levels = self._levels

        def to_char(d):
            return chr(n + all_levels.index(d))

        plhs, levelslhs = self._lhs._tensor, self._lhs._levels
        prhs, levelsrhs = self._rhs._tensor, self._rhs._levels
        new_dims = tuple(d for d in self.dims if d not in dims)
        new_levels = [l for l in self._levels if l not in dims]
        fmt = "".join(
            [
                *(to_char(d) for d in levelslhs),
                ",",
                *(to_char(d) for d in levelsrhs),
                "->",
                *(to_char(d) for d in new_levels),
            ]
        )
        result_data = torch.einsum(fmt, (plhs, prhs))
        return Tensor.from_positional(result_data, new_levels, True)
lib/python3.11/site-packages/functorch/dim/dim.py
ADDED
@@ -0,0 +1,110 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
_vmap_levels = []


@dataclass
class LevelInfo:
    level: int
    alive: bool = True


class Dim:
    def __init__(self, name: str, size: Union[None, int] = None):
        self.name = name
        self._size = None
        self._vmap_level = None
        if size is not None:
            self.size = size

    def __del__(self):
        if self._vmap_level is not None:
            _vmap_active_levels[self._vmap_stack].alive = False
            while (
                not _vmap_levels[-1].alive and current_level() == _vmap_levels[-1].level
            ):
                _vmap_decrement_nesting()
                _vmap_levels.pop()

    @property
    def size(self):
        assert self.is_bound
        return self._size

    @size.setter
    def size(self, size: int):
        if self._size is None:
            self._size = size
            self._vmap_level = _vmap_increment_nesting(size, "same")
            self._vmap_stack = len(_vmap_levels)
            _vmap_levels.append(LevelInfo(self._vmap_level))

        elif self._size != size:
            raise DimensionBindError(
                f"Dim '{self}' previously bound to a dimension of size {self._size} cannot bind to a dimension of size {size}"
            )

    @property
    def is_bound(self):
        return self._size is not None

    def __repr__(self):
        return self.name


def extract_name(inst):
    assert inst.opname == "STORE_FAST" or inst.opname == "STORE_NAME"
    return inst.argval


_cache = {}


def dims(lists=0):
    frame = inspect.currentframe()
    assert frame is not None
    calling_frame = frame.f_back
    assert calling_frame is not None
    code, lasti = calling_frame.f_code, calling_frame.f_lasti
    key = (code, lasti)
    if key not in _cache:
        first = lasti // 2 + 1
        instructions = list(dis.get_instructions(calling_frame.f_code))
        unpack = instructions[first]

        if unpack.opname == "STORE_FAST" or unpack.opname == "STORE_NAME":
            # just a single dim, not a list
            name = unpack.argval
            ctor = Dim if lists == 0 else DimList
            _cache[key] = lambda: ctor(name=name)
        else:
            assert unpack.opname == "UNPACK_SEQUENCE"
            ndims = unpack.argval
            names = tuple(
                extract_name(instructions[first + 1 + i]) for i in range(ndims)
            )
            first_list = len(names) - lists
            _cache[key] = lambda: tuple(
                Dim(n) if i < first_list else DimList(name=n)
                for i, n in enumerate(names)
            )
    return _cache[key]()


def _dim_set(positional, arg):
    def convert(a):
        if isinstance(a, Dim):
            return a
        else:
            assert isinstance(a, int)
            return positional[a]

    if arg is None:
        return positional
    elif not isinstance(arg, (Dim, int)):
        return tuple(convert(a) for a in arg)
    else:
        return (convert(arg),)
lib/python3.11/site-packages/functorch/dim/magic_trace.py
ADDED
@@ -0,0 +1,42 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import os
import signal
import subprocess
from contextlib import contextmanager


@contextmanager
def magic_trace(output="trace.fxt", magic_trace_cache="/tmp/magic-trace"):
    pid = os.getpid()
    if not os.path.exists(magic_trace_cache):
        print(f"Downloading magic_trace to: {magic_trace_cache}")
        subprocess.run(
            [
                "wget",
                "-O",
                magic_trace_cache,
                "-q",
                "https://github.com/janestreet/magic-trace/releases/download/v1.0.2/magic-trace",
            ]
        )
        subprocess.run(["chmod", "+x", magic_trace_cache])
    args = [magic_trace_cache, "attach", "-pid", str(pid), "-o", output]
    p = subprocess.Popen(args, stderr=subprocess.PIPE, encoding="utf-8")
    while True:
        x = p.stderr.readline()
        print(x)
        if "Attached" in x:
            break
    try:
        yield
    finally:
        p.send_signal(signal.SIGINT)
        r = p.wait()
        print(p.stderr.read())
        p.stderr.close()
        if r != 0:
            raise ValueError(f"magic_trace exited abnormally: {r}")
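Typical use of the helper defined above, sketched under the assumption of a Linux host with perf support; the workload and output path are arbitrary examples:

import torch
from functorch.dim.magic_trace import magic_trace

def workload():
    x = torch.randn(1024, 1024)
    for _ in range(10):
        x = x @ x

# Attaches the magic-trace profiler to this process for the duration
# of the block and writes a trace file on exit.
with magic_trace(output="matmul.fxt"):
    workload()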