reach-vb (HF staff) committed
Commit 8f05c80
1 parent: 6017349

da0609001d38541f2e1d84b2fab95a3e5cb5413337fc2247150c3f19aae1664e

Files changed (50)
  1. lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/INSTALLER +1 -0
  2. lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/LICENSE +29 -0
  3. lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/METADATA +168 -0
  4. lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/RECORD +104 -0
  5. lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/WHEEL +5 -0
  6. lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/top_level.txt +1 -0
  7. lib/python3.11/site-packages/fsspec/parquet.py +551 -0
  8. lib/python3.11/site-packages/fsspec/registry.py +299 -0
  9. lib/python3.11/site-packages/fsspec/spec.py +1963 -0
  10. lib/python3.11/site-packages/fsspec/tests/abstract/__init__.py +287 -0
  11. lib/python3.11/site-packages/fsspec/tests/abstract/__pycache__/__init__.cpython-311.pyc +0 -0
  12. lib/python3.11/site-packages/fsspec/tests/abstract/__pycache__/common.cpython-311.pyc +0 -0
  13. lib/python3.11/site-packages/fsspec/tests/abstract/__pycache__/copy.cpython-311.pyc +0 -0
  14. lib/python3.11/site-packages/fsspec/tests/abstract/__pycache__/get.cpython-311.pyc +0 -0
  15. lib/python3.11/site-packages/fsspec/tests/abstract/__pycache__/put.cpython-311.pyc +0 -0
  16. lib/python3.11/site-packages/fsspec/tests/abstract/common.py +175 -0
  17. lib/python3.11/site-packages/fsspec/tests/abstract/copy.py +543 -0
  18. lib/python3.11/site-packages/fsspec/tests/abstract/get.py +587 -0
  19. lib/python3.11/site-packages/fsspec/tests/abstract/put.py +577 -0
  20. lib/python3.11/site-packages/fsspec/transaction.py +85 -0
  21. lib/python3.11/site-packages/fsspec/utils.py +742 -0
  22. lib/python3.11/site-packages/functorch/_C.cpython-311-darwin.so +0 -0
  23. lib/python3.11/site-packages/functorch/__init__.py +38 -0
  24. lib/python3.11/site-packages/functorch/__pycache__/__init__.cpython-311.pyc +0 -0
  25. lib/python3.11/site-packages/functorch/_src/__init__.py +0 -0
  26. lib/python3.11/site-packages/functorch/_src/__pycache__/__init__.cpython-311.pyc +0 -0
  27. lib/python3.11/site-packages/functorch/_src/aot_autograd/__init__.py +8 -0
  28. lib/python3.11/site-packages/functorch/_src/aot_autograd/__pycache__/__init__.cpython-311.pyc +0 -0
  29. lib/python3.11/site-packages/functorch/_src/eager_transforms/__init__.py +7 -0
  30. lib/python3.11/site-packages/functorch/_src/eager_transforms/__pycache__/__init__.cpython-311.pyc +0 -0
  31. lib/python3.11/site-packages/functorch/_src/make_functional/__init__.py +4 -0
  32. lib/python3.11/site-packages/functorch/_src/make_functional/__pycache__/__init__.cpython-311.pyc +0 -0
  33. lib/python3.11/site-packages/functorch/_src/vmap/__init__.py +16 -0
  34. lib/python3.11/site-packages/functorch/_src/vmap/__pycache__/__init__.cpython-311.pyc +0 -0
  35. lib/python3.11/site-packages/functorch/compile/__init__.py +31 -0
  36. lib/python3.11/site-packages/functorch/compile/__pycache__/__init__.cpython-311.pyc +0 -0
  37. lib/python3.11/site-packages/functorch/dim/__init__.py +179 -0
  38. lib/python3.11/site-packages/functorch/dim/__pycache__/__init__.cpython-311.pyc +0 -0
  39. lib/python3.11/site-packages/functorch/dim/__pycache__/batch_tensor.cpython-311.pyc +0 -0
  40. lib/python3.11/site-packages/functorch/dim/__pycache__/delayed_mul_tensor.cpython-311.pyc +0 -0
  41. lib/python3.11/site-packages/functorch/dim/__pycache__/dim.cpython-311.pyc +0 -0
  42. lib/python3.11/site-packages/functorch/dim/__pycache__/magic_trace.cpython-311.pyc +0 -0
  43. lib/python3.11/site-packages/functorch/dim/__pycache__/op_properties.cpython-311.pyc +0 -0
  44. lib/python3.11/site-packages/functorch/dim/__pycache__/reference.cpython-311.pyc +0 -0
  45. lib/python3.11/site-packages/functorch/dim/__pycache__/tree_map.cpython-311.pyc +0 -0
  46. lib/python3.11/site-packages/functorch/dim/__pycache__/wrap_type.cpython-311.pyc +0 -0
  47. lib/python3.11/site-packages/functorch/dim/batch_tensor.py +25 -0
  48. lib/python3.11/site-packages/functorch/dim/delayed_mul_tensor.py +77 -0
  49. lib/python3.11/site-packages/functorch/dim/dim.py +110 -0
  50. lib/python3.11/site-packages/functorch/dim/magic_trace.py +42 -0
lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/INSTALLER ADDED
@@ -0,0 +1 @@
+ pip
lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/LICENSE ADDED
@@ -0,0 +1,29 @@
+ BSD 3-Clause License
+
+ Copyright (c) 2018, Martin Durant
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+ * Neither the name of the copyright holder nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/METADATA ADDED
@@ -0,0 +1,168 @@
+ Metadata-Version: 2.1
+ Name: fsspec
+ Version: 2023.12.2
+ Summary: File-system specification
+ Home-page: https://github.com/fsspec/filesystem_spec
+ Maintainer: Martin Durant
+ Maintainer-email: [email protected]
+ License: BSD
+ Project-URL: Changelog, https://filesystem-spec.readthedocs.io/en/latest/changelog.html
+ Project-URL: Documentation, https://filesystem-spec.readthedocs.io/en/latest/
+ Keywords: file
+ Classifier: Development Status :: 4 - Beta
+ Classifier: Intended Audience :: Developers
+ Classifier: License :: OSI Approved :: BSD License
+ Classifier: Operating System :: OS Independent
+ Classifier: Programming Language :: Python :: 3.8
+ Classifier: Programming Language :: Python :: 3.9
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Requires-Python: >=3.8
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Provides-Extra: abfs
+ Requires-Dist: adlfs ; extra == 'abfs'
+ Provides-Extra: adl
+ Requires-Dist: adlfs ; extra == 'adl'
+ Provides-Extra: arrow
+ Requires-Dist: pyarrow >=1 ; extra == 'arrow'
+ Provides-Extra: dask
+ Requires-Dist: dask ; extra == 'dask'
+ Requires-Dist: distributed ; extra == 'dask'
+ Provides-Extra: devel
+ Requires-Dist: pytest ; extra == 'devel'
+ Requires-Dist: pytest-cov ; extra == 'devel'
+ Provides-Extra: dropbox
+ Requires-Dist: dropboxdrivefs ; extra == 'dropbox'
+ Requires-Dist: requests ; extra == 'dropbox'
+ Requires-Dist: dropbox ; extra == 'dropbox'
+ Provides-Extra: entrypoints
+ Provides-Extra: full
+ Requires-Dist: adlfs ; extra == 'full'
+ Requires-Dist: aiohttp !=4.0.0a0,!=4.0.0a1 ; extra == 'full'
+ Requires-Dist: dask ; extra == 'full'
+ Requires-Dist: distributed ; extra == 'full'
+ Requires-Dist: dropbox ; extra == 'full'
+ Requires-Dist: dropboxdrivefs ; extra == 'full'
+ Requires-Dist: fusepy ; extra == 'full'
+ Requires-Dist: gcsfs ; extra == 'full'
+ Requires-Dist: libarchive-c ; extra == 'full'
+ Requires-Dist: ocifs ; extra == 'full'
+ Requires-Dist: panel ; extra == 'full'
+ Requires-Dist: paramiko ; extra == 'full'
+ Requires-Dist: pyarrow >=1 ; extra == 'full'
+ Requires-Dist: pygit2 ; extra == 'full'
+ Requires-Dist: requests ; extra == 'full'
+ Requires-Dist: s3fs ; extra == 'full'
+ Requires-Dist: smbprotocol ; extra == 'full'
+ Requires-Dist: tqdm ; extra == 'full'
+ Provides-Extra: fuse
+ Requires-Dist: fusepy ; extra == 'fuse'
+ Provides-Extra: gcs
+ Requires-Dist: gcsfs ; extra == 'gcs'
+ Provides-Extra: git
+ Requires-Dist: pygit2 ; extra == 'git'
+ Provides-Extra: github
+ Requires-Dist: requests ; extra == 'github'
+ Provides-Extra: gs
+ Requires-Dist: gcsfs ; extra == 'gs'
+ Provides-Extra: gui
+ Requires-Dist: panel ; extra == 'gui'
+ Provides-Extra: hdfs
+ Requires-Dist: pyarrow >=1 ; extra == 'hdfs'
+ Provides-Extra: http
+ Requires-Dist: requests ; extra == 'http'
+ Requires-Dist: aiohttp !=4.0.0a0,!=4.0.0a1 ; extra == 'http'
+ Provides-Extra: libarchive
+ Requires-Dist: libarchive-c ; extra == 'libarchive'
+ Provides-Extra: oci
+ Requires-Dist: ocifs ; extra == 'oci'
+ Provides-Extra: s3
+ Requires-Dist: s3fs ; extra == 's3'
+ Provides-Extra: sftp
+ Requires-Dist: paramiko ; extra == 'sftp'
+ Provides-Extra: smb
+ Requires-Dist: smbprotocol ; extra == 'smb'
+ Provides-Extra: ssh
+ Requires-Dist: paramiko ; extra == 'ssh'
+ Provides-Extra: tqdm
+ Requires-Dist: tqdm ; extra == 'tqdm'
+
+ # filesystem_spec
+
+ [![PyPI version](https://badge.fury.io/py/fsspec.svg)](https://pypi.python.org/pypi/fsspec/)
+ [![Anaconda-Server Badge](https://anaconda.org/conda-forge/fsspec/badges/version.svg)](https://anaconda.org/conda-forge/fsspec)
+ ![Build](https://github.com/fsspec/filesystem_spec/workflows/CI/badge.svg)
+ [![Docs](https://readthedocs.org/projects/filesystem-spec/badge/?version=latest)](https://filesystem-spec.readthedocs.io/en/latest/?badge=latest)
+ [![PyPi downloads](https://img.shields.io/pypi/dm/fsspec?label=pypi%20downloads&style=flat)](https://pepy.tech/project/fsspec)
+
+ A specification for pythonic filesystems.
+
+ ## Install
+
+ ```bash
+ pip install fsspec
+ ```
+
+ would install the base fsspec. Various optionally supported features might require specification of custom
+ extra require, e.g. `pip install fsspec[ssh]` will install dependencies for `ssh` backends support.
+ Use `pip install fsspec[full]` for installation of all known extra dependencies.
+
+ Up-to-date package also provided through conda-forge distribution:
+
+ ```bash
+ conda install -c conda-forge fsspec
+ ```
+
+
+ ## Purpose
+
+ To produce a template or specification for a file-system interface, that specific implementations should follow,
+ so that applications making use of them can rely on a common behaviour and not have to worry about the specific
+ internal implementation decisions with any given backend. Many such implementations are included in this package,
+ or in sister projects such as `s3fs` and `gcsfs`.
+
+ In addition, if this is well-designed, then additional functionality, such as a key-value store or FUSE
+ mounting of the file-system implementation may be available for all implementations "for free".
+
+ ## Documentation
+
+ Please refer to [RTD](https://filesystem-spec.readthedocs.io/en/latest/?badge=latest)
+
+ ## Develop
+
+ fsspec uses GitHub Actions for CI. Environment files can be found
+ in the "ci/" directory. Note that the main environment is called "py38",
+ but it is expected that the version of python installed be adjustable at
+ CI runtime. For local use, pick a version suitable for you.
+
+ ### Testing
+
+ Tests can be run in the dev environment, if activated, via ``pytest fsspec``.
+
+ The full fsspec suite requires a system-level docker, docker-compose, and fuse
+ installation. If only making changes to one backend implementation, it is
+ not generally necessary to run all tests locally.
+
+ It is expected that contributors ensure that any change to fsspec does not
+ cause issues or regressions for either other fsspec-related packages such
+ as gcsfs and s3fs, nor for downstream users of fsspec. The "downstream" CI
+ run and corresponding environment file run a set of tests from the dask
+ test suite, and very minimal tests against pandas and zarr from the
+ test_downstream.py module in this repo.
+
+ ### Code Formatting
+
+ fsspec uses [Black](https://black.readthedocs.io/en/stable) to ensure
+ a consistent code format throughout the project.
+ Run ``black fsspec`` from the root of the filesystem_spec repository to
+ auto-format your code. Additionally, many editors have plugins that will apply
+ ``black`` as you edit files. ``black`` is included in the ``tox`` environments.
+
+ Optionally, you may wish to setup [pre-commit hooks](https://pre-commit.com) to
+ automatically run ``black`` when you make a git commit.
+ Run ``pre-commit install --install-hooks`` from the root of the
+ filesystem_spec repository to setup pre-commit hooks. ``black`` will now be run
+ before you commit, reformatting any changed files. You can format without
+ committing via ``pre-commit run`` or skip these checks with ``git commit
+ --no-verify``.
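
Editor's note: the README above describes fsspec as a common interface that backend implementations follow. As a quick illustration (not part of this commit), the same calls work against any registered backend; the built-in "memory" filesystem is used here so the snippet runs without extra dependencies:

```python
import fsspec

# fsspec.filesystem() returns an AbstractFileSystem implementation for a protocol.
# The in-memory backend ships with fsspec itself, so no extras are required.
fs = fsspec.filesystem("memory")

# The same open/ls/cat calls work for s3://, gs://, etc. once the matching
# backend package (s3fs, gcsfs, ...) is installed.
with fs.open("/demo/hello.txt", "wb") as f:
    f.write(b"hello fsspec")

print(fs.ls("/demo"))              # ['/demo/hello.txt']
print(fs.cat("/demo/hello.txt"))   # b'hello fsspec'
```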
lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/RECORD ADDED
@@ -0,0 +1,104 @@
1
+ fsspec-2023.12.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
2
+ fsspec-2023.12.2.dist-info/LICENSE,sha256=LcNUls5TpzB5FcAIqESq1T53K0mzTN0ARFBnaRQH7JQ,1513
3
+ fsspec-2023.12.2.dist-info/METADATA,sha256=toLeg14fW_MfA33P2NVIPEyWFL7k004pAolypgHrECQ,6829
4
+ fsspec-2023.12.2.dist-info/RECORD,,
5
+ fsspec-2023.12.2.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
6
+ fsspec-2023.12.2.dist-info/top_level.txt,sha256=blt2pDrQDwN3Gklcw13CSPLQRd6aaOgJ8AxqrW395MI,7
7
+ fsspec/__init__.py,sha256=2kT62GfFK-AjgS-LgwSsCo_VA2IePvsyv8Ash5oiaFA,1982
8
+ fsspec/__pycache__/__init__.cpython-311.pyc,,
9
+ fsspec/__pycache__/_version.cpython-311.pyc,,
10
+ fsspec/__pycache__/archive.cpython-311.pyc,,
11
+ fsspec/__pycache__/asyn.cpython-311.pyc,,
12
+ fsspec/__pycache__/caching.cpython-311.pyc,,
13
+ fsspec/__pycache__/callbacks.cpython-311.pyc,,
14
+ fsspec/__pycache__/compression.cpython-311.pyc,,
15
+ fsspec/__pycache__/config.cpython-311.pyc,,
16
+ fsspec/__pycache__/conftest.cpython-311.pyc,,
17
+ fsspec/__pycache__/core.cpython-311.pyc,,
18
+ fsspec/__pycache__/dircache.cpython-311.pyc,,
19
+ fsspec/__pycache__/exceptions.cpython-311.pyc,,
20
+ fsspec/__pycache__/fuse.cpython-311.pyc,,
21
+ fsspec/__pycache__/generic.cpython-311.pyc,,
22
+ fsspec/__pycache__/gui.cpython-311.pyc,,
23
+ fsspec/__pycache__/mapping.cpython-311.pyc,,
24
+ fsspec/__pycache__/parquet.cpython-311.pyc,,
25
+ fsspec/__pycache__/registry.cpython-311.pyc,,
26
+ fsspec/__pycache__/spec.cpython-311.pyc,,
27
+ fsspec/__pycache__/transaction.cpython-311.pyc,,
28
+ fsspec/__pycache__/utils.cpython-311.pyc,,
29
+ fsspec/_version.py,sha256=Kf9CIUDExVlqHjn9lLOn0QJcfeRWAe0PFvFHkRzI9iA,501
30
+ fsspec/archive.py,sha256=S__DzfZj-urAN3tp2W6jJ6YDiXG1fAl7FjvWUN73qIE,2386
31
+ fsspec/asyn.py,sha256=wx6vr5eBJYdW7a2cyv-LkfWu5dCDCcAjcDKjp3ylgR0,36154
32
+ fsspec/caching.py,sha256=N45pzJdD4w5FOX_sxGvHWirggPNB66JTGP1HH6fpSck,28781
33
+ fsspec/callbacks.py,sha256=qmD1v-WWxWmTmcUkEadq-_F_n3OGp9JYarjupUq_j3o,6358
34
+ fsspec/compression.py,sha256=Zrbbb_m2SCF427BMJRYbDKMuSZIIV2YqteoS7AdR8Sc,4867
35
+ fsspec/config.py,sha256=LF4Zmu1vhJW7Je9Q-cwkRc3xP7Rhyy7Xnwj26Z6sv2g,4279
36
+ fsspec/conftest.py,sha256=fVfx-NLrH_OZS1TIpYNoPzM7efEcMoL62reHOdYeFCA,1245
37
+ fsspec/core.py,sha256=0yCj1Z5MhbSDIQiqFs49VORl9QaGwV6hp9bXdkIoPIo,22363
38
+ fsspec/dircache.py,sha256=YzogWJrhEastHU7vWz-cJiJ7sdtLXFXhEpInGKd4EcM,2717
39
+ fsspec/exceptions.py,sha256=s5eA2wIwzj-aeV0i_KDXsBaIhJJRKzmMGUGwuBHTnS4,348
40
+ fsspec/fuse.py,sha256=66amOa6wdIbS0DMhhfAPUoOB37HPorfXD1izV0prmTY,10145
41
+ fsspec/generic.py,sha256=2EcEegwdTLyQ2qSgz3Y6cbAuiWz7bybsEWai_XYkGtw,13457
42
+ fsspec/gui.py,sha256=BEVFplRsQyakNeCWU-vyZBD-16x_flEe0XiDxXparEU,13913
43
+ fsspec/implementations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
+ fsspec/implementations/__pycache__/__init__.cpython-311.pyc,,
45
+ fsspec/implementations/__pycache__/arrow.cpython-311.pyc,,
46
+ fsspec/implementations/__pycache__/cache_mapper.cpython-311.pyc,,
47
+ fsspec/implementations/__pycache__/cache_metadata.cpython-311.pyc,,
48
+ fsspec/implementations/__pycache__/cached.cpython-311.pyc,,
49
+ fsspec/implementations/__pycache__/dask.cpython-311.pyc,,
50
+ fsspec/implementations/__pycache__/data.cpython-311.pyc,,
51
+ fsspec/implementations/__pycache__/dbfs.cpython-311.pyc,,
52
+ fsspec/implementations/__pycache__/dirfs.cpython-311.pyc,,
53
+ fsspec/implementations/__pycache__/ftp.cpython-311.pyc,,
54
+ fsspec/implementations/__pycache__/git.cpython-311.pyc,,
55
+ fsspec/implementations/__pycache__/github.cpython-311.pyc,,
56
+ fsspec/implementations/__pycache__/http.cpython-311.pyc,,
57
+ fsspec/implementations/__pycache__/jupyter.cpython-311.pyc,,
58
+ fsspec/implementations/__pycache__/libarchive.cpython-311.pyc,,
59
+ fsspec/implementations/__pycache__/local.cpython-311.pyc,,
60
+ fsspec/implementations/__pycache__/memory.cpython-311.pyc,,
61
+ fsspec/implementations/__pycache__/reference.cpython-311.pyc,,
62
+ fsspec/implementations/__pycache__/sftp.cpython-311.pyc,,
63
+ fsspec/implementations/__pycache__/smb.cpython-311.pyc,,
64
+ fsspec/implementations/__pycache__/tar.cpython-311.pyc,,
65
+ fsspec/implementations/__pycache__/webhdfs.cpython-311.pyc,,
66
+ fsspec/implementations/__pycache__/zip.cpython-311.pyc,,
67
+ fsspec/implementations/arrow.py,sha256=1d-c5KceQJxm8QXML8fFXHvQx0wstG-tNJNsrgMX_CI,8240
68
+ fsspec/implementations/cache_mapper.py,sha256=nE_sY3vw-jJbeBcAP6NGtacP3jHW_7EcG3yUSf0A-4Y,2502
69
+ fsspec/implementations/cache_metadata.py,sha256=ZvyA7Y3KK-5Ct4E5pELzD6mH_5T03XqaKVT96qYDADU,8576
70
+ fsspec/implementations/cached.py,sha256=jCQSAIiO7M8OOmwG4cCYn4LGvMVCbldC9j7GeonwoEc,30238
71
+ fsspec/implementations/dask.py,sha256=CXZbJzIVOhKV8ILcxuy3bTvcacCueAbyQxmvAkbPkrk,4466
72
+ fsspec/implementations/data.py,sha256=Oti0dKzyeadnVIedo3s8CADoh9bNM-96_6viTEYr4lo,1245
73
+ fsspec/implementations/dbfs.py,sha256=0ndCE2OQqrWv6Y8ETufxOQ9ymIIO2JA_Q82bnilqTaw,14660
74
+ fsspec/implementations/dirfs.py,sha256=8EEgKin5JgFBqzHaKig7ipiFAZJvbChUX_vpC_jagoY,11136
75
+ fsspec/implementations/ftp.py,sha256=FzcHeieyda-ai_D8w4YKCzvI4gshuFYlBACBuEIx2Nk,11419
76
+ fsspec/implementations/git.py,sha256=vKGI-Vd5q4H2RrvhebkPc9NwlfkZ980OUGhebeCw-M0,4034
77
+ fsspec/implementations/github.py,sha256=hCisC1vXzZ9kP1UnyGz2Ba8c9cS2JmSGFHtgHG_2Gqw,7190
78
+ fsspec/implementations/http.py,sha256=cK7HQdVgR8PVLWkB0q0xsXohOP16X-zQiT2uqB1Kq4E,29265
79
+ fsspec/implementations/jupyter.py,sha256=B2uj7OEm7yIk-vRSsO37_ND0t0EBvn4B-Su43ibN4Pg,3811
80
+ fsspec/implementations/libarchive.py,sha256=5_I2DiLXwQ1JC8x-K7jXu-tBwhO9dj7tFLnb0bTnVMQ,7102
81
+ fsspec/implementations/local.py,sha256=GV5OltZrz9aOM8KKSx3T7QE7-U9KX3BOz3Eql3jw_xY,13371
82
+ fsspec/implementations/memory.py,sha256=-a-NR66T-sGj9xTInUsu8KsEiqd156bF8Ui9BuXfmEA,9698
83
+ fsspec/implementations/reference.py,sha256=BHhvx8LIYyBk5OVBWw-PmZsAs_OCaLvF1p8656bwVJE,42438
84
+ fsspec/implementations/sftp.py,sha256=TNmXVac9c5H9Gmiee2EjZNKXnXdkwwaNL2cHDkp_gG4,5632
85
+ fsspec/implementations/smb.py,sha256=k3RtzW97lJtYuw_QpP1rJRFnUBmSsw9twFjUCex0a5U,10591
86
+ fsspec/implementations/tar.py,sha256=dam78Tp_CozybNqCY2JYgGBS3Uc9FuJUAT9oB0lolOs,4111
87
+ fsspec/implementations/webhdfs.py,sha256=C5T96C_p66pUf2cQda-7HIZ9fKYwfCkupf2LN_7n7Dw,16145
88
+ fsspec/implementations/zip.py,sha256=JDX-3HOI15qUl6VTBsNPuDp5RVN6s2n3Bywd4mMu0T0,4347
89
+ fsspec/mapping.py,sha256=WFEXRWxujQwfzzkRP5tpdIE0265okAtlP97qFZGvV1k,8165
90
+ fsspec/parquet.py,sha256=i4H3EU3K1Q6jp8sqjFji6a6gKnlOEZufaa7DRNE5X-4,19516
91
+ fsspec/registry.py,sha256=-dl7sh2tsfhMA2uxz5KQDsPFehQTgMJIbVjNq6QLoKU,11145
92
+ fsspec/spec.py,sha256=kfZpvKoh-fftKG6cOkOi2k0PJJwRqV4ZX_NElCBdcB8,66154
93
+ fsspec/tests/abstract/__init__.py,sha256=i1wcFixV6QhOwdoB24c8oXjzobISNqiKVz9kl2DvAY8,10028
94
+ fsspec/tests/abstract/__pycache__/__init__.cpython-311.pyc,,
95
+ fsspec/tests/abstract/__pycache__/common.cpython-311.pyc,,
96
+ fsspec/tests/abstract/__pycache__/copy.cpython-311.pyc,,
97
+ fsspec/tests/abstract/__pycache__/get.cpython-311.pyc,,
98
+ fsspec/tests/abstract/__pycache__/put.cpython-311.pyc,,
99
+ fsspec/tests/abstract/common.py,sha256=1GQwNo5AONzAnzZj0fWgn8NJPLXALehbsuGxS3FzWVU,4973
100
+ fsspec/tests/abstract/copy.py,sha256=nyCp1Q9apHzti2_UPDh3HzVhRmV7dciD-3dq-wM7JuU,19643
101
+ fsspec/tests/abstract/get.py,sha256=vNR4HztvTR7Cj56AMo7_tx7TeYz1Jgr_2Wb8Lv-UiBY,20755
102
+ fsspec/tests/abstract/put.py,sha256=hEf-yuMWBOT7B6eWcck3tMyJWzdVXtxkY-O6LUt1KAE,20877
103
+ fsspec/transaction.py,sha256=jeexB-H6Aw_gN6Z7hoKKe6v8zizITq39-gyTgpipIKE,2251
104
+ fsspec/utils.py,sha256=_VX_0VwDtoAFSjMYrxvJvnPNX9FMoHO5BlFHXJ0bHFI,23053
lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/WHEEL ADDED
@@ -0,0 +1,5 @@
+ Wheel-Version: 1.0
+ Generator: bdist_wheel (0.42.0)
+ Root-Is-Purelib: true
+ Tag: py3-none-any
+
lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/top_level.txt ADDED
@@ -0,0 +1 @@
+ fsspec
lib/python3.11/site-packages/fsspec/parquet.py ADDED
@@ -0,0 +1,551 @@
1
+ import io
2
+ import json
3
+ import warnings
4
+
5
+ from .core import url_to_fs
6
+ from .utils import merge_offset_ranges
7
+
8
+ # Parquet-Specific Utilities for fsspec
9
+ #
10
+ # Most of the functions defined in this module are NOT
11
+ # intended for public consumption. The only exception
12
+ # to this is `open_parquet_file`, which should be used
13
+ # place of `fs.open()` to open parquet-formatted files
14
+ # on remote file systems.
15
+
16
+
17
+ def open_parquet_file(
18
+ path,
19
+ mode="rb",
20
+ fs=None,
21
+ metadata=None,
22
+ columns=None,
23
+ row_groups=None,
24
+ storage_options=None,
25
+ strict=False,
26
+ engine="auto",
27
+ max_gap=64_000,
28
+ max_block=256_000_000,
29
+ footer_sample_size=1_000_000,
30
+ **kwargs,
31
+ ):
32
+ """
33
+ Return a file-like object for a single Parquet file.
34
+
35
+ The specified parquet `engine` will be used to parse the
36
+ footer metadata, and determine the required byte ranges
37
+ from the file. The target path will then be opened with
38
+ the "parts" (`KnownPartsOfAFile`) caching strategy.
39
+
40
+ Note that this method is intended for usage with remote
41
+ file systems, and is unlikely to improve parquet-read
42
+ performance on local file systems.
43
+
44
+ Parameters
45
+ ----------
46
+ path: str
47
+ Target file path.
48
+ mode: str, optional
49
+ Mode option to be passed through to `fs.open`. Default is "rb".
50
+ metadata: Any, optional
51
+ Parquet metadata object. Object type must be supported
52
+ by the backend parquet engine. For now, only the "fastparquet"
53
+ engine supports an explicit `ParquetFile` metadata object.
54
+ If a metadata object is supplied, the remote footer metadata
55
+ will not need to be transferred into local memory.
56
+ fs: AbstractFileSystem, optional
57
+ Filesystem object to use for opening the file. If nothing is
58
+ specified, an `AbstractFileSystem` object will be inferred.
59
+ engine : str, default "auto"
60
+ Parquet engine to use for metadata parsing. Allowed options
61
+ include "fastparquet", "pyarrow", and "auto". The specified
62
+ engine must be installed in the current environment. If
63
+ "auto" is specified, and both engines are installed,
64
+ "fastparquet" will take precedence over "pyarrow".
65
+ columns: list, optional
66
+ List of all column names that may be read from the file.
67
+ row_groups : list, optional
68
+ List of all row-groups that may be read from the file. This
69
+ may be a list of row-group indices (integers), or it may be
70
+ a list of `RowGroup` metadata objects (if the "fastparquet"
71
+ engine is used).
72
+ storage_options : dict, optional
73
+ Used to generate an `AbstractFileSystem` object if `fs` was
74
+ not specified.
75
+ strict : bool, optional
76
+ Whether the resulting `KnownPartsOfAFile` cache should
77
+ fetch reads that go beyond a known byte-range boundary.
78
+ If `False` (the default), any read that ends outside a
79
+ known part will be zero padded. Note that using
80
+ `strict=True` may be useful for debugging.
81
+ max_gap : int, optional
82
+ Neighboring byte ranges will only be merged when their
83
+ inter-range gap is <= `max_gap`. Default is 64KB.
84
+ max_block : int, optional
85
+ Neighboring byte ranges will only be merged when the size of
86
+ the aggregated range is <= `max_block`. Default is 256MB.
87
+ footer_sample_size : int, optional
88
+ Number of bytes to read from the end of the path to look
89
+ for the footer metadata. If the sampled bytes do not contain
90
+ the footer, a second read request will be required, and
91
+ performance will suffer. Default is 1MB.
92
+ **kwargs :
93
+ Optional key-word arguments to pass to `fs.open`
94
+ """
95
+
96
+ # Make sure we have an `AbstractFileSystem` object
97
+ # to work with
98
+ if fs is None:
99
+ fs = url_to_fs(path, **(storage_options or {}))[0]
100
+
101
+ # For now, `columns == []` not supported. Just use
102
+ # default `open` command with `path` input
103
+ if columns is not None and len(columns) == 0:
104
+ return fs.open(path, mode=mode)
105
+
106
+ # Set the engine
107
+ engine = _set_engine(engine)
108
+
109
+ # Fetch the known byte ranges needed to read
110
+ # `columns` and/or `row_groups`
111
+ data = _get_parquet_byte_ranges(
112
+ [path],
113
+ fs,
114
+ metadata=metadata,
115
+ columns=columns,
116
+ row_groups=row_groups,
117
+ engine=engine,
118
+ max_gap=max_gap,
119
+ max_block=max_block,
120
+ footer_sample_size=footer_sample_size,
121
+ )
122
+
123
+ # Extract file name from `data`
124
+ fn = next(iter(data)) if data else path
125
+
126
+ # Call self.open with "parts" caching
127
+ options = kwargs.pop("cache_options", {}).copy()
128
+ return fs.open(
129
+ fn,
130
+ mode=mode,
131
+ cache_type="parts",
132
+ cache_options={
133
+ **options,
134
+ **{
135
+ "data": data.get(fn, {}),
136
+ "strict": strict,
137
+ },
138
+ },
139
+ **kwargs,
140
+ )
141
+
142
+
143
+ def _get_parquet_byte_ranges(
144
+ paths,
145
+ fs,
146
+ metadata=None,
147
+ columns=None,
148
+ row_groups=None,
149
+ max_gap=64_000,
150
+ max_block=256_000_000,
151
+ footer_sample_size=1_000_000,
152
+ engine="auto",
153
+ ):
154
+ """Get a dictionary of the known byte ranges needed
155
+ to read a specific column/row-group selection from a
156
+ Parquet dataset. Each value in the output dictionary
157
+ is intended for use as the `data` argument for the
158
+ `KnownPartsOfAFile` caching strategy of a single path.
159
+ """
160
+
161
+ # Set engine if necessary
162
+ if isinstance(engine, str):
163
+ engine = _set_engine(engine)
164
+
165
+ # Pass to specialized function if metadata is defined
166
+ if metadata is not None:
167
+
168
+ # Use the provided parquet metadata object
169
+ # to avoid transferring/parsing footer metadata
170
+ return _get_parquet_byte_ranges_from_metadata(
171
+ metadata,
172
+ fs,
173
+ engine,
174
+ columns=columns,
175
+ row_groups=row_groups,
176
+ max_gap=max_gap,
177
+ max_block=max_block,
178
+ )
179
+
180
+ # Get file sizes asynchronously
181
+ file_sizes = fs.sizes(paths)
182
+
183
+ # Populate global paths, starts, & ends
184
+ result = {}
185
+ data_paths = []
186
+ data_starts = []
187
+ data_ends = []
188
+ add_header_magic = True
189
+ if columns is None and row_groups is None:
190
+ # We are NOT selecting specific columns or row-groups.
191
+ #
192
+ # We can avoid sampling the footers, and just transfer
193
+ # all file data with cat_ranges
194
+ for i, path in enumerate(paths):
195
+ result[path] = {}
196
+ for b in range(0, file_sizes[i], max_block):
197
+ data_paths.append(path)
198
+ data_starts.append(b)
199
+ data_ends.append(min(b + max_block, file_sizes[i]))
200
+ add_header_magic = False # "Magic" should already be included
201
+ else:
202
+ # We ARE selecting specific columns or row-groups.
203
+ #
204
+ # Gather file footers.
205
+ # We just take the last `footer_sample_size` bytes of each
206
+ # file (or the entire file if it is smaller than that)
207
+ footer_starts = []
208
+ footer_ends = []
209
+ for i, path in enumerate(paths):
210
+ footer_ends.append(file_sizes[i])
211
+ sample_size = max(0, file_sizes[i] - footer_sample_size)
212
+ footer_starts.append(sample_size)
213
+ footer_samples = fs.cat_ranges(paths, footer_starts, footer_ends)
214
+
215
+ # Check our footer samples and re-sample if necessary.
216
+ missing_footer_starts = footer_starts.copy()
217
+ large_footer = 0
218
+ for i, path in enumerate(paths):
219
+ footer_size = int.from_bytes(footer_samples[i][-8:-4], "little")
220
+ real_footer_start = file_sizes[i] - (footer_size + 8)
221
+ if real_footer_start < footer_starts[i]:
222
+ missing_footer_starts[i] = real_footer_start
223
+ large_footer = max(large_footer, (footer_size + 8))
224
+ if large_footer:
225
+ warnings.warn(
226
+ f"Not enough data was used to sample the parquet footer. "
227
+ f"Try setting footer_sample_size >= {large_footer}."
228
+ )
229
+ for i, block in enumerate(
230
+ fs.cat_ranges(
231
+ paths,
232
+ missing_footer_starts,
233
+ footer_starts,
234
+ )
235
+ ):
236
+ footer_samples[i] = block + footer_samples[i]
237
+ footer_starts[i] = missing_footer_starts[i]
238
+
239
+ # Calculate required byte ranges for each path
240
+ for i, path in enumerate(paths):
241
+
242
+ # Deal with small-file case.
243
+ # Just include all remaining bytes of the file
244
+ # in a single range.
245
+ if file_sizes[i] < max_block:
246
+ if footer_starts[i] > 0:
247
+ # Only need to transfer the data if the
248
+ # footer sample isn't already the whole file
249
+ data_paths.append(path)
250
+ data_starts.append(0)
251
+ data_ends.append(footer_starts[i])
252
+ continue
253
+
254
+ # Use "engine" to collect data byte ranges
255
+ path_data_starts, path_data_ends = engine._parquet_byte_ranges(
256
+ columns,
257
+ row_groups=row_groups,
258
+ footer=footer_samples[i],
259
+ footer_start=footer_starts[i],
260
+ )
261
+
262
+ data_paths += [path] * len(path_data_starts)
263
+ data_starts += path_data_starts
264
+ data_ends += path_data_ends
265
+
266
+ # Merge adjacent offset ranges
267
+ data_paths, data_starts, data_ends = merge_offset_ranges(
268
+ data_paths,
269
+ data_starts,
270
+ data_ends,
271
+ max_gap=max_gap,
272
+ max_block=max_block,
273
+ sort=False, # Should already be sorted
274
+ )
275
+
276
+ # Start by populating `result` with footer samples
277
+ for i, path in enumerate(paths):
278
+ result[path] = {(footer_starts[i], footer_ends[i]): footer_samples[i]}
279
+
280
+ # Transfer the data byte-ranges into local memory
281
+ _transfer_ranges(fs, result, data_paths, data_starts, data_ends)
282
+
283
+ # Add b"PAR1" to header if necessary
284
+ if add_header_magic:
285
+ _add_header_magic(result)
286
+
287
+ return result
288
+
289
+
290
+ def _get_parquet_byte_ranges_from_metadata(
291
+ metadata,
292
+ fs,
293
+ engine,
294
+ columns=None,
295
+ row_groups=None,
296
+ max_gap=64_000,
297
+ max_block=256_000_000,
298
+ ):
299
+ """Simplified version of `_get_parquet_byte_ranges` for
300
+ the case that an engine-specific `metadata` object is
301
+ provided, and the remote footer metadata does not need to
302
+ be transferred before calculating the required byte ranges.
303
+ """
304
+
305
+ # Use "engine" to collect data byte ranges
306
+ data_paths, data_starts, data_ends = engine._parquet_byte_ranges(
307
+ columns,
308
+ row_groups=row_groups,
309
+ metadata=metadata,
310
+ )
311
+
312
+ # Merge adjacent offset ranges
313
+ data_paths, data_starts, data_ends = merge_offset_ranges(
314
+ data_paths,
315
+ data_starts,
316
+ data_ends,
317
+ max_gap=max_gap,
318
+ max_block=max_block,
319
+ sort=False, # Should be sorted
320
+ )
321
+
322
+ # Transfer the data byte-ranges into local memory
323
+ result = {fn: {} for fn in list(set(data_paths))}
324
+ _transfer_ranges(fs, result, data_paths, data_starts, data_ends)
325
+
326
+ # Add b"PAR1" to header
327
+ _add_header_magic(result)
328
+
329
+ return result
330
+
331
+
332
+ def _transfer_ranges(fs, blocks, paths, starts, ends):
333
+ # Use cat_ranges to gather the data byte_ranges
334
+ ranges = (paths, starts, ends)
335
+ for path, start, stop, data in zip(*ranges, fs.cat_ranges(*ranges)):
336
+ blocks[path][(start, stop)] = data
337
+
338
+
339
+ def _add_header_magic(data):
340
+ # Add b"PAR1" to file headers
341
+ for i, path in enumerate(list(data.keys())):
342
+ add_magic = True
343
+ for k in data[path].keys():
344
+ if k[0] == 0 and k[1] >= 4:
345
+ add_magic = False
346
+ break
347
+ if add_magic:
348
+ data[path][(0, 4)] = b"PAR1"
349
+
350
+
351
+ def _set_engine(engine_str):
352
+
353
+ # Define a list of parquet engines to try
354
+ if engine_str == "auto":
355
+ try_engines = ("fastparquet", "pyarrow")
356
+ elif not isinstance(engine_str, str):
357
+ raise ValueError(
358
+ "Failed to set parquet engine! "
359
+ "Please pass 'fastparquet', 'pyarrow', or 'auto'"
360
+ )
361
+ elif engine_str not in ("fastparquet", "pyarrow"):
362
+ raise ValueError(f"{engine_str} engine not supported by `fsspec.parquet`")
363
+ else:
364
+ try_engines = [engine_str]
365
+
366
+ # Try importing the engines in `try_engines`,
367
+ # and choose the first one that succeeds
368
+ for engine in try_engines:
369
+ try:
370
+ if engine == "fastparquet":
371
+ return FastparquetEngine()
372
+ elif engine == "pyarrow":
373
+ return PyarrowEngine()
374
+ except ImportError:
375
+ pass
376
+
377
+ # Raise an error if a supported parquet engine
378
+ # was not found
379
+ raise ImportError(
380
+ f"The following parquet engines are not installed "
381
+ f"in your python environment: {try_engines}."
382
+ f"Please install 'fastparquert' or 'pyarrow' to "
383
+ f"utilize the `fsspec.parquet` module."
384
+ )
385
+
386
+
387
+ class FastparquetEngine:
388
+
389
+ # The purpose of the FastparquetEngine class is
390
+ # to check if fastparquet can be imported (on initialization)
391
+ # and to define a `_parquet_byte_ranges` method. In the
392
+ # future, this class may also be used to define other
393
+ # methods/logic that are specific to fastparquet.
394
+
395
+ def __init__(self):
396
+ import fastparquet as fp
397
+
398
+ self.fp = fp
399
+
400
+ def _row_group_filename(self, row_group, pf):
401
+ return pf.row_group_filename(row_group)
402
+
403
+ def _parquet_byte_ranges(
404
+ self,
405
+ columns,
406
+ row_groups=None,
407
+ metadata=None,
408
+ footer=None,
409
+ footer_start=None,
410
+ ):
411
+
412
+ # Initialize offset ranges and define ParqetFile metadata
413
+ pf = metadata
414
+ data_paths, data_starts, data_ends = [], [], []
415
+ if pf is None:
416
+ pf = self.fp.ParquetFile(io.BytesIO(footer))
417
+
418
+ # Convert columns to a set and add any index columns
419
+ # specified in the pandas metadata (just in case)
420
+ column_set = None if columns is None else set(columns)
421
+ if column_set is not None and hasattr(pf, "pandas_metadata"):
422
+ md_index = [
423
+ ind
424
+ for ind in pf.pandas_metadata.get("index_columns", [])
425
+ # Ignore RangeIndex information
426
+ if not isinstance(ind, dict)
427
+ ]
428
+ column_set |= set(md_index)
429
+
430
+ # Check if row_groups is a list of integers
431
+ # or a list of row-group metadata
432
+ if row_groups and not isinstance(row_groups[0], int):
433
+ # Input row_groups contains row-group metadata
434
+ row_group_indices = None
435
+ else:
436
+ # Input row_groups contains row-group indices
437
+ row_group_indices = row_groups
438
+ row_groups = pf.row_groups
439
+
440
+ # Loop through column chunks to add required byte ranges
441
+ for r, row_group in enumerate(row_groups):
442
+ # Skip this row-group if we are targeting
443
+ # specific row-groups
444
+ if row_group_indices is None or r in row_group_indices:
445
+
446
+ # Find the target parquet-file path for `row_group`
447
+ fn = self._row_group_filename(row_group, pf)
448
+
449
+ for column in row_group.columns:
450
+ name = column.meta_data.path_in_schema[0]
451
+ # Skip this column if we are targeting a
452
+ # specific columns
453
+ if column_set is None or name in column_set:
454
+ file_offset0 = column.meta_data.dictionary_page_offset
455
+ if file_offset0 is None:
456
+ file_offset0 = column.meta_data.data_page_offset
457
+ num_bytes = column.meta_data.total_compressed_size
458
+ if footer_start is None or file_offset0 < footer_start:
459
+ data_paths.append(fn)
460
+ data_starts.append(file_offset0)
461
+ data_ends.append(
462
+ min(
463
+ file_offset0 + num_bytes,
464
+ footer_start or (file_offset0 + num_bytes),
465
+ )
466
+ )
467
+
468
+ if metadata:
469
+ # The metadata in this call may map to multiple
470
+ # file paths. Need to include `data_paths`
471
+ return data_paths, data_starts, data_ends
472
+ return data_starts, data_ends
473
+
474
+
475
+ class PyarrowEngine:
476
+
477
+ # The purpose of the PyarrowEngine class is
478
+ # to check if pyarrow can be imported (on initialization)
479
+ # and to define a `_parquet_byte_ranges` method. In the
480
+ # future, this class may also be used to define other
481
+ # methods/logic that are specific to pyarrow.
482
+
483
+ def __init__(self):
484
+ import pyarrow.parquet as pq
485
+
486
+ self.pq = pq
487
+
488
+ def _row_group_filename(self, row_group, metadata):
489
+ raise NotImplementedError
490
+
491
+ def _parquet_byte_ranges(
492
+ self,
493
+ columns,
494
+ row_groups=None,
495
+ metadata=None,
496
+ footer=None,
497
+ footer_start=None,
498
+ ):
499
+
500
+ if metadata is not None:
501
+ raise ValueError("metadata input not supported for PyarrowEngine")
502
+
503
+ data_starts, data_ends = [], []
504
+ md = self.pq.ParquetFile(io.BytesIO(footer)).metadata
505
+
506
+ # Convert columns to a set and add any index columns
507
+ # specified in the pandas metadata (just in case)
508
+ column_set = None if columns is None else set(columns)
509
+ if column_set is not None:
510
+ schema = md.schema.to_arrow_schema()
511
+ has_pandas_metadata = (
512
+ schema.metadata is not None and b"pandas" in schema.metadata
513
+ )
514
+ if has_pandas_metadata:
515
+ md_index = [
516
+ ind
517
+ for ind in json.loads(
518
+ schema.metadata[b"pandas"].decode("utf8")
519
+ ).get("index_columns", [])
520
+ # Ignore RangeIndex information
521
+ if not isinstance(ind, dict)
522
+ ]
523
+ column_set |= set(md_index)
524
+
525
+ # Loop through column chunks to add required byte ranges
526
+ for r in range(md.num_row_groups):
527
+ # Skip this row-group if we are targeting
528
+ # specific row-groups
529
+ if row_groups is None or r in row_groups:
530
+ row_group = md.row_group(r)
531
+ for c in range(row_group.num_columns):
532
+ column = row_group.column(c)
533
+ name = column.path_in_schema
534
+ # Skip this column if we are targeting a
535
+ # specific columns
536
+ split_name = name.split(".")[0]
537
+ if (
538
+ column_set is None
539
+ or name in column_set
540
+ or split_name in column_set
541
+ ):
542
+ file_offset0 = column.dictionary_page_offset
543
+ if file_offset0 is None:
544
+ file_offset0 = column.data_page_offset
545
+ num_bytes = column.total_compressed_size
546
+ if file_offset0 < footer_start:
547
+ data_starts.append(file_offset0)
548
+ data_ends.append(
549
+ min(file_offset0 + num_bytes, footer_start)
550
+ )
551
+ return data_starts, data_ends
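
Editor's note: the module above exposes `open_parquet_file` as a drop-in replacement for `fs.open()` when reading Parquet from remote storage. A minimal usage sketch follows (illustrative only, not part of this commit; the S3 path is a placeholder, and `s3fs` plus one of `fastparquet`/`pyarrow` are assumed to be installed):

```python
import pandas as pd
from fsspec.parquet import open_parquet_file

# Only the byte ranges needed for the footer and the selected columns/row-groups
# are transferred, via the "parts" (KnownPartsOfAFile) caching strategy.
with open_parquet_file(
    "s3://my-bucket/data.parquet",     # hypothetical remote path
    columns=["name", "value"],
    row_groups=[0],
    engine="auto",
    storage_options={"anon": True},
) as f:
    df = pd.read_parquet(f, columns=["name", "value"])
```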
lib/python3.11/site-packages/fsspec/registry.py ADDED
@@ -0,0 +1,299 @@
1
+ from __future__ import annotations
2
+
3
+ import importlib
4
+ import types
5
+ import warnings
6
+
7
+ __all__ = ["registry", "get_filesystem_class", "default"]
8
+
9
+ # internal, mutable
10
+ _registry: dict[str, type] = {}
11
+
12
+ # external, immutable
13
+ registry = types.MappingProxyType(_registry)
14
+ default = "file"
15
+
16
+
17
+ def register_implementation(name, cls, clobber=False, errtxt=None):
18
+ """Add implementation class to the registry
19
+
20
+ Parameters
21
+ ----------
22
+ name: str
23
+ Protocol name to associate with the class
24
+ cls: class or str
25
+ if a class: fsspec-compliant implementation class (normally inherits from
26
+ ``fsspec.AbstractFileSystem``, gets added straight to the registry. If a
27
+ str, the full path to an implementation class like package.module.class,
28
+ which gets added to known_implementations,
29
+ so the import is deferred until the filesystem is actually used.
30
+ clobber: bool (optional)
31
+ Whether to overwrite a protocol with the same name; if False, will raise
32
+ instead.
33
+ errtxt: str (optional)
34
+ If given, then a failure to import the given class will result in this
35
+ text being given.
36
+ """
37
+ if isinstance(cls, str):
38
+ if name in known_implementations and clobber is False:
39
+ if cls != known_implementations[name]["class"]:
40
+ raise ValueError(
41
+ f"Name ({name}) already in the known_implementations and clobber "
42
+ f"is False"
43
+ )
44
+ else:
45
+ known_implementations[name] = {
46
+ "class": cls,
47
+ "err": errtxt or f"{cls} import failed for protocol {name}",
48
+ }
49
+
50
+ else:
51
+ if name in registry and clobber is False:
52
+ if _registry[name] is not cls:
53
+ raise ValueError(
54
+ f"Name ({name}) already in the registry and clobber is False"
55
+ )
56
+ else:
57
+ _registry[name] = cls
58
+
59
+
60
+ # protocols mapped to the class which implements them. This dict can be
61
+ # updated with register_implementation
62
+ known_implementations = {
63
+ "data": {"class": "fsspec.implementations.data.DataFileSystem"},
64
+ "file": {"class": "fsspec.implementations.local.LocalFileSystem"},
65
+ "local": {"class": "fsspec.implementations.local.LocalFileSystem"},
66
+ "memory": {"class": "fsspec.implementations.memory.MemoryFileSystem"},
67
+ "dropbox": {
68
+ "class": "dropboxdrivefs.DropboxDriveFileSystem",
69
+ "err": (
70
+ 'DropboxFileSystem requires "dropboxdrivefs",'
71
+ '"requests" and "dropbox" to be installed'
72
+ ),
73
+ },
74
+ "http": {
75
+ "class": "fsspec.implementations.http.HTTPFileSystem",
76
+ "err": 'HTTPFileSystem requires "requests" and "aiohttp" to be installed',
77
+ },
78
+ "https": {
79
+ "class": "fsspec.implementations.http.HTTPFileSystem",
80
+ "err": 'HTTPFileSystem requires "requests" and "aiohttp" to be installed',
81
+ },
82
+ "zip": {"class": "fsspec.implementations.zip.ZipFileSystem"},
83
+ "tar": {"class": "fsspec.implementations.tar.TarFileSystem"},
84
+ "gcs": {
85
+ "class": "gcsfs.GCSFileSystem",
86
+ "err": "Please install gcsfs to access Google Storage",
87
+ },
88
+ "gs": {
89
+ "class": "gcsfs.GCSFileSystem",
90
+ "err": "Please install gcsfs to access Google Storage",
91
+ },
92
+ "gdrive": {
93
+ "class": "gdrivefs.GoogleDriveFileSystem",
94
+ "err": "Please install gdrivefs for access to Google Drive",
95
+ },
96
+ "sftp": {
97
+ "class": "fsspec.implementations.sftp.SFTPFileSystem",
98
+ "err": 'SFTPFileSystem requires "paramiko" to be installed',
99
+ },
100
+ "ssh": {
101
+ "class": "fsspec.implementations.sftp.SFTPFileSystem",
102
+ "err": 'SFTPFileSystem requires "paramiko" to be installed',
103
+ },
104
+ "ftp": {"class": "fsspec.implementations.ftp.FTPFileSystem"},
105
+ "hdfs": {
106
+ "class": "fsspec.implementations.arrow.HadoopFileSystem",
107
+ "err": "pyarrow and local java libraries required for HDFS",
108
+ },
109
+ "arrow_hdfs": {
110
+ "class": "fsspec.implementations.arrow.HadoopFileSystem",
111
+ "err": "pyarrow and local java libraries required for HDFS",
112
+ },
113
+ "webhdfs": {
114
+ "class": "fsspec.implementations.webhdfs.WebHDFS",
115
+ "err": 'webHDFS access requires "requests" to be installed',
116
+ },
117
+ "s3": {"class": "s3fs.S3FileSystem", "err": "Install s3fs to access S3"},
118
+ "s3a": {"class": "s3fs.S3FileSystem", "err": "Install s3fs to access S3"},
119
+ "wandb": {"class": "wandbfs.WandbFS", "err": "Install wandbfs to access wandb"},
120
+ "oci": {
121
+ "class": "ocifs.OCIFileSystem",
122
+ "err": "Install ocifs to access OCI Object Storage",
123
+ },
124
+ "ocilake": {
125
+ "class": "ocifs.OCIFileSystem",
126
+ "err": "Install ocifs to access OCI Data Lake",
127
+ },
128
+ "asynclocal": {
129
+ "class": "morefs.asyn_local.AsyncLocalFileSystem",
130
+ "err": "Install 'morefs[asynclocalfs]' to use AsyncLocalFileSystem",
131
+ },
132
+ "adl": {
133
+ "class": "adlfs.AzureDatalakeFileSystem",
134
+ "err": "Install adlfs to access Azure Datalake Gen1",
135
+ },
136
+ "abfs": {
137
+ "class": "adlfs.AzureBlobFileSystem",
138
+ "err": "Install adlfs to access Azure Datalake Gen2 and Azure Blob Storage",
139
+ },
140
+ "az": {
141
+ "class": "adlfs.AzureBlobFileSystem",
142
+ "err": "Install adlfs to access Azure Datalake Gen2 and Azure Blob Storage",
143
+ },
144
+ "cached": {"class": "fsspec.implementations.cached.CachingFileSystem"},
145
+ "blockcache": {"class": "fsspec.implementations.cached.CachingFileSystem"},
146
+ "filecache": {"class": "fsspec.implementations.cached.WholeFileCacheFileSystem"},
147
+ "simplecache": {"class": "fsspec.implementations.cached.SimpleCacheFileSystem"},
148
+ "dask": {
149
+ "class": "fsspec.implementations.dask.DaskWorkerFileSystem",
150
+ "err": "Install dask distributed to access worker file system",
151
+ },
152
+ "dbfs": {
153
+ "class": "fsspec.implementations.dbfs.DatabricksFileSystem",
154
+ "err": "Install the requests package to use the DatabricksFileSystem",
155
+ },
156
+ "github": {
157
+ "class": "fsspec.implementations.github.GithubFileSystem",
158
+ "err": "Install the requests package to use the github FS",
159
+ },
160
+ "git": {
161
+ "class": "fsspec.implementations.git.GitFileSystem",
162
+ "err": "Install pygit2 to browse local git repos",
163
+ },
164
+ "smb": {
165
+ "class": "fsspec.implementations.smb.SMBFileSystem",
166
+ "err": 'SMB requires "smbprotocol" or "smbprotocol[kerberos]" installed',
167
+ },
168
+ "jupyter": {
169
+ "class": "fsspec.implementations.jupyter.JupyterFileSystem",
170
+ "err": "Jupyter FS requires requests to be installed",
171
+ },
172
+ "jlab": {
173
+ "class": "fsspec.implementations.jupyter.JupyterFileSystem",
174
+ "err": "Jupyter FS requires requests to be installed",
175
+ },
176
+ "libarchive": {
177
+ "class": "fsspec.implementations.libarchive.LibArchiveFileSystem",
178
+ "err": "LibArchive requires to be installed",
179
+ },
180
+ "reference": {"class": "fsspec.implementations.reference.ReferenceFileSystem"},
181
+ "generic": {"class": "fsspec.generic.GenericFileSystem"},
182
+ "oss": {
183
+ "class": "ossfs.OSSFileSystem",
184
+ "err": "Install ossfs to access Alibaba Object Storage System",
185
+ },
186
+ "webdav": {
187
+ "class": "webdav4.fsspec.WebdavFileSystem",
188
+ "err": "Install webdav4 to access WebDAV",
189
+ },
190
+ "dvc": {
191
+ "class": "dvc.api.DVCFileSystem",
192
+ "err": "Install dvc to access DVCFileSystem",
193
+ },
194
+ "hf": {
195
+ "class": "huggingface_hub.HfFileSystem",
196
+ "err": "Install huggingface_hub to access HfFileSystem",
197
+ },
198
+ "root": {
199
+ "class": "fsspec_xrootd.XRootDFileSystem",
200
+ "err": "Install fsspec-xrootd to access xrootd storage system."
201
+ + " Note: 'root' is the protocol name for xrootd storage systems,"
202
+ + " not referring to root directories",
203
+ },
204
+ "dir": {"class": "fsspec.implementations.dirfs.DirFileSystem"},
205
+ "box": {
206
+ "class": "boxfs.BoxFileSystem",
207
+ "err": "Please install boxfs to access BoxFileSystem",
208
+ },
209
+ "lakefs": {
210
+ "class": "lakefs_spec.LakeFSFileSystem",
211
+ "err": "Please install lakefs-spec to access LakeFSFileSystem",
212
+ },
213
+ }
214
+
215
+
216
+ def get_filesystem_class(protocol):
217
+ """Fetch named protocol implementation from the registry
218
+
219
+ The dict ``known_implementations`` maps protocol names to the locations
220
+ of classes implementing the corresponding file-system. When used for the
221
+ first time, appropriate imports will happen and the class will be placed in
222
+ the registry. All subsequent calls will fetch directly from the registry.
223
+
224
+ Some protocol implementations require additional dependencies, and so the
225
+ import may fail. In this case, the string in the "err" field of the
226
+ ``known_implementations`` will be given as the error message.
227
+ """
228
+ if not protocol:
229
+ protocol = default
230
+
231
+ if protocol not in registry:
232
+ if protocol not in known_implementations:
233
+ raise ValueError(f"Protocol not known: {protocol}")
234
+ bit = known_implementations[protocol]
235
+ try:
236
+ register_implementation(protocol, _import_class(bit["class"]))
237
+ except ImportError as e:
238
+ raise ImportError(bit["err"]) from e
239
+ cls = registry[protocol]
240
+ if getattr(cls, "protocol", None) in ("abstract", None):
241
+ cls.protocol = protocol
242
+
243
+ return cls
244
+
245
+
246
+ s3_msg = """Your installed version of s3fs is very old and known to cause
247
+ severe performance issues, see also https://github.com/dask/dask/issues/10276
248
+
249
+ To fix, you should specify a lower version bound on s3fs, or
250
+ update the current installation.
251
+ """
252
+
253
+
254
+ def _import_class(cls, minv=None):
255
+ """Take a string FQP and return the imported class or identifier
256
+
257
+ clas is of the form "package.module.klass" or "package.module:subobject.klass"
258
+ """
259
+ if ":" in cls:
260
+ mod, name = cls.rsplit(":", 1)
261
+ s3 = mod == "s3fs"
262
+ mod = importlib.import_module(mod)
263
+ if s3 and mod.__version__.split(".") < ["0", "5"]:
264
+ warnings.warn(s3_msg)
265
+ for part in name.split("."):
266
+ mod = getattr(mod, part)
267
+ return mod
268
+ else:
269
+ mod, name = cls.rsplit(".", 1)
270
+ s3 = mod == "s3fs"
271
+ mod = importlib.import_module(mod)
272
+ if s3 and mod.__version__.split(".") < ["0", "5"]:
273
+ warnings.warn(s3_msg)
274
+ return getattr(mod, name)
275
+
276
+
277
+ def filesystem(protocol, **storage_options):
278
+ """Instantiate filesystems for given protocol and arguments
279
+
280
+ ``storage_options`` are specific to the protocol being chosen, and are
281
+ passed directly to the class.
282
+ """
283
+ if protocol == "arrow_hdfs":
284
+ warnings.warn(
285
+ "The 'arrow_hdfs' protocol has been deprecated and will be "
286
+ "removed in the future. Specify it as 'hdfs'.",
287
+ DeprecationWarning,
288
+ )
289
+
290
+ cls = get_filesystem_class(protocol)
291
+ return cls(**storage_options)
292
+
293
+
294
+ def available_protocols():
295
+ """Return a list of the implemented protocols.
296
+
297
+ Note that any given protocol may require extra packages to be importable.
298
+ """
299
+ return list(known_implementations)
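
Editor's note: registry.py maps protocol names to implementation classes and defers imports until a filesystem is first used. A short sketch of how the registry is typically exercised (illustrative, not part of this commit; `mypackage.myfs.MyFileSystem` is a hypothetical third-party implementation):

```python
import fsspec
from fsspec.registry import available_protocols, register_implementation

# Register a protocol by dotted class path; the import only happens when the
# filesystem is first instantiated, and errtxt is reported if it fails.
register_implementation(
    "myfs",
    "mypackage.myfs.MyFileSystem",   # hypothetical class path
    errtxt="Install mypackage to use the myfs:// protocol",
)

print("myfs" in available_protocols())   # True

# Built-in protocols resolve through the same machinery:
fs = fsspec.filesystem("memory")         # calls get_filesystem_class("memory") internally
```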
lib/python3.11/site-packages/fsspec/spec.py ADDED
@@ -0,0 +1,1963 @@
1
+ from __future__ import annotations
2
+
3
+ import io
4
+ import logging
5
+ import os
6
+ import threading
7
+ import warnings
8
+ import weakref
9
+ from errno import ESPIPE
10
+ from glob import has_magic
11
+ from hashlib import sha256
12
+ from typing import ClassVar
13
+
14
+ from .callbacks import _DEFAULT_CALLBACK
15
+ from .config import apply_config, conf
16
+ from .dircache import DirCache
17
+ from .transaction import Transaction
18
+ from .utils import (
19
+ _unstrip_protocol,
20
+ glob_translate,
21
+ isfilelike,
22
+ other_paths,
23
+ read_block,
24
+ stringify_path,
25
+ tokenize,
26
+ )
27
+
28
+ logger = logging.getLogger("fsspec")
29
+
30
+
31
+ def make_instance(cls, args, kwargs):
32
+ return cls(*args, **kwargs)
33
+
34
+
35
+ class _Cached(type):
36
+ """
37
+ Metaclass for caching file system instances.
38
+
39
+ Notes
40
+ -----
41
+ Instances are cached according to
42
+
43
+ * The values of the class attributes listed in `_extra_tokenize_attributes`
44
+ * The arguments passed to ``__init__``.
45
+
46
+ This creates an additional reference to the filesystem, which prevents the
47
+ filesystem from being garbage collected when all *user* references go away.
48
+ A call to the :meth:`AbstractFileSystem.clear_instance_cache` must *also*
49
+ be made for a filesystem instance to be garbage collected.
50
+ """
51
+
52
+ def __init__(cls, *args, **kwargs):
53
+ super().__init__(*args, **kwargs)
54
+ # Note: we intentionally create a reference here, to avoid garbage
55
+ # collecting instances when all other references are gone. To really
56
+ # delete a FileSystem, the cache must be cleared.
57
+ if conf.get("weakref_instance_cache"): # pragma: no cover
58
+ # debug option for analysing fork/spawn conditions
59
+ cls._cache = weakref.WeakValueDictionary()
60
+ else:
61
+ cls._cache = {}
62
+ cls._pid = os.getpid()
63
+
64
+ def __call__(cls, *args, **kwargs):
65
+ kwargs = apply_config(cls, kwargs)
66
+ extra_tokens = tuple(
67
+ getattr(cls, attr, None) for attr in cls._extra_tokenize_attributes
68
+ )
69
+ token = tokenize(
70
+ cls, cls._pid, threading.get_ident(), *args, *extra_tokens, **kwargs
71
+ )
72
+ skip = kwargs.pop("skip_instance_cache", False)
73
+ if os.getpid() != cls._pid:
74
+ cls._cache.clear()
75
+ cls._pid = os.getpid()
76
+ if not skip and cls.cachable and token in cls._cache:
77
+ cls._latest = token
78
+ return cls._cache[token]
79
+ else:
80
+ obj = super().__call__(*args, **kwargs)
81
+ # Setting _fs_token here causes some static linters to complain.
82
+ obj._fs_token_ = token
83
+ obj.storage_args = args
84
+ obj.storage_options = kwargs
85
+ if obj.async_impl and obj.mirror_sync_methods:
86
+ from .asyn import mirror_sync_methods
87
+
88
+ mirror_sync_methods(obj)
89
+
90
+ if cls.cachable and not skip:
91
+ cls._latest = token
92
+ cls._cache[token] = obj
93
+ return obj
94
+
95
+
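The practical effect of this metaclass is that constructing a filesystem twice with the same arguments returns the same cached object. A hedged sketch (editorial, assuming the bundled "memory" implementation is available):

import fsspec

a = fsspec.filesystem("memory")
b = fsspec.filesystem("memory")
assert a is b                                      # identical token -> cached instance
c = fsspec.filesystem("memory", skip_instance_cache=True)
assert c is not a                                  # cache bypassed for this object
type(a).clear_instance_cache()                     # release the strong references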
96
+ class AbstractFileSystem(metaclass=_Cached):
97
+ """
98
+ An abstract super-class for pythonic file-systems
99
+
100
+ Implementations are expected to be compatible with or, better, subclass
101
+ from here.
102
+ """
103
+
104
+ cachable = True # this class can be cached, instances reused
105
+ _cached = False
106
+ blocksize = 2**22
107
+ sep = "/"
108
+ protocol: ClassVar[str | tuple[str, ...]] = "abstract"
109
+ _latest = None
110
+ async_impl = False
111
+ mirror_sync_methods = False
112
+ root_marker = "" # For some FSs, may require leading '/' or other character
113
+ transaction_type = Transaction
114
+
115
+ #: Extra *class attributes* that should be considered when hashing.
116
+ _extra_tokenize_attributes = ()
117
+
118
+ def __init__(self, *args, **storage_options):
119
+ """Create and configure file-system instance
120
+
121
+ Instances may be cachable, so if similar enough arguments are seen
122
+ a new instance is not required. The token attribute exists to allow
123
+ implementations to cache instances if they wish.
124
+
125
+ A reasonable default should be provided if there are no arguments.
126
+
127
+ Subclasses should call this method.
128
+
129
+ Parameters
130
+ ----------
131
+ use_listings_cache, listings_expiry_time, max_paths:
132
+ passed to ``DirCache``, if the implementation supports
133
+ directory listing caching. Pass use_listings_cache=False
134
+ to disable such caching.
135
+ skip_instance_cache: bool
136
+ If this is a cachable implementation, pass True here to force
137
+ creating a new instance even if a matching instance exists, and prevent
138
+ storing this instance.
139
+ asynchronous: bool
140
+ loop: asyncio-compatible IOLoop or None
141
+ """
142
+ if self._cached:
143
+ # reusing instance, don't change
144
+ return
145
+ self._cached = True
146
+ self._intrans = False
147
+ self._transaction = None
148
+ self._invalidated_caches_in_transaction = []
149
+ self.dircache = DirCache(**storage_options)
150
+
151
+ if storage_options.pop("add_docs", None):
152
+ warnings.warn("add_docs is no longer supported.", FutureWarning)
153
+
154
+ if storage_options.pop("add_aliases", None):
155
+ warnings.warn("add_aliases has been removed.", FutureWarning)
156
+ # This is set in _Cached
157
+ self._fs_token_ = None
158
+
159
+ @property
160
+ def fsid(self):
161
+ """Persistent filesystem id that can be used to compare filesystems
162
+ across sessions.
163
+ """
164
+ raise NotImplementedError
165
+
166
+ @property
167
+ def _fs_token(self):
168
+ return self._fs_token_
169
+
170
+ def __dask_tokenize__(self):
171
+ return self._fs_token
172
+
173
+ def __hash__(self):
174
+ return int(self._fs_token, 16)
175
+
176
+ def __eq__(self, other):
177
+ return isinstance(other, type(self)) and self._fs_token == other._fs_token
178
+
179
+ def __reduce__(self):
180
+ return make_instance, (type(self), self.storage_args, self.storage_options)
181
+
182
+ @classmethod
183
+ def _strip_protocol(cls, path):
184
+ """Turn path from fully-qualified to file-system-specific
185
+
186
+ May require FS-specific handling, e.g., for relative paths or links.
187
+ """
188
+ if isinstance(path, list):
189
+ return [cls._strip_protocol(p) for p in path]
190
+ path = stringify_path(path)
191
+ protos = (cls.protocol,) if isinstance(cls.protocol, str) else cls.protocol
192
+ for protocol in protos:
193
+ if path.startswith(protocol + "://"):
194
+ path = path[len(protocol) + 3 :]
195
+ elif path.startswith(protocol + "::"):
196
+ path = path[len(protocol) + 2 :]
197
+ path = path.rstrip("/")
198
+ # use of root_marker to make minimum required path, e.g., "/"
199
+ return path or cls.root_marker
200
+
201
+ def unstrip_protocol(self, name: str) -> str:
202
+ """Format FS-specific path to generic, including protocol"""
203
+ protos = (self.protocol,) if isinstance(self.protocol, str) else self.protocol
204
+ for protocol in protos:
205
+ if name.startswith(f"{protocol}://"):
206
+ return name
207
+ return f"{protos[0]}://{name}"
208
+
209
+ @staticmethod
210
+ def _get_kwargs_from_urls(path):
211
+ """If kwargs can be encoded in the paths, extract them here
212
+
213
+ This should happen before instantiation of the class; incoming paths
214
+ then should be amended to strip the options in methods.
215
+
216
+ Examples may look like an sftp path "sftp://user@host:/my/path", where
217
+ the user and host should become kwargs and later get stripped.
218
+ """
219
+ # by default, nothing happens
220
+ return {}
221
+
222
+ @classmethod
223
+ def current(cls):
224
+ """Return the most recently instantiated FileSystem
225
+
226
+ If no instance has been created, then create one with defaults
227
+ """
228
+ if cls._latest in cls._cache:
229
+ return cls._cache[cls._latest]
230
+ return cls()
231
+
232
+ @property
233
+ def transaction(self):
234
+ """A context within which files are committed together upon exit
235
+
236
+ Requires the file class to implement `.commit()` and `.discard()`
237
+ for the normal and exception cases.
238
+ """
239
+ if self._transaction is None:
240
+ self._transaction = self.transaction_type(self)
241
+ return self._transaction
242
+
243
+ def start_transaction(self):
244
+ """Begin write transaction for deferring files, non-context version"""
245
+ self._intrans = True
246
+ self._transaction = self.transaction_type(self)
247
+ return self.transaction
248
+
249
+ def end_transaction(self):
250
+ """Finish write transaction, non-context version"""
251
+ self.transaction.complete()
252
+ self._transaction = None
253
+ # The invalid cache must be cleared after the transaction is completed.
254
+ for path in self._invalidated_caches_in_transaction:
255
+ self.invalidate_cache(path)
256
+ self._invalidated_caches_in_transaction.clear()
257
+
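A short illustration of the transaction API above (editorial sketch; writes are only truly deferred when the backend's file class implements commit()/discard()):

import fsspec

fs = fsspec.filesystem("memory")
with fs.transaction:                       # opens a write transaction
    with fs.open("/tx/a.txt", "wb") as f:
        f.write(b"first")
    with fs.open("/tx/b.txt", "wb") as f:
        f.write(b"second")
# leaving the block completes the transaction: both files are committed,
# or discarded if an exception was raised inside the block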
258
+ def invalidate_cache(self, path=None):
259
+ """
260
+ Discard any cached directory information
261
+
262
+ Parameters
263
+ ----------
264
+ path: string or None
265
+ If None, clear all listings cached else listings at or under given
266
+ path.
267
+ """
268
+ # Not necessary to implement invalidation mechanism, may have no cache.
269
+ # But if you do have one, call this parent-class method from your
270
+ # subclass to ensure caches expire correctly after transactions.
271
+ # See the implementation of FTPFileSystem in ftp.py
272
+ if self._intrans:
273
+ self._invalidated_caches_in_transaction.append(path)
274
+
275
+ def mkdir(self, path, create_parents=True, **kwargs):
276
+ """
277
+ Create directory entry at path
278
+
279
+ For systems that don't have true directories, may create an entry for
280
+ this instance only and not touch the real filesystem
281
+
282
+ Parameters
283
+ ----------
284
+ path: str
285
+ location
286
+ create_parents: bool
287
+ if True, this is equivalent to ``makedirs``
288
+ kwargs:
289
+ may be permissions, etc.
290
+ """
291
+ pass # not necessary to implement, may not have directories
292
+
293
+ def makedirs(self, path, exist_ok=False):
294
+ """Recursively make directories
295
+
296
+ Creates directory at path and any intervening required directories.
297
+ Raises exception if, for instance, the path already exists but is a
298
+ file.
299
+
300
+ Parameters
301
+ ----------
302
+ path: str
303
+ leaf directory name
304
+ exist_ok: bool (False)
305
+ If False, will error if the target already exists
306
+ """
307
+ pass # not necessary to implement, may not have directories
308
+
309
+ def rmdir(self, path):
310
+ """Remove a directory, if empty"""
311
+ pass # not necessary to implement, may not have directories
312
+
313
+ def ls(self, path, detail=True, **kwargs):
314
+ """List objects at path.
315
+
316
+ This should include subdirectories and files at that location. The
317
+ difference between a file and a directory must be clear when details
318
+ are requested.
319
+
320
+ The specific keys, or perhaps a FileInfo class, or similar, is TBD,
321
+ but must be consistent across implementations.
322
+ Must include:
323
+
324
+ - full path to the entry (without protocol)
325
+ - size of the entry, in bytes. If the value cannot be determined, will
326
+ be ``None``.
327
+ - type of entry, "file", "directory" or other
328
+
329
+ Additional information
330
+ may be present, appropriate to the file-system, e.g., generation,
331
+ checksum, etc.
332
+
333
+ May use refresh=True|False to allow use of self._ls_from_cache to
334
+ check for a saved listing and avoid calling the backend. This would be
335
+ common where listing may be expensive.
336
+
337
+ Parameters
338
+ ----------
339
+ path: str
340
+ detail: bool
341
+ if True, gives a list of dictionaries, where each is the same as
342
+ the result of ``info(path)``. If False, gives a list of paths
343
+ (str).
344
+ kwargs: may have additional backend-specific options, such as version
345
+ information
346
+
347
+ Returns
348
+ -------
349
+ List of strings if detail is False, or list of directory information
350
+ dicts if detail is True.
351
+ """
352
+ raise NotImplementedError
353
+
354
+ def _ls_from_cache(self, path):
355
+ """Check cache for listing
356
+
357
+ Returns listing, if found (may be empty list for a directory that exists
358
+ but contains nothing), None if not in cache.
359
+ """
360
+ parent = self._parent(path)
361
+ if path.rstrip("/") in self.dircache:
362
+ return self.dircache[path.rstrip("/")]
363
+ try:
364
+ files = [
365
+ f
366
+ for f in self.dircache[parent]
367
+ if f["name"] == path
368
+ or (f["name"] == path.rstrip("/") and f["type"] == "directory")
369
+ ]
370
+ if len(files) == 0:
371
+ # parent dir was listed but did not contain this file
372
+ raise FileNotFoundError(path)
373
+ return files
374
+ except KeyError:
375
+ pass
376
+
377
+ def walk(self, path, maxdepth=None, topdown=True, on_error="omit", **kwargs):
378
+ """Return all files below path
379
+
380
+ List all files, recursing into subdirectories; output is iterator-style,
381
+ like ``os.walk()``. For a simple list of files, ``find()`` is available.
382
+
383
+ When topdown is True, the caller can modify the dirnames list in-place (perhaps
384
+ using del or slice assignment), and walk() will
385
+ only recurse into the subdirectories whose names remain in dirnames;
386
+ this can be used to prune the search, impose a specific order of visiting,
387
+ or even to inform walk() about directories the caller creates or renames before
388
+ it resumes walk() again.
389
+ Modifying dirnames when topdown is False has no effect. (see os.walk)
390
+
391
+ Note that the "files" outputted will include anything that is not
392
+ a directory, such as links.
393
+
394
+ Parameters
395
+ ----------
396
+ path: str
397
+ Root to recurse into
398
+ maxdepth: int
399
+ Maximum recursion depth. None means limitless, but not recommended
400
+ on link-based file-systems.
401
+ topdown: bool (True)
402
+ Whether to walk the directory tree from the top downwards or from
403
+ the bottom upwards.
404
+ on_error: "omit", "raise", a callable
405
+ if omit (default), path with exception will simply be empty;
406
+ If raise, an underlying exception will be raised;
407
+ if callable, it will be called with a single OSError instance as argument
408
+ kwargs: passed to ``ls``
409
+ """
410
+ if maxdepth is not None and maxdepth < 1:
411
+ raise ValueError("maxdepth must be at least 1")
412
+
413
+ path = self._strip_protocol(path)
414
+ full_dirs = {}
415
+ dirs = {}
416
+ files = {}
417
+
418
+ detail = kwargs.pop("detail", False)
419
+ try:
420
+ listing = self.ls(path, detail=True, **kwargs)
421
+ except (FileNotFoundError, OSError) as e:
422
+ if on_error == "raise":
423
+ raise
424
+ elif callable(on_error):
425
+ on_error(e)
426
+ if detail:
427
+ return path, {}, {}
428
+ return path, [], []
429
+
430
+ for info in listing:
431
+ # each info name must be at least [path]/part , but here
432
+ # we check also for names like [path]/part/
433
+ pathname = info["name"].rstrip("/")
434
+ name = pathname.rsplit("/", 1)[-1]
435
+ if info["type"] == "directory" and pathname != path:
436
+ # do not include "self" path
437
+ full_dirs[name] = pathname
438
+ dirs[name] = info
439
+ elif pathname == path:
440
+ # file-like with same name as given path
441
+ files[""] = info
442
+ else:
443
+ files[name] = info
444
+
445
+ if not detail:
446
+ dirs = list(dirs)
447
+ files = list(files)
448
+
449
+ if topdown:
450
+ # Yield before recursion if walking top down
451
+ yield path, dirs, files
452
+
453
+ if maxdepth is not None:
454
+ maxdepth -= 1
455
+ if maxdepth < 1:
456
+ if not topdown:
457
+ yield path, dirs, files
458
+ return
459
+
460
+ for d in dirs:
461
+ yield from self.walk(
462
+ full_dirs[d],
463
+ maxdepth=maxdepth,
464
+ detail=detail,
465
+ topdown=topdown,
466
+ **kwargs,
467
+ )
468
+
469
+ if not topdown:
470
+ # Yield after recursion if walking bottom up
471
+ yield path, dirs, files
472
+
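A usage sketch of walk (editorial; output shown for the in-memory filesystem and may differ on other backends):

import fsspec

fs = fsspec.filesystem("memory")
fs.pipe({"/root/a.txt": b"1", "/root/sub/b.txt": b"2"})
for dirpath, dirnames, filenames in fs.walk("/root"):
    print(dirpath, dirnames, filenames)
# expected, roughly:
#   /root ['sub'] ['a.txt']
#   /root/sub [] ['b.txt']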
473
+ def find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs):
474
+ """List all files below path.
475
+
476
+ Like posix ``find`` command without conditions
477
+
478
+ Parameters
479
+ ----------
480
+ path : str
481
+ maxdepth: int or None
482
+ If not None, the maximum number of levels to descend
483
+ withdirs: bool
484
+ Whether to include directory paths in the output. This is True
485
+ when used by glob, but users usually only want files.
486
+ kwargs are passed to ``ls``.
487
+ """
488
+ # TODO: allow equivalent of -name parameter
489
+ path = self._strip_protocol(path)
490
+ out = {}
491
+
492
+ # Add the root directory if withdirs is requested
493
+ # This is needed for posix glob compliance
494
+ if withdirs and path != "" and self.isdir(path):
495
+ out[path] = self.info(path)
496
+
497
+ for _, dirs, files in self.walk(path, maxdepth, detail=True, **kwargs):
498
+ if withdirs:
499
+ files.update(dirs)
500
+ out.update({info["name"]: info for name, info in files.items()})
501
+ if not out and self.isfile(path):
502
+ # walk works on directories, but find should also return [path]
503
+ # when path happens to be a file
504
+ out[path] = {}
505
+ names = sorted(out)
506
+ if not detail:
507
+ return names
508
+ else:
509
+ return {name: out[name] for name in names}
510
+
511
+ def du(self, path, total=True, maxdepth=None, withdirs=False, **kwargs):
512
+ """Space used by files and optionally directories within a path
513
+
514
+ Directory size does not include the size of its contents.
515
+
516
+ Parameters
517
+ ----------
518
+ path: str
519
+ total: bool
520
+ Whether to sum all the file sizes
521
+ maxdepth: int or None
522
+ Maximum number of directory levels to descend, None for unlimited.
523
+ withdirs: bool
524
+ Whether to include directory paths in the output.
525
+ kwargs: passed to ``find``
526
+
527
+ Returns
528
+ -------
529
+ Dict of {path: size} if total=False, or int otherwise, where numbers
530
+ refer to bytes used.
531
+ """
532
+ sizes = {}
533
+ if withdirs and self.isdir(path):
534
+ # Include top-level directory in output
535
+ info = self.info(path)
536
+ sizes[info["name"]] = info["size"]
537
+ for f in self.find(path, maxdepth=maxdepth, withdirs=withdirs, **kwargs):
538
+ info = self.info(f)
539
+ sizes[info["name"]] = info["size"]
540
+ if total:
541
+ return sum(sizes.values())
542
+ else:
543
+ return sizes
544
+
545
+ def glob(self, path, maxdepth=None, **kwargs):
546
+ """
547
+ Find files by glob-matching.
548
+
549
+ If the path ends with '/', only folders are returned.
550
+
551
+ We support ``"**"``,
552
+ ``"?"`` and ``"[..]"``. We do not support ^ for pattern negation.
553
+
554
+ The `maxdepth` option is applied on the first `**` found in the path.
555
+
556
+ kwargs are passed to ``ls``.
557
+ """
558
+ if maxdepth is not None and maxdepth < 1:
559
+ raise ValueError("maxdepth must be at least 1")
560
+
561
+ import re
562
+
563
+ seps = (os.path.sep, os.path.altsep) if os.path.altsep else (os.path.sep,)
564
+ ends_with_sep = path.endswith(seps) # _strip_protocol strips trailing slash
565
+ path = self._strip_protocol(path)
566
+ append_slash_to_dirname = ends_with_sep or path.endswith(
567
+ tuple(sep + "**" for sep in seps)
568
+ )
569
+ idx_star = path.find("*") if path.find("*") >= 0 else len(path)
570
+ idx_qmark = path.find("?") if path.find("?") >= 0 else len(path)
571
+ idx_brace = path.find("[") if path.find("[") >= 0 else len(path)
572
+
573
+ min_idx = min(idx_star, idx_qmark, idx_brace)
574
+
575
+ detail = kwargs.pop("detail", False)
576
+
577
+ if not has_magic(path):
578
+ if self.exists(path, **kwargs):
579
+ if not detail:
580
+ return [path]
581
+ else:
582
+ return {path: self.info(path, **kwargs)}
583
+ else:
584
+ if not detail:
585
+ return [] # glob of non-existent returns empty
586
+ else:
587
+ return {}
588
+ elif "/" in path[:min_idx]:
589
+ min_idx = path[:min_idx].rindex("/")
590
+ root = path[: min_idx + 1]
591
+ depth = path[min_idx + 1 :].count("/") + 1
592
+ else:
593
+ root = ""
594
+ depth = path[min_idx + 1 :].count("/") + 1
595
+
596
+ if "**" in path:
597
+ if maxdepth is not None:
598
+ idx_double_stars = path.find("**")
599
+ depth_double_stars = path[idx_double_stars:].count("/") + 1
600
+ depth = depth - depth_double_stars + maxdepth
601
+ else:
602
+ depth = None
603
+
604
+ allpaths = self.find(root, maxdepth=depth, withdirs=True, detail=True, **kwargs)
605
+
606
+ pattern = glob_translate(path + ("/" if ends_with_sep else ""))
607
+ pattern = re.compile(pattern)
608
+
609
+ out = {
610
+ p: info
611
+ for p, info in sorted(allpaths.items())
612
+ if pattern.match(
613
+ (
614
+ p + "/"
615
+ if append_slash_to_dirname and info["type"] == "directory"
616
+ else p
617
+ )
618
+ )
619
+ }
620
+
621
+ if detail:
622
+ return out
623
+ else:
624
+ return list(out)
625
+
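A sketch of glob behaviour (editorial; the in-memory filesystem is assumed, and '**' recursion follows the rules in the docstring above):

import fsspec

fs = fsspec.filesystem("memory")
fs.pipe({"/data/x.csv": b"", "/data/sub/y.csv": b"", "/data/z.json": b""})
print(fs.glob("/data/*.csv"))                  # ['/data/x.csv']
print(sorted(fs.glob("/data/**/*.csv")))       # nested matches via '**'
print(fs.glob("/data/*.csv", detail=True))     # {path: info-dict} when detail=True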
626
+ def exists(self, path, **kwargs):
627
+ """Is there a file at the given path"""
628
+ try:
629
+ self.info(path, **kwargs)
630
+ return True
631
+ except: # noqa: E722
632
+ # any exception allowed bar FileNotFoundError?
633
+ return False
634
+
635
+ def lexists(self, path, **kwargs):
636
+ """If there is a file at the given path (including
637
+ broken links)"""
638
+ return self.exists(path)
639
+
640
+ def info(self, path, **kwargs):
641
+ """Give details of entry at path
642
+
643
+ Returns a single dictionary, with exactly the same information as ``ls``
644
+ would with ``detail=True``.
645
+
646
+ The default implementation calls ls and could be overridden by a
647
+ shortcut. kwargs are passed on to ``ls()``.
648
+
649
+ Some file systems might not be able to measure the file's size, in
650
+ which case, the returned dict will include ``'size': None``.
651
+
652
+ Returns
653
+ -------
654
+ dict with keys: name (full path in the FS), size (in bytes), type (file,
655
+ directory, or something else) and other FS-specific keys.
656
+ """
657
+ path = self._strip_protocol(path)
658
+ out = self.ls(self._parent(path), detail=True, **kwargs)
659
+ out = [o for o in out if o["name"].rstrip("/") == path]
660
+ if out:
661
+ return out[0]
662
+ out = self.ls(path, detail=True, **kwargs)
663
+ path = path.rstrip("/")
664
+ out1 = [o for o in out if o["name"].rstrip("/") == path]
665
+ if len(out1) == 1:
666
+ if "size" not in out1[0]:
667
+ out1[0]["size"] = None
668
+ return out1[0]
669
+ elif len(out1) > 1 or out:
670
+ return {"name": path, "size": 0, "type": "directory"}
671
+ else:
672
+ raise FileNotFoundError(path)
673
+
674
+ def checksum(self, path):
675
+ """Unique value for current version of file
676
+
677
+ If the checksum is the same from one moment to another, the contents
678
+ are guaranteed to be the same. If the checksum changes, the contents
679
+ *might* have changed.
680
+
681
+ This should normally be overridden; default will probably capture
682
+ creation/modification timestamp (which would be good) or maybe
683
+ access timestamp (which would be bad)
684
+ """
685
+ return int(tokenize(self.info(path)), 16)
686
+
687
+ def size(self, path):
688
+ """Size in bytes of file"""
689
+ return self.info(path).get("size", None)
690
+
691
+ def sizes(self, paths):
692
+ """Size in bytes of each file in a list of paths"""
693
+ return [self.size(p) for p in paths]
694
+
695
+ def isdir(self, path):
696
+ """Is this entry directory-like?"""
697
+ try:
698
+ return self.info(path)["type"] == "directory"
699
+ except OSError:
700
+ return False
701
+
702
+ def isfile(self, path):
703
+ """Is this entry file-like?"""
704
+ try:
705
+ return self.info(path)["type"] == "file"
706
+ except: # noqa: E722
707
+ return False
708
+
709
+ def read_text(self, path, encoding=None, errors=None, newline=None, **kwargs):
710
+ """Get the contents of the file as a string.
711
+
712
+ Parameters
713
+ ----------
714
+ path: str
715
+ URL of file on this filesystem
716
+ encoding, errors, newline: same as `open`.
717
+ """
718
+ with self.open(
719
+ path,
720
+ mode="r",
721
+ encoding=encoding,
722
+ errors=errors,
723
+ newline=newline,
724
+ **kwargs,
725
+ ) as f:
726
+ return f.read()
727
+
728
+ def write_text(
729
+ self, path, value, encoding=None, errors=None, newline=None, **kwargs
730
+ ):
731
+ """Write the text to the given file.
732
+
733
+ An existing file will be overwritten.
734
+
735
+ Parameters
736
+ ----------
737
+ path: str
738
+ URL of file on this filesystem
739
+ value: str
740
+ Text to write.
741
+ encoding, errors, newline: same as `open`.
742
+ """
743
+ with self.open(
744
+ path,
745
+ mode="w",
746
+ encoding=encoding,
747
+ errors=errors,
748
+ newline=newline,
749
+ **kwargs,
750
+ ) as f:
751
+ return f.write(value)
752
+
753
+ def cat_file(self, path, start=None, end=None, **kwargs):
754
+ """Get the content of a file
755
+
756
+ Parameters
757
+ ----------
758
+ path: URL of file on this filesystem
759
+ start, end: int
760
+ Bytes limits of the read. If negative, backwards from end,
761
+ like usual python slices. Either can be None for start or
762
+ end of file, respectively
763
+ kwargs: passed to ``open()``.
764
+ """
765
+ # explicitly set buffering off?
766
+ with self.open(path, "rb", **kwargs) as f:
767
+ if start is not None:
768
+ if start >= 0:
769
+ f.seek(start)
770
+ else:
771
+ f.seek(max(0, f.size + start))
772
+ if end is not None:
773
+ if end < 0:
774
+ end = f.size + end
775
+ return f.read(end - f.tell())
776
+ return f.read()
777
+
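Byte-range reads following the start/end semantics above (editorial sketch, memory filesystem assumed):

import fsspec

fs = fsspec.filesystem("memory")
fs.pipe_file("/r.bin", b"0123456789")
print(fs.cat_file("/r.bin", start=2, end=5))   # b'234'
print(fs.cat_file("/r.bin", start=-3))         # b'789' (negative offsets count from the end)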
778
+ def pipe_file(self, path, value, **kwargs):
779
+ """Set the bytes of given file"""
780
+ with self.open(path, "wb", **kwargs) as f:
781
+ f.write(value)
782
+
783
+ def pipe(self, path, value=None, **kwargs):
784
+ """Put value into path
785
+
786
+ (counterpart to ``cat``)
787
+
788
+ Parameters
789
+ ----------
790
+ path: string or dict(str, bytes)
791
+ If a string, a single remote location to put ``value`` bytes; if a dict,
792
+ a mapping of {path: bytesvalue}.
793
+ value: bytes, optional
794
+ If using a single path, these are the bytes to put there. Ignored if
795
+ ``path`` is a dict
796
+ """
797
+ if isinstance(path, str):
798
+ self.pipe_file(self._strip_protocol(path), value, **kwargs)
799
+ elif isinstance(path, dict):
800
+ for k, v in path.items():
801
+ self.pipe_file(self._strip_protocol(k), v, **kwargs)
802
+ else:
803
+ raise ValueError("path must be str or dict")
804
+
805
+ def cat_ranges(
806
+ self, paths, starts, ends, max_gap=None, on_error="return", **kwargs
807
+ ):
808
+ """Get the contents of byte ranges from one or more files
809
+
810
+ Parameters
811
+ ----------
812
+ paths: list
813
+ A list of filepaths on this filesystem
814
+ starts, ends: int or list
815
+ Bytes limits of the read. If using a single int, the same value will be
816
+ used to read all the specified files.
817
+ """
818
+ if max_gap is not None:
819
+ raise NotImplementedError
820
+ if not isinstance(paths, list):
821
+ raise TypeError
822
+ if not isinstance(starts, list):
823
+ starts = [starts] * len(paths)
824
+ if not isinstance(ends, list):
825
+ ends = [ends] * len(paths)
826
+ if len(starts) != len(paths) or len(ends) != len(paths):
827
+ raise ValueError
828
+ out = []
829
+ for p, s, e in zip(paths, starts, ends):
830
+ try:
831
+ out.append(self.cat_file(p, s, e))
832
+ except Exception as e:
833
+ if on_error == "return":
834
+ out.append(e)
835
+ else:
836
+ raise
837
+ return out
838
+
839
+ def cat(self, path, recursive=False, on_error="raise", **kwargs):
840
+ """Fetch (potentially multiple) paths' contents
841
+
842
+ Parameters
843
+ ----------
844
+ recursive: bool
845
+ If True, assume the path(s) are directories, and get all the
846
+ contained files
847
+ on_error : "raise", "omit", "return"
848
+ If raise, an underlying exception will be raised (converted to KeyError
849
+ if the type is in self.missing_exceptions); if omit, keys with exception
850
+ will simply not be included in the output; if "return", all keys are
851
+ included in the output, but the value will be bytes or an exception
852
+ instance.
853
+ kwargs: passed to cat_file
854
+
855
+ Returns
856
+ -------
857
+ dict of {path: contents} if there are multiple paths
858
+ or the path has been otherwise expanded
859
+ """
860
+ paths = self.expand_path(path, recursive=recursive)
861
+ if (
862
+ len(paths) > 1
863
+ or isinstance(path, list)
864
+ or paths[0] != self._strip_protocol(path)
865
+ ):
866
+ out = {}
867
+ for path in paths:
868
+ try:
869
+ out[path] = self.cat_file(path, **kwargs)
870
+ except Exception as e:
871
+ if on_error == "raise":
872
+ raise
873
+ if on_error == "return":
874
+ out[path] = e
875
+ return out
876
+ else:
877
+ return self.cat_file(paths[0], **kwargs)
878
+
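How cat's return type depends on the input (editorial sketch, memory filesystem assumed):

import fsspec

fs = fsspec.filesystem("memory")
fs.pipe({"/c/a": b"A", "/c/b": b"B"})
print(fs.cat("/c/a"))              # b'A'  -- single literal path returns bytes
print(fs.cat(["/c/a", "/c/b"]))    # {'/c/a': b'A', '/c/b': b'B'}
print(fs.cat("/c/*"))              # globs are expanded, so a dict is returned as well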
879
+ def get_file(
880
+ self, rpath, lpath, callback=_DEFAULT_CALLBACK, outfile=None, **kwargs
881
+ ):
882
+ """Copy single remote file to local"""
883
+ from .implementations.local import LocalFileSystem
884
+
885
+ if isfilelike(lpath):
886
+ outfile = lpath
887
+ elif self.isdir(rpath):
888
+ os.makedirs(lpath, exist_ok=True)
889
+ return None
890
+
891
+ fs = LocalFileSystem(auto_mkdir=True)
892
+ fs.makedirs(fs._parent(lpath), exist_ok=True)
893
+
894
+ with self.open(rpath, "rb", **kwargs) as f1:
895
+ if outfile is None:
896
+ outfile = open(lpath, "wb")
897
+
898
+ try:
899
+ callback.set_size(getattr(f1, "size", None))
900
+ data = True
901
+ while data:
902
+ data = f1.read(self.blocksize)
903
+ segment_len = outfile.write(data)
904
+ if segment_len is None:
905
+ segment_len = len(data)
906
+ callback.relative_update(segment_len)
907
+ finally:
908
+ if not isfilelike(lpath):
909
+ outfile.close()
910
+
911
+ def get(
912
+ self,
913
+ rpath,
914
+ lpath,
915
+ recursive=False,
916
+ callback=_DEFAULT_CALLBACK,
917
+ maxdepth=None,
918
+ **kwargs,
919
+ ):
920
+ """Copy file(s) to local.
921
+
922
+ Copies a specific file or tree of files (if recursive=True). If lpath
923
+ ends with a "/", it will be assumed to be a directory, and target files
924
+ will go within. Can submit a list of paths, which may be glob-patterns
925
+ and will be expanded.
926
+
927
+ Calls get_file for each source.
928
+ """
929
+ if isinstance(lpath, list) and isinstance(rpath, list):
930
+ # No need to expand paths when both source and destination
931
+ # are provided as lists
932
+ rpaths = rpath
933
+ lpaths = lpath
934
+ else:
935
+ from .implementations.local import (
936
+ LocalFileSystem,
937
+ make_path_posix,
938
+ trailing_sep,
939
+ )
940
+
941
+ source_is_str = isinstance(rpath, str)
942
+ rpaths = self.expand_path(rpath, recursive=recursive, maxdepth=maxdepth)
943
+ if source_is_str and (not recursive or maxdepth is not None):
944
+ # Non-recursive glob does not copy directories
945
+ rpaths = [p for p in rpaths if not (trailing_sep(p) or self.isdir(p))]
946
+ if not rpaths:
947
+ return
948
+
949
+ if isinstance(lpath, str):
950
+ lpath = make_path_posix(lpath)
951
+
952
+ source_is_file = len(rpaths) == 1
953
+ dest_is_dir = isinstance(lpath, str) and (
954
+ trailing_sep(lpath) or LocalFileSystem().isdir(lpath)
955
+ )
956
+
957
+ exists = source_is_str and (
958
+ (has_magic(rpath) and source_is_file)
959
+ or (not has_magic(rpath) and dest_is_dir and not trailing_sep(rpath))
960
+ )
961
+ lpaths = other_paths(
962
+ rpaths,
963
+ lpath,
964
+ exists=exists,
965
+ flatten=not source_is_str,
966
+ )
967
+
968
+ callback.set_size(len(lpaths))
969
+ for lpath, rpath in callback.wrap(zip(lpaths, rpaths)):
970
+ callback.branch(rpath, lpath, kwargs)
971
+ self.get_file(rpath, lpath, **kwargs)
972
+
973
+ def put_file(self, lpath, rpath, callback=_DEFAULT_CALLBACK, **kwargs):
974
+ """Copy single file to remote"""
975
+ if os.path.isdir(lpath):
976
+ self.makedirs(rpath, exist_ok=True)
977
+ return None
978
+
979
+ with open(lpath, "rb") as f1:
980
+ size = f1.seek(0, 2)
981
+ callback.set_size(size)
982
+ f1.seek(0)
983
+
984
+ self.mkdirs(self._parent(os.fspath(rpath)), exist_ok=True)
985
+ with self.open(rpath, "wb", **kwargs) as f2:
986
+ while f1.tell() < size:
987
+ data = f1.read(self.blocksize)
988
+ segment_len = f2.write(data)
989
+ if segment_len is None:
990
+ segment_len = len(data)
991
+ callback.relative_update(segment_len)
992
+
993
+ def put(
994
+ self,
995
+ lpath,
996
+ rpath,
997
+ recursive=False,
998
+ callback=_DEFAULT_CALLBACK,
999
+ maxdepth=None,
1000
+ **kwargs,
1001
+ ):
1002
+ """Copy file(s) from local.
1003
+
1004
+ Copies a specific file or tree of files (if recursive=True). If rpath
1005
+ ends with a "/", it will be assumed to be a directory, and target files
1006
+ will go within.
1007
+
1008
+ Calls put_file for each source.
1009
+ """
1010
+ if isinstance(lpath, list) and isinstance(rpath, list):
1011
+ # No need to expand paths when both source and destination
1012
+ # are provided as lists
1013
+ rpaths = rpath
1014
+ lpaths = lpath
1015
+ else:
1016
+ from .implementations.local import (
1017
+ LocalFileSystem,
1018
+ make_path_posix,
1019
+ trailing_sep,
1020
+ )
1021
+
1022
+ source_is_str = isinstance(lpath, str)
1023
+ if source_is_str:
1024
+ lpath = make_path_posix(lpath)
1025
+ fs = LocalFileSystem()
1026
+ lpaths = fs.expand_path(lpath, recursive=recursive, maxdepth=maxdepth)
1027
+ if source_is_str and (not recursive or maxdepth is not None):
1028
+ # Non-recursive glob does not copy directories
1029
+ lpaths = [p for p in lpaths if not (trailing_sep(p) or fs.isdir(p))]
1030
+ if not lpaths:
1031
+ return
1032
+
1033
+ source_is_file = len(lpaths) == 1
1034
+ dest_is_dir = isinstance(rpath, str) and (
1035
+ trailing_sep(rpath) or self.isdir(rpath)
1036
+ )
1037
+
1038
+ rpath = (
1039
+ self._strip_protocol(rpath)
1040
+ if isinstance(rpath, str)
1041
+ else [self._strip_protocol(p) for p in rpath]
1042
+ )
1043
+ exists = source_is_str and (
1044
+ (has_magic(lpath) and source_is_file)
1045
+ or (not has_magic(lpath) and dest_is_dir and not trailing_sep(lpath))
1046
+ )
1047
+ rpaths = other_paths(
1048
+ lpaths,
1049
+ rpath,
1050
+ exists=exists,
1051
+ flatten=not source_is_str,
1052
+ )
1053
+
1054
+ callback.set_size(len(rpaths))
1055
+ for lpath, rpath in callback.wrap(zip(lpaths, rpaths)):
1056
+ callback.branch(lpath, rpath, kwargs)
1057
+ self.put_file(lpath, rpath, **kwargs)
1058
+
1059
+ def head(self, path, size=1024):
1060
+ """Get the first ``size`` bytes from file"""
1061
+ with self.open(path, "rb") as f:
1062
+ return f.read(size)
1063
+
1064
+ def tail(self, path, size=1024):
1065
+ """Get the last ``size`` bytes from file"""
1066
+ with self.open(path, "rb") as f:
1067
+ f.seek(max(-size, -f.size), 2)
1068
+ return f.read()
1069
+
1070
+ def cp_file(self, path1, path2, **kwargs):
1071
+ raise NotImplementedError
1072
+
1073
+ def copy(
1074
+ self, path1, path2, recursive=False, maxdepth=None, on_error=None, **kwargs
1075
+ ):
1076
+ """Copy within two locations in the filesystem
1077
+
1078
+ on_error : "raise", "ignore"
1079
+ If raise, any not-found exceptions will be raised; if ignore, any
1080
+ not-found exceptions will cause the path to be skipped; defaults to
1081
+ raise unless recursive is true, where the default is ignore
1082
+ """
1083
+ if on_error is None and recursive:
1084
+ on_error = "ignore"
1085
+ elif on_error is None:
1086
+ on_error = "raise"
1087
+
1088
+ if isinstance(path1, list) and isinstance(path2, list):
1089
+ # No need to expand paths when both source and destination
1090
+ # are provided as lists
1091
+ paths1 = path1
1092
+ paths2 = path2
1093
+ else:
1094
+ from .implementations.local import trailing_sep
1095
+
1096
+ source_is_str = isinstance(path1, str)
1097
+ paths1 = self.expand_path(path1, recursive=recursive, maxdepth=maxdepth)
1098
+ if source_is_str and (not recursive or maxdepth is not None):
1099
+ # Non-recursive glob does not copy directories
1100
+ paths1 = [p for p in paths1 if not (trailing_sep(p) or self.isdir(p))]
1101
+ if not paths1:
1102
+ return
1103
+
1104
+ source_is_file = len(paths1) == 1
1105
+ dest_is_dir = isinstance(path2, str) and (
1106
+ trailing_sep(path2) or self.isdir(path2)
1107
+ )
1108
+
1109
+ exists = source_is_str and (
1110
+ (has_magic(path1) and source_is_file)
1111
+ or (not has_magic(path1) and dest_is_dir and not trailing_sep(path1))
1112
+ )
1113
+ paths2 = other_paths(
1114
+ paths1,
1115
+ path2,
1116
+ exists=exists,
1117
+ flatten=not source_is_str,
1118
+ )
1119
+
1120
+ for p1, p2 in zip(paths1, paths2):
1121
+ try:
1122
+ self.cp_file(p1, p2, **kwargs)
1123
+ except FileNotFoundError:
1124
+ if on_error == "raise":
1125
+ raise
1126
+
1127
+ def expand_path(self, path, recursive=False, maxdepth=None, **kwargs):
1128
+ """Turn one or more globs or directories into a list of all matching paths
1129
+ to files or directories.
1130
+
1131
+ kwargs are passed to ``glob`` or ``find``, which may in turn call ``ls``
1132
+ """
1133
+
1134
+ if maxdepth is not None and maxdepth < 1:
1135
+ raise ValueError("maxdepth must be at least 1")
1136
+
1137
+ if isinstance(path, str):
1138
+ out = self.expand_path([path], recursive, maxdepth)
1139
+ else:
1140
+ out = set()
1141
+ path = [self._strip_protocol(p) for p in path]
1142
+ for p in path:
1143
+ if has_magic(p):
1144
+ bit = set(self.glob(p, maxdepth=maxdepth, **kwargs))
1145
+ out |= bit
1146
+ if recursive:
1147
+ # glob call above expanded one depth so if maxdepth is defined
1148
+ # then decrement it in expand_path call below. If it is zero
1149
+ # after decrementing then avoid expand_path call.
1150
+ if maxdepth is not None and maxdepth <= 1:
1151
+ continue
1152
+ out |= set(
1153
+ self.expand_path(
1154
+ list(bit),
1155
+ recursive=recursive,
1156
+ maxdepth=maxdepth - 1 if maxdepth is not None else None,
1157
+ **kwargs,
1158
+ )
1159
+ )
1160
+ continue
1161
+ elif recursive:
1162
+ rec = set(
1163
+ self.find(
1164
+ p, maxdepth=maxdepth, withdirs=True, detail=False, **kwargs
1165
+ )
1166
+ )
1167
+ out |= rec
1168
+ if p not in out and (recursive is False or self.exists(p)):
1169
+ # should only check once, for the root
1170
+ out.add(p)
1171
+ if not out:
1172
+ raise FileNotFoundError(path)
1173
+ return sorted(out)
1174
+
1175
+ def mv(self, path1, path2, recursive=False, maxdepth=None, **kwargs):
1176
+ """Move file(s) from one location to another"""
1177
+ if path1 == path2:
1178
+ logger.debug("%s mv: The paths are the same, so no files were moved.", self)
1179
+ else:
1180
+ self.copy(path1, path2, recursive=recursive, maxdepth=maxdepth)
1181
+ self.rm(path1, recursive=recursive)
1182
+
1183
+ def rm_file(self, path):
1184
+ """Delete a file"""
1185
+ self._rm(path)
1186
+
1187
+ def _rm(self, path):
1188
+ """Delete one file"""
1189
+ # this is the old name for the method, prefer rm_file
1190
+ raise NotImplementedError
1191
+
1192
+ def rm(self, path, recursive=False, maxdepth=None):
1193
+ """Delete files.
1194
+
1195
+ Parameters
1196
+ ----------
1197
+ path: str or list of str
1198
+ File(s) to delete.
1199
+ recursive: bool
1200
+ If file(s) are directories, recursively delete contents and then
1201
+ also remove the directory
1202
+ maxdepth: int or None
1203
+ Depth to pass to walk for finding files to delete, if recursive.
1204
+ If None, there will be no limit and infinite recursion may be
1205
+ possible.
1206
+ """
1207
+ path = self.expand_path(path, recursive=recursive, maxdepth=maxdepth)
1208
+ for p in reversed(path):
1209
+ self.rm_file(p)
1210
+
1211
+ @classmethod
1212
+ def _parent(cls, path):
1213
+ path = cls._strip_protocol(path)
1214
+ if "/" in path:
1215
+ parent = path.rsplit("/", 1)[0].lstrip(cls.root_marker)
1216
+ return cls.root_marker + parent
1217
+ else:
1218
+ return cls.root_marker
1219
+
1220
+ def _open(
1221
+ self,
1222
+ path,
1223
+ mode="rb",
1224
+ block_size=None,
1225
+ autocommit=True,
1226
+ cache_options=None,
1227
+ **kwargs,
1228
+ ):
1229
+ """Return raw bytes-mode file-like from the file-system"""
1230
+ return AbstractBufferedFile(
1231
+ self,
1232
+ path,
1233
+ mode,
1234
+ block_size,
1235
+ autocommit,
1236
+ cache_options=cache_options,
1237
+ **kwargs,
1238
+ )
1239
+
1240
+ def open(
1241
+ self,
1242
+ path,
1243
+ mode="rb",
1244
+ block_size=None,
1245
+ cache_options=None,
1246
+ compression=None,
1247
+ **kwargs,
1248
+ ):
1249
+ """
1250
+ Return a file-like object from the filesystem
1251
+
1252
+ The resultant instance must function correctly in a context ``with``
1253
+ block.
1254
+
1255
+ Parameters
1256
+ ----------
1257
+ path: str
1258
+ Target file
1259
+ mode: str like 'rb', 'w'
1260
+ See builtin ``open()``
1261
+ block_size: int
1262
+ Some indication of buffering - this is a value in bytes
1263
+ cache_options : dict, optional
1264
+ Extra arguments to pass through to the cache.
1265
+ compression: string or None
1266
+ If given, open file using compression codec. Can either be a compression
1267
+ name (a key in ``fsspec.compression.compr``) or "infer" to guess the
1268
+ compression from the filename suffix.
1269
+ encoding, errors, newline: passed on to TextIOWrapper for text mode
1270
+ """
1271
+ import io
1272
+
1273
+ path = self._strip_protocol(path)
1274
+ if "b" not in mode:
1275
+ mode = mode.replace("t", "") + "b"
1276
+
1277
+ text_kwargs = {
1278
+ k: kwargs.pop(k)
1279
+ for k in ["encoding", "errors", "newline"]
1280
+ if k in kwargs
1281
+ }
1282
+ return io.TextIOWrapper(
1283
+ self.open(
1284
+ path,
1285
+ mode,
1286
+ block_size=block_size,
1287
+ cache_options=cache_options,
1288
+ compression=compression,
1289
+ **kwargs,
1290
+ ),
1291
+ **text_kwargs,
1292
+ )
1293
+ else:
1294
+ ac = kwargs.pop("autocommit", not self._intrans)
1295
+ f = self._open(
1296
+ path,
1297
+ mode=mode,
1298
+ block_size=block_size,
1299
+ autocommit=ac,
1300
+ cache_options=cache_options,
1301
+ **kwargs,
1302
+ )
1303
+ if compression is not None:
1304
+ from fsspec.compression import compr
1305
+ from fsspec.core import get_compression
1306
+
1307
+ compression = get_compression(path, compression)
1308
+ compress = compr[compression]
1309
+ f = compress(f, mode=mode[0])
1310
+
1311
+ if not ac and "r" not in mode:
1312
+ self.transaction.files.append(f)
1313
+ return f
1314
+
1315
+ def touch(self, path, truncate=True, **kwargs):
1316
+ """Create empty file, or update timestamp
1317
+
1318
+ Parameters
1319
+ ----------
1320
+ path: str
1321
+ file location
1322
+ truncate: bool
1323
+ If True, always set file size to 0; if False, update timestamp and
1324
+ leave file unchanged, if backend allows this
1325
+ """
1326
+ if truncate or not self.exists(path):
1327
+ with self.open(path, "wb", **kwargs):
1328
+ pass
1329
+ else:
1330
+ raise NotImplementedError # update timestamp, if possible
1331
+
1332
+ def ukey(self, path):
1333
+ """Hash of file properties, to tell if it has changed"""
1334
+ return sha256(str(self.info(path)).encode()).hexdigest()
1335
+
1336
+ def read_block(self, fn, offset, length, delimiter=None):
1337
+ """Read a block of bytes from a file
1338
+
1339
+ Starting at ``offset`` of the file, read ``length`` bytes. If
1340
+ ``delimiter`` is set then we ensure that the read starts and stops at
1341
+ delimiter boundaries that follow the locations ``offset`` and ``offset
1342
+ + length``. If ``offset`` is zero then we start at zero. The
1343
+ bytestring returned WILL include the end delimiter string.
1344
+
1345
+ If offset+length is beyond the eof, reads to eof.
1346
+
1347
+ Parameters
1348
+ ----------
1349
+ fn: string
1350
+ Path to filename
1351
+ offset: int
1352
+ Byte offset to start read
1353
+ length: int
1354
+ Number of bytes to read. If None, read to end.
1355
+ delimiter: bytes (optional)
1356
+ Ensure reading starts and stops at delimiter bytestring
1357
+
1358
+ Examples
1359
+ --------
1360
+ >>> fs.read_block('data/file.csv', 0, 13) # doctest: +SKIP
1361
+ b'Alice, 100\\nBo'
1362
+ >>> fs.read_block('data/file.csv', 0, 13, delimiter=b'\\n') # doctest: +SKIP
1363
+ b'Alice, 100\\nBob, 200\\n'
1364
+
1365
+ Use ``length=None`` to read to the end of the file.
1366
+ >>> fs.read_block('data/file.csv', 0, None, delimiter=b'\\n') # doctest: +SKIP
1367
+ b'Alice, 100\\nBob, 200\\nCharlie, 300'
1368
+
1369
+ See Also
1370
+ --------
1371
+ :func:`fsspec.utils.read_block`
1372
+ """
1373
+ with self.open(fn, "rb") as f:
1374
+ size = f.size
1375
+ if length is None:
1376
+ length = size
1377
+ if size is not None and offset + length > size:
1378
+ length = size - offset
1379
+ return read_block(f, offset, length, delimiter)
1380
+
1381
+ def to_json(self):
1382
+ """
1383
+ JSON representation of this filesystem instance
1384
+
1385
+ Returns
1386
+ -------
1387
+ str: JSON structure with keys cls (the python location of this class),
1388
+ protocol (text name of this class's protocol, first one in case of
1389
+ multiple), args (positional args, usually empty), and all other
1390
+ kwargs as their own keys.
1391
+ """
1392
+ import json
1393
+
1394
+ cls = type(self)
1395
+ cls = ".".join((cls.__module__, cls.__name__))
1396
+ proto = (
1397
+ self.protocol[0]
1398
+ if isinstance(self.protocol, (tuple, list))
1399
+ else self.protocol
1400
+ )
1401
+ return json.dumps(
1402
+ dict(
1403
+ **{"cls": cls, "protocol": proto, "args": self.storage_args},
1404
+ **self.storage_options,
1405
+ )
1406
+ )
1407
+
1408
+ @staticmethod
1409
+ def from_json(blob):
1410
+ """
1411
+ Recreate a filesystem instance from JSON representation
1412
+
1413
+ See ``.to_json()`` for the expected structure of the input
1414
+
1415
+ Parameters
1416
+ ----------
1417
+ blob: str
1418
+
1419
+ Returns
1420
+ -------
1421
+ file system instance, not necessarily of this particular class.
1422
+ """
1423
+ import json
1424
+
1425
+ from .registry import _import_class, get_filesystem_class
1426
+
1427
+ dic = json.loads(blob)
1428
+ protocol = dic.pop("protocol")
1429
+ try:
1430
+ cls = _import_class(dic.pop("cls"))
1431
+ except (ImportError, ValueError, RuntimeError, KeyError):
1432
+ cls = get_filesystem_class(protocol)
1433
+ return cls(*dic.pop("args", ()), **dic)
1434
+
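Round-trip sketch for the JSON helpers above (editorial; because identical arguments hit the instance cache, the deserialised object is the very same instance):

import fsspec
from fsspec import AbstractFileSystem

fs = fsspec.filesystem("memory")
blob = fs.to_json()                        # includes cls, protocol, args and storage_options
fs2 = AbstractFileSystem.from_json(blob)
assert fs is fs2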
1435
+ def _get_pyarrow_filesystem(self):
1436
+ """
1437
+ Make a version of the FS instance which will be acceptable to pyarrow
1438
+ """
1439
+ # all instances already also derive from pyarrow
1440
+ return self
1441
+
1442
+ def get_mapper(self, root="", check=False, create=False, missing_exceptions=None):
1443
+ """Create key/value store based on this file-system
1444
+
1445
+ Makes a MutableMapping interface to the FS at the given root path.
1446
+ See ``fsspec.mapping.FSMap`` for further details.
1447
+ """
1448
+ from .mapping import FSMap
1449
+
1450
+ return FSMap(
1451
+ root,
1452
+ self,
1453
+ check=check,
1454
+ create=create,
1455
+ missing_exceptions=missing_exceptions,
1456
+ )
1457
+
1458
+ @classmethod
1459
+ def clear_instance_cache(cls):
1460
+ """
1461
+ Clear the cache of filesystem instances.
1462
+
1463
+ Notes
1464
+ -----
1465
+ Unless overridden by setting the ``cachable`` class attribute to False,
1466
+ the filesystem class stores a reference to newly created instances. This
1467
+ prevents Python's normal rules around garbage collection from working,
1468
+ since the instances refcount will not drop to zero until
1469
+ ``clear_instance_cache`` is called.
1470
+ """
1471
+ cls._cache.clear()
1472
+
1473
+ def created(self, path):
1474
+ """Return the created timestamp of a file as a datetime.datetime"""
1475
+ raise NotImplementedError
1476
+
1477
+ def modified(self, path):
1478
+ """Return the modified timestamp of a file as a datetime.datetime"""
1479
+ raise NotImplementedError
1480
+
1481
+ # ------------------------------------------------------------------------
1482
+ # Aliases
1483
+
1484
+ def read_bytes(self, path, start=None, end=None, **kwargs):
1485
+ """Alias of `AbstractFileSystem.cat_file`."""
1486
+ return self.cat_file(path, start=start, end=end, **kwargs)
1487
+
1488
+ def write_bytes(self, path, value, **kwargs):
1489
+ """Alias of `AbstractFileSystem.pipe_file`."""
1490
+ self.pipe_file(path, value, **kwargs)
1491
+
1492
+ def makedir(self, path, create_parents=True, **kwargs):
1493
+ """Alias of `AbstractFileSystem.mkdir`."""
1494
+ return self.mkdir(path, create_parents=create_parents, **kwargs)
1495
+
1496
+ def mkdirs(self, path, exist_ok=False):
1497
+ """Alias of `AbstractFileSystem.makedirs`."""
1498
+ return self.makedirs(path, exist_ok=exist_ok)
1499
+
1500
+ def listdir(self, path, detail=True, **kwargs):
1501
+ """Alias of `AbstractFileSystem.ls`."""
1502
+ return self.ls(path, detail=detail, **kwargs)
1503
+
1504
+ def cp(self, path1, path2, **kwargs):
1505
+ """Alias of `AbstractFileSystem.copy`."""
1506
+ return self.copy(path1, path2, **kwargs)
1507
+
1508
+ def move(self, path1, path2, **kwargs):
1509
+ """Alias of `AbstractFileSystem.mv`."""
1510
+ return self.mv(path1, path2, **kwargs)
1511
+
1512
+ def stat(self, path, **kwargs):
1513
+ """Alias of `AbstractFileSystem.info`."""
1514
+ return self.info(path, **kwargs)
1515
+
1516
+ def disk_usage(self, path, total=True, maxdepth=None, **kwargs):
1517
+ """Alias of `AbstractFileSystem.du`."""
1518
+ return self.du(path, total=total, maxdepth=maxdepth, **kwargs)
1519
+
1520
+ def rename(self, path1, path2, **kwargs):
1521
+ """Alias of `AbstractFileSystem.mv`."""
1522
+ return self.mv(path1, path2, **kwargs)
1523
+
1524
+ def delete(self, path, recursive=False, maxdepth=None):
1525
+ """Alias of `AbstractFileSystem.rm`."""
1526
+ return self.rm(path, recursive=recursive, maxdepth=maxdepth)
1527
+
1528
+ def upload(self, lpath, rpath, recursive=False, **kwargs):
1529
+ """Alias of `AbstractFileSystem.put`."""
1530
+ return self.put(lpath, rpath, recursive=recursive, **kwargs)
1531
+
1532
+ def download(self, rpath, lpath, recursive=False, **kwargs):
1533
+ """Alias of `AbstractFileSystem.get`."""
1534
+ return self.get(rpath, lpath, recursive=recursive, **kwargs)
1535
+
1536
+ def sign(self, path, expiration=100, **kwargs):
1537
+ """Create a signed URL representing the given path
1538
+
1539
+ Some implementations allow temporary URLs to be generated, as a
1540
+ way of delegating credentials.
1541
+
1542
+ Parameters
1543
+ ----------
1544
+ path : str
1545
+ The path on the filesystem
1546
+ expiration : int
1547
+ Number of seconds to enable the URL for (if supported)
1548
+
1549
+ Returns
1550
+ -------
1551
+ URL : str
1552
+ The signed URL
1553
+
1554
+ Raises
1555
+ ------
1556
+ NotImplementedError : if method is not implemented for a filesystem
1557
+ """
1558
+ raise NotImplementedError("Sign is not implemented for this filesystem")
1559
+
1560
+ def _isfilestore(self):
1561
+ # Originally inherited from pyarrow DaskFileSystem. Keeping this
1562
+ # here for backwards compatibility as long as pyarrow uses its
1563
+ # legacy fsspec-compatible filesystems and thus accepts fsspec
1564
+ # filesystems as well
1565
+ return False
1566
+
1567
+
1568
+ class AbstractBufferedFile(io.IOBase):
1569
+ """Convenient class to derive from to provide buffering
1570
+
1571
+ In the case that the backend does not provide a pythonic file-like object
1572
+ already, this class contains much of the logic to build one. The only
1573
+ methods that need to be overridden are ``_upload_chunk``,
1574
+ ``_initiate_upload`` and ``_fetch_range``.
1575
+ """
1576
+
1577
+ DEFAULT_BLOCK_SIZE = 5 * 2**20
1578
+ _details = None
1579
+
1580
+ def __init__(
1581
+ self,
1582
+ fs,
1583
+ path,
1584
+ mode="rb",
1585
+ block_size="default",
1586
+ autocommit=True,
1587
+ cache_type="readahead",
1588
+ cache_options=None,
1589
+ size=None,
1590
+ **kwargs,
1591
+ ):
1592
+ """
1593
+ Template for files with buffered reading and writing
1594
+
1595
+ Parameters
1596
+ ----------
1597
+ fs: instance of FileSystem
1598
+ path: str
1599
+ location in file-system
1600
+ mode: str
1601
+ Normal file modes. Currently only 'wb', 'ab' or 'rb'. Some file
1602
+ systems may be read-only, and some may not support append.
1603
+ block_size: int
1604
+ Buffer size for reading or writing, 'default' for class default
1605
+ autocommit: bool
1606
+ Whether to write to final destination; may only impact what
1607
+ happens when file is being closed.
1608
+ cache_type: {"readahead", "none", "mmap", "bytes"}, default "readahead"
1609
+ Caching policy in read mode. See the definitions in ``core``.
1610
+ cache_options : dict
1611
+ Additional options passed to the constructor for the cache specified
1612
+ by `cache_type`.
1613
+ size: int
1614
+ If given and in read mode, suppresses having to look up the file size
1615
+ kwargs:
1616
+ Gets stored as self.kwargs
1617
+ """
1618
+ from .core import caches
1619
+
1620
+ self.path = path
1621
+ self.fs = fs
1622
+ self.mode = mode
1623
+ self.blocksize = (
1624
+ self.DEFAULT_BLOCK_SIZE if block_size in ["default", None] else block_size
1625
+ )
1626
+ self.loc = 0
1627
+ self.autocommit = autocommit
1628
+ self.end = None
1629
+ self.start = None
1630
+ self.closed = False
1631
+
1632
+ if cache_options is None:
1633
+ cache_options = {}
1634
+
1635
+ if "trim" in kwargs:
1636
+ warnings.warn(
1637
+ "Passing 'trim' to control the cache behavior has been deprecated. "
1638
+ "Specify it within the 'cache_options' argument instead.",
1639
+ FutureWarning,
1640
+ )
1641
+ cache_options["trim"] = kwargs.pop("trim")
1642
+
1643
+ self.kwargs = kwargs
1644
+
1645
+ if mode not in {"ab", "rb", "wb"}:
1646
+ raise NotImplementedError("File mode not supported")
1647
+ if mode == "rb":
1648
+ if size is not None:
1649
+ self.size = size
1650
+ else:
1651
+ self.size = self.details["size"]
1652
+ self.cache = caches[cache_type](
1653
+ self.blocksize, self._fetch_range, self.size, **cache_options
1654
+ )
1655
+ else:
1656
+ self.buffer = io.BytesIO()
1657
+ self.offset = None
1658
+ self.forced = False
1659
+ self.location = None
1660
+
1661
+ @property
1662
+ def details(self):
1663
+ if self._details is None:
1664
+ self._details = self.fs.info(self.path)
1665
+ return self._details
1666
+
1667
+ @details.setter
1668
+ def details(self, value):
1669
+ self._details = value
1670
+ self.size = value["size"]
1671
+
1672
+ @property
1673
+ def full_name(self):
1674
+ return _unstrip_protocol(self.path, self.fs)
1675
+
1676
+ @property
1677
+ def closed(self):
1678
+ # get around this attr being read-only in IOBase
1679
+ # use getattr here, since this can be called during del
1680
+ return getattr(self, "_closed", True)
1681
+
1682
+ @closed.setter
1683
+ def closed(self, c):
1684
+ self._closed = c
1685
+
1686
+ def __hash__(self):
1687
+ if "w" in self.mode:
1688
+ return id(self)
1689
+ else:
1690
+ return int(tokenize(self.details), 16)
1691
+
1692
+ def __eq__(self, other):
1693
+ """Files are equal if they have the same checksum, only in read mode"""
1694
+ return self.mode == "rb" and other.mode == "rb" and hash(self) == hash(other)
1695
+
1696
+ def commit(self):
1697
+ """Move from temp to final destination"""
1698
+
1699
+ def discard(self):
1700
+ """Throw away temporary file"""
1701
+
1702
+ def info(self):
1703
+ """File information about this path"""
1704
+ if "r" in self.mode:
1705
+ return self.details
1706
+ else:
1707
+ raise ValueError("Info not available while writing")
1708
+
1709
+ def tell(self):
1710
+ """Current file location"""
1711
+ return self.loc
1712
+
1713
+ def seek(self, loc, whence=0):
1714
+ """Set current file location
1715
+
1716
+ Parameters
1717
+ ----------
1718
+ loc: int
1719
+ byte location
1720
+ whence: {0, 1, 2}
1721
+ from start of file, current location or end of file, resp.
1722
+ """
1723
+ loc = int(loc)
1724
+ if not self.mode == "rb":
1725
+ raise OSError(ESPIPE, "Seek only available in read mode")
1726
+ if whence == 0:
1727
+ nloc = loc
1728
+ elif whence == 1:
1729
+ nloc = self.loc + loc
1730
+ elif whence == 2:
1731
+ nloc = self.size + loc
1732
+ else:
1733
+ raise ValueError(f"invalid whence ({whence}, should be 0, 1 or 2)")
1734
+ if nloc < 0:
1735
+ raise ValueError("Seek before start of file")
1736
+ self.loc = nloc
1737
+ return self.loc
1738
+
1739
+ def write(self, data):
1740
+ """
1741
+ Write data to buffer.
1742
+
1743
+ Buffer only sent on flush() or if buffer is greater than
1744
+ or equal to blocksize.
1745
+
1746
+ Parameters
1747
+ ----------
1748
+ data: bytes
1749
+ Set of bytes to be written.
1750
+ """
1751
+ if self.mode not in {"wb", "ab"}:
1752
+ raise ValueError("File not in write mode")
1753
+ if self.closed:
1754
+ raise ValueError("I/O operation on closed file.")
1755
+ if self.forced:
1756
+ raise ValueError("This file has been force-flushed, can only close")
1757
+ out = self.buffer.write(data)
1758
+ self.loc += out
1759
+ if self.buffer.tell() >= self.blocksize:
1760
+ self.flush()
1761
+ return out
1762
+
1763
+ def flush(self, force=False):
1764
+ """
1765
+ Write buffered data to backend store.
1766
+
1767
+ Writes the current buffer, if it is larger than the block-size, or if
1768
+ the file is being closed.
1769
+
1770
+ Parameters
1771
+ ----------
1772
+ force: bool
1773
+ When closing, write the last block even if it is smaller than
1774
+ blocks are allowed to be. Disallows further writing to this file.
1775
+ """
1776
+
1777
+ if self.closed:
1778
+ raise ValueError("Flush on closed file")
1779
+ if force and self.forced:
1780
+ raise ValueError("Force flush cannot be called more than once")
1781
+ if force:
1782
+ self.forced = True
1783
+
1784
+ if self.mode not in {"wb", "ab"}:
1785
+ # no-op to flush on read-mode
1786
+ return
1787
+
1788
+ if not force and self.buffer.tell() < self.blocksize:
1789
+ # Defer write on small block
1790
+ return
1791
+
1792
+ if self.offset is None:
1793
+ # Initialize a multipart upload
1794
+ self.offset = 0
1795
+ try:
1796
+ self._initiate_upload()
1797
+ except: # noqa: E722
1798
+ self.closed = True
1799
+ raise
1800
+
1801
+ if self._upload_chunk(final=force) is not False:
1802
+ self.offset += self.buffer.seek(0, 2)
1803
+ self.buffer = io.BytesIO()
1804
+
1805
+ def _upload_chunk(self, final=False):
1806
+ """Write one part of a multi-block file upload
1807
+
1808
+ Parameters
1809
+ ==========
1810
+ final: bool
1811
+ This is the last block, so should complete file, if
1812
+ self.autocommit is True.
1813
+ """
1814
+ # may not yet have been initialized, may need to call _initiate_upload
1815
+
1816
+ def _initiate_upload(self):
1817
+ """Create remote file/upload"""
1818
+ pass
1819
+
1820
+ def _fetch_range(self, start, end):
1821
+ """Get the specified set of bytes from remote"""
1822
+ raise NotImplementedError
1823
+
1824
+ def read(self, length=-1):
1825
+ """
1826
+ Return data from cache, or fetch pieces as necessary
1827
+
1828
+ Parameters
1829
+ ----------
1830
+ length: int (-1)
1831
+ Number of bytes to read; if <0, all remaining bytes.
1832
+ """
1833
+ length = -1 if length is None else int(length)
1834
+ if self.mode != "rb":
1835
+ raise ValueError("File not in read mode")
1836
+ if length < 0:
1837
+ length = self.size - self.loc
1838
+ if self.closed:
1839
+ raise ValueError("I/O operation on closed file.")
1840
+ logger.debug("%s read: %i - %i", self, self.loc, self.loc + length)
1841
+ if length == 0:
1842
+ # don't even bother calling fetch
1843
+ return b""
1844
+ out = self.cache._fetch(self.loc, self.loc + length)
1845
+ self.loc += len(out)
1846
+ return out
1847
+
1848
+ def readinto(self, b):
1849
+ """mirrors builtin file's readinto method
1850
+
1851
+ https://docs.python.org/3/library/io.html#io.RawIOBase.readinto
1852
+ """
1853
+ out = memoryview(b).cast("B")
1854
+ data = self.read(out.nbytes)
1855
+ out[: len(data)] = data
1856
+ return len(data)
1857
+
1858
+ def readuntil(self, char=b"\n", blocks=None):
1859
+ """Return data between current position and first occurrence of char
1860
+
1861
+ char is included in the output, except if the end of the tile is
1862
+ encountered first.
1863
+
1864
+ Parameters
1865
+ ----------
1866
+ char: bytes
1867
+ Thing to find
1868
+ blocks: None or int
1869
+ How much to read in each go. Defaults to file blocksize - which may
1870
+ mean a new read on every call.
1871
+ """
1872
+ out = []
1873
+ while True:
1874
+ start = self.tell()
1875
+ part = self.read(blocks or self.blocksize)
1876
+ if len(part) == 0:
1877
+ break
1878
+ found = part.find(char)
1879
+ if found > -1:
1880
+ out.append(part[: found + len(char)])
1881
+ self.seek(start + found + len(char))
1882
+ break
1883
+ out.append(part)
1884
+ return b"".join(out)
1885
+
1886
+ def readline(self):
1887
+ """Read until first occurrence of newline character
1888
+
1889
+ Note that, because of character encoding, this is not necessarily a
1890
+ true line ending.
1891
+ """
1892
+ return self.readuntil(b"\n")
1893
+
1894
+ def __next__(self):
1895
+ out = self.readline()
1896
+ if out:
1897
+ return out
1898
+ raise StopIteration
1899
+
1900
+ def __iter__(self):
1901
+ return self
1902
+
1903
+ def readlines(self):
1904
+ """Return all data, split by the newline character"""
1905
+ data = self.read()
1906
+ lines = data.split(b"\n")
1907
+ out = [l + b"\n" for l in lines[:-1]]
1908
+ if data.endswith(b"\n"):
1909
+ return out
1910
+ else:
1911
+ return out + [lines[-1]]
1912
+ # return list(self) ???
1913
+
1914
+ def readinto1(self, b):
1915
+ return self.readinto(b)
1916
+
1917
+ def close(self):
1918
+ """Close file
1919
+
1920
+ Finalizes writes, discards cache
1921
+ """
1922
+ if getattr(self, "_unclosable", False):
1923
+ return
1924
+ if self.closed:
1925
+ return
1926
+ if self.mode == "rb":
1927
+ self.cache = None
1928
+ else:
1929
+ if not self.forced:
1930
+ self.flush(force=True)
1931
+
1932
+ if self.fs is not None:
1933
+ self.fs.invalidate_cache(self.path)
1934
+ self.fs.invalidate_cache(self.fs._parent(self.path))
1935
+
1936
+ self.closed = True
1937
+
1938
+ def readable(self):
1939
+ """Whether opened for reading"""
1940
+ return self.mode == "rb" and not self.closed
1941
+
1942
+ def seekable(self):
1943
+ """Whether is seekable (only in read mode)"""
1944
+ return self.readable()
1945
+
1946
+ def writable(self):
1947
+ """Whether opened for writing"""
1948
+ return self.mode in {"wb", "ab"} and not self.closed
1949
+
1950
+ def __del__(self):
1951
+ if not self.closed:
1952
+ self.close()
1953
+
1954
+ def __str__(self):
1955
+ return f"<File-like object {type(self.fs).__name__}, {self.path}>"
1956
+
1957
+ __repr__ = __str__
1958
+
1959
+ def __enter__(self):
1960
+ return self
1961
+
1962
+ def __exit__(self, *args):
1963
+ self.close()
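The read path above only needs `_fetch_range`; writes go through `self.buffer` and are shipped by `flush` via `_initiate_upload`/`_upload_chunk`. The following is a minimal, hypothetical sketch (not part of this commit) of the read-side contract, assuming the class shown above is importable as `fsspec.spec.AbstractBufferedFile`; `BytesBackedFile` and `PAYLOAD` are illustrative names only.

```python
# Illustrative sketch: a toy file that serves byte ranges from memory, letting
# the base class's block cache provide read/seek/readuntil/readlines.
from fsspec.spec import AbstractBufferedFile

PAYLOAD = b"alpha\nbeta\ngamma\n"


class BytesBackedFile(AbstractBufferedFile):
    """Toy read-only file backed by an in-memory payload."""

    def _fetch_range(self, start, end):
        # The block cache calls this whenever the requested bytes are missing.
        return PAYLOAD[start:end]


# fs=None is tolerable here only because size= is given, so fs.info() is never
# consulted for the file details; a real backend would pass its filesystem.
f = BytesBackedFile(fs=None, path="demo", mode="rb", size=len(PAYLOAD))
assert f.readuntil(b"\n") == b"alpha\n"
f.seek(0)
assert f.readlines() == [b"alpha\n", b"beta\n", b"gamma\n"]
f.close()
```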
lib/python3.11/site-packages/fsspec/tests/abstract/__init__.py ADDED
@@ -0,0 +1,287 @@
1
+ import os
2
+ from hashlib import md5
3
+
4
+ import pytest
5
+
6
+ from fsspec.implementations.local import LocalFileSystem
7
+ from fsspec.tests.abstract.copy import AbstractCopyTests # noqa
8
+ from fsspec.tests.abstract.get import AbstractGetTests # noqa
9
+ from fsspec.tests.abstract.put import AbstractPutTests # noqa
10
+
11
+
12
+ class BaseAbstractFixtures:
13
+ """
14
+ Abstract base class containing fixtures that are used by but never need to
15
+ be overridden in derived filesystem-specific classes to run the abstract
16
+ tests on such filesystems.
17
+ """
18
+
19
+ @pytest.fixture
20
+ def fs_bulk_operations_scenario_0(self, fs, fs_join, fs_path):
21
+ """
22
+ Scenario on remote filesystem that is used for many cp/get/put tests.
23
+
24
+ Cleans up at the end of each test in which it is used.
25
+ """
26
+ source = self._bulk_operations_scenario_0(fs, fs_join, fs_path)
27
+ yield source
28
+ fs.rm(source, recursive=True)
29
+
30
+ @pytest.fixture
31
+ def fs_glob_edge_cases_files(self, fs, fs_join, fs_path):
32
+ """
33
+ Scenario on remote filesystem that is used for glob edge cases cp/get/put tests.
34
+
35
+ Cleans up at the end of each test in which it is used.
36
+ """
37
+ source = self._glob_edge_cases_files(fs, fs_join, fs_path)
38
+ yield source
39
+ fs.rm(source, recursive=True)
40
+
41
+ @pytest.fixture
42
+ def fs_dir_and_file_with_same_name_prefix(self, fs, fs_join, fs_path):
43
+ """
44
+ Scenario on remote filesystem that is used to check cp/get/put on directory
45
+ and file with the same name prefixes.
46
+
47
+ Cleans up at the end of each test in which it is used.
48
+ """
49
+ source = self._dir_and_file_with_same_name_prefix(fs, fs_join, fs_path)
50
+ yield source
51
+ fs.rm(source, recursive=True)
52
+
53
+ @pytest.fixture
54
+ def fs_10_files_with_hashed_names(self, fs, fs_join, fs_path):
55
+ """
56
+ Scenario on remote filesystem that is used to check cp/get/put files order
57
+ when source and destination are lists.
58
+
59
+ Cleans up at the end of each test in which it is used.
60
+ """
61
+ source = self._10_files_with_hashed_names(fs, fs_join, fs_path)
62
+ yield source
63
+ fs.rm(source, recursive=True)
64
+
65
+ @pytest.fixture
66
+ def fs_target(self, fs, fs_join, fs_path):
67
+ """
68
+ Return name of remote directory that does not yet exist to copy into.
69
+
70
+ Cleans up at the end of each test in which it is used.
71
+ """
72
+ target = fs_join(fs_path, "target")
73
+ yield target
74
+ if fs.exists(target):
75
+ fs.rm(target, recursive=True)
76
+
77
+ @pytest.fixture
78
+ def local_bulk_operations_scenario_0(self, local_fs, local_join, local_path):
79
+ """
80
+ Scenario on local filesystem that is used for many cp/get/put tests.
81
+
82
+ Cleans up at the end of each test in which it is used.
83
+ """
84
+ source = self._bulk_operations_scenario_0(local_fs, local_join, local_path)
85
+ yield source
86
+ local_fs.rm(source, recursive=True)
87
+
88
+ @pytest.fixture
89
+ def local_glob_edge_cases_files(self, local_fs, local_join, local_path):
90
+ """
91
+ Scenario on local filesystem that is used for glob edge cases cp/get/put tests.
92
+
93
+ Cleans up at the end of each test in which it is used.
94
+ """
95
+ source = self._glob_edge_cases_files(local_fs, local_join, local_path)
96
+ yield source
97
+ local_fs.rm(source, recursive=True)
98
+
99
+ @pytest.fixture
100
+ def local_dir_and_file_with_same_name_prefix(
101
+ self, local_fs, local_join, local_path
102
+ ):
103
+ """
104
+ Scenario on local filesystem that is used to check cp/get/put on directory
105
+ and file with the same name prefixes.
106
+
107
+ Cleans up at the end of each test in which it is used.
108
+ """
109
+ source = self._dir_and_file_with_same_name_prefix(
110
+ local_fs, local_join, local_path
111
+ )
112
+ yield source
113
+ local_fs.rm(source, recursive=True)
114
+
115
+ @pytest.fixture
116
+ def local_10_files_with_hashed_names(self, local_fs, local_join, local_path):
117
+ """
118
+ Scenario on local filesystem that is used to check cp/get/put files order
119
+ when source and destination are lists.
120
+
121
+ Cleans up at the end of each test in which it is used.
122
+ """
123
+ source = self._10_files_with_hashed_names(local_fs, local_join, local_path)
124
+ yield source
125
+ local_fs.rm(source, recursive=True)
126
+
127
+ @pytest.fixture
128
+ def local_target(self, local_fs, local_join, local_path):
129
+ """
130
+ Return name of local directory that does not yet exist to copy into.
131
+
132
+ Cleans up at the end of each test in which it is used.
133
+ """
134
+ target = local_join(local_path, "target")
135
+ yield target
136
+ if local_fs.exists(target):
137
+ local_fs.rm(target, recursive=True)
138
+
139
+ def _glob_edge_cases_files(self, some_fs, some_join, some_path):
140
+ """
141
+ Scenario that is used for glob edge cases cp/get/put tests.
142
+ Creates the following directory and file structure:
143
+
144
+ 📁 source
145
+ ├── 📄 file1
146
+ ├── 📄 file2
147
+ ├── 📁 subdir0
148
+ │ ├── 📄 subfile1
149
+ │ ├── 📄 subfile2
150
+ │ └── 📁 nesteddir
151
+ │ └── 📄 nestedfile
152
+ └── 📁 subdir1
153
+ ├── 📄 subfile1
154
+ ├── 📄 subfile2
155
+ └── 📁 nesteddir
156
+ └── 📄 nestedfile
157
+ """
158
+ source = some_join(some_path, "source")
159
+ some_fs.touch(some_join(source, "file1"))
160
+ some_fs.touch(some_join(source, "file2"))
161
+
162
+ for subdir_idx in range(2):
163
+ subdir = some_join(source, f"subdir{subdir_idx}")
164
+ nesteddir = some_join(subdir, "nesteddir")
165
+ some_fs.makedirs(nesteddir)
166
+ some_fs.touch(some_join(subdir, "subfile1"))
167
+ some_fs.touch(some_join(subdir, "subfile2"))
168
+ some_fs.touch(some_join(nesteddir, "nestedfile"))
169
+
170
+ return source
171
+
172
+ def _bulk_operations_scenario_0(self, some_fs, some_join, some_path):
173
+ """
174
+ Scenario that is used for many cp/get/put tests. Creates the following
175
+ directory and file structure:
176
+
177
+ 📁 source
178
+ ├── 📄 file1
179
+ ├── 📄 file2
180
+ └── 📁 subdir
181
+ ├── 📄 subfile1
182
+ ├── 📄 subfile2
183
+ └── 📁 nesteddir
184
+ └── 📄 nestedfile
185
+ """
186
+ source = some_join(some_path, "source")
187
+ subdir = some_join(source, "subdir")
188
+ nesteddir = some_join(subdir, "nesteddir")
189
+ some_fs.makedirs(nesteddir)
190
+ some_fs.touch(some_join(source, "file1"))
191
+ some_fs.touch(some_join(source, "file2"))
192
+ some_fs.touch(some_join(subdir, "subfile1"))
193
+ some_fs.touch(some_join(subdir, "subfile2"))
194
+ some_fs.touch(some_join(nesteddir, "nestedfile"))
195
+ return source
196
+
197
+ def _dir_and_file_with_same_name_prefix(self, some_fs, some_join, some_path):
198
+ """
199
+ Scenario that is used to check cp/get/put on directory and file with
200
+ the same name prefixes. Creates the following directory and file structure:
201
+
202
+ 📁 source
203
+ ├── 📄 subdir.txt
204
+ └── 📁 subdir
205
+ └── 📄 subfile.txt
206
+ """
207
+ source = some_join(some_path, "source")
208
+ subdir = some_join(source, "subdir")
209
+ file = some_join(source, "subdir.txt")
210
+ subfile = some_join(subdir, "subfile.txt")
211
+ some_fs.makedirs(subdir)
212
+ some_fs.touch(file)
213
+ some_fs.touch(subfile)
214
+ return source
215
+
216
+ def _10_files_with_hashed_names(self, some_fs, some_join, some_path):
217
+ """
218
+ Scenario that is used to check cp/get/put files order when source and
219
+ destination are lists. Creates the following directory and file structure:
220
+
221
+ 📁 source
222
+ └── 📄 {hashed([0-9])}.txt
223
+ """
224
+ source = some_join(some_path, "source")
225
+ for i in range(10):
226
+ hashed_i = md5(str(i).encode("utf-8")).hexdigest()
227
+ path = some_join(source, f"{hashed_i}.txt")
228
+ some_fs.pipe(path=path, value=f"{i}".encode("utf-8"))
229
+ return source
230
+
231
+
232
+ class AbstractFixtures(BaseAbstractFixtures):
233
+ """
234
+ Abstract base class containing fixtures that may be overridden in derived
235
+ filesystem-specific classes to run the abstract tests on such filesystems.
236
+
237
+ For any particular filesystem some of these fixtures must be overridden,
238
+ such as ``fs`` and ``fs_path``, and others may be overridden if the
239
+ default functions here are not appropriate, such as ``fs_join``.
240
+ """
241
+
242
+ @pytest.fixture
243
+ def fs(self):
244
+ raise NotImplementedError("This function must be overridden in derived classes")
245
+
246
+ @pytest.fixture
247
+ def fs_join(self):
248
+ """
249
+ Return a function that joins its arguments together into a path.
250
+
251
+ Most fsspec implementations join paths in a platform-dependent way,
252
+ but some will override this to always use a forward slash.
253
+ """
254
+ return os.path.join
255
+
256
+ @pytest.fixture
257
+ def fs_path(self):
258
+ raise NotImplementedError("This function must be overridden in derived classes")
259
+
260
+ @pytest.fixture(scope="class")
261
+ def local_fs(self):
262
+ # Maybe need an option for auto_mkdir=False? This is only relevant
263
+ # for certain implementations.
264
+ return LocalFileSystem(auto_mkdir=True)
265
+
266
+ @pytest.fixture
267
+ def local_join(self):
268
+ """
269
+ Return a function that joins its arguments together into a path, on
270
+ the local filesystem.
271
+ """
272
+ return os.path.join
273
+
274
+ @pytest.fixture
275
+ def local_path(self, tmpdir):
276
+ return tmpdir
277
+
278
+ @pytest.fixture
279
+ def supports_empty_directories(self):
280
+ """
281
+ Return whether this implementation supports empty directories.
282
+ """
283
+ return True
284
+
285
+ @pytest.fixture
286
+ def fs_sanitize_path(self):
287
+ return lambda x: x
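A hypothetical sketch (not part of this commit) of how a backend's test module might wire into these fixtures; only `fs` and `fs_path` strictly need overriding, as noted above. `MemoryFixtures`, `TestMemoryCopy`, and the `/abstract-tests` root are illustrative names, and whether every scenario passes depends on the backend's semantics.

```python
# Sketch of a downstream test module, assuming fsspec's in-memory filesystem.
import fsspec
import pytest

from fsspec.tests.abstract import AbstractCopyTests, AbstractFixtures


class MemoryFixtures(AbstractFixtures):
    @pytest.fixture
    def fs(self):
        return fsspec.filesystem("memory")

    @pytest.fixture
    def fs_path(self):
        return "/abstract-tests"  # illustrative root inside the memory store

    @pytest.fixture
    def fs_join(self):
        # Memory paths always use forward slashes, so override os.path.join.
        return lambda *parts: "/".join(parts)


class TestMemoryCopy(AbstractCopyTests, MemoryFixtures):
    """Runs the shared copy scenarios against the in-memory filesystem."""
```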
lib/python3.11/site-packages/fsspec/tests/abstract/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (15 kB).
 
lib/python3.11/site-packages/fsspec/tests/abstract/__pycache__/common.cpython-311.pyc ADDED
Binary file (2.33 kB).
 
lib/python3.11/site-packages/fsspec/tests/abstract/__pycache__/copy.cpython-311.pyc ADDED
Binary file (26.5 kB).
 
lib/python3.11/site-packages/fsspec/tests/abstract/__pycache__/get.cpython-311.pyc ADDED
Binary file (26.3 kB).
 
lib/python3.11/site-packages/fsspec/tests/abstract/__pycache__/put.cpython-311.pyc ADDED
Binary file (27.7 kB).
 
lib/python3.11/site-packages/fsspec/tests/abstract/common.py ADDED
@@ -0,0 +1,175 @@
1
+ GLOB_EDGE_CASES_TESTS = {
2
+ "argnames": ("path", "recursive", "maxdepth", "expected"),
3
+ "argvalues": [
4
+ ("fil?1", False, None, ["file1"]),
5
+ ("fil?1", True, None, ["file1"]),
6
+ ("file[1-2]", False, None, ["file1", "file2"]),
7
+ ("file[1-2]", True, None, ["file1", "file2"]),
8
+ ("*", False, None, ["file1", "file2"]),
9
+ (
10
+ "*",
11
+ True,
12
+ None,
13
+ [
14
+ "file1",
15
+ "file2",
16
+ "subdir0/subfile1",
17
+ "subdir0/subfile2",
18
+ "subdir0/nesteddir/nestedfile",
19
+ "subdir1/subfile1",
20
+ "subdir1/subfile2",
21
+ "subdir1/nesteddir/nestedfile",
22
+ ],
23
+ ),
24
+ ("*", True, 1, ["file1", "file2"]),
25
+ (
26
+ "*",
27
+ True,
28
+ 2,
29
+ [
30
+ "file1",
31
+ "file2",
32
+ "subdir0/subfile1",
33
+ "subdir0/subfile2",
34
+ "subdir1/subfile1",
35
+ "subdir1/subfile2",
36
+ ],
37
+ ),
38
+ ("*1", False, None, ["file1"]),
39
+ (
40
+ "*1",
41
+ True,
42
+ None,
43
+ [
44
+ "file1",
45
+ "subdir1/subfile1",
46
+ "subdir1/subfile2",
47
+ "subdir1/nesteddir/nestedfile",
48
+ ],
49
+ ),
50
+ ("*1", True, 2, ["file1", "subdir1/subfile1", "subdir1/subfile2"]),
51
+ (
52
+ "**",
53
+ False,
54
+ None,
55
+ [
56
+ "file1",
57
+ "file2",
58
+ "subdir0/subfile1",
59
+ "subdir0/subfile2",
60
+ "subdir0/nesteddir/nestedfile",
61
+ "subdir1/subfile1",
62
+ "subdir1/subfile2",
63
+ "subdir1/nesteddir/nestedfile",
64
+ ],
65
+ ),
66
+ (
67
+ "**",
68
+ True,
69
+ None,
70
+ [
71
+ "file1",
72
+ "file2",
73
+ "subdir0/subfile1",
74
+ "subdir0/subfile2",
75
+ "subdir0/nesteddir/nestedfile",
76
+ "subdir1/subfile1",
77
+ "subdir1/subfile2",
78
+ "subdir1/nesteddir/nestedfile",
79
+ ],
80
+ ),
81
+ ("**", True, 1, ["file1", "file2"]),
82
+ (
83
+ "**",
84
+ True,
85
+ 2,
86
+ [
87
+ "file1",
88
+ "file2",
89
+ "subdir0/subfile1",
90
+ "subdir0/subfile2",
91
+ "subdir0/nesteddir/nestedfile",
92
+ "subdir1/subfile1",
93
+ "subdir1/subfile2",
94
+ "subdir1/nesteddir/nestedfile",
95
+ ],
96
+ ),
97
+ (
98
+ "**",
99
+ False,
100
+ 2,
101
+ [
102
+ "file1",
103
+ "file2",
104
+ "subdir0/subfile1",
105
+ "subdir0/subfile2",
106
+ "subdir1/subfile1",
107
+ "subdir1/subfile2",
108
+ ],
109
+ ),
110
+ ("**/*1", False, None, ["file1", "subdir0/subfile1", "subdir1/subfile1"]),
111
+ (
112
+ "**/*1",
113
+ True,
114
+ None,
115
+ [
116
+ "file1",
117
+ "subdir0/subfile1",
118
+ "subdir1/subfile1",
119
+ "subdir1/subfile2",
120
+ "subdir1/nesteddir/nestedfile",
121
+ ],
122
+ ),
123
+ ("**/*1", True, 1, ["file1"]),
124
+ (
125
+ "**/*1",
126
+ True,
127
+ 2,
128
+ ["file1", "subdir0/subfile1", "subdir1/subfile1", "subdir1/subfile2"],
129
+ ),
130
+ ("**/*1", False, 2, ["file1", "subdir0/subfile1", "subdir1/subfile1"]),
131
+ ("**/subdir0", False, None, []),
132
+ ("**/subdir0", True, None, ["subfile1", "subfile2", "nesteddir/nestedfile"]),
133
+ ("**/subdir0/nested*", False, 2, []),
134
+ ("**/subdir0/nested*", True, 2, ["nestedfile"]),
135
+ ("subdir[1-2]", False, None, []),
136
+ ("subdir[1-2]", True, None, ["subfile1", "subfile2", "nesteddir/nestedfile"]),
137
+ ("subdir[1-2]", True, 2, ["subfile1", "subfile2"]),
138
+ ("subdir[0-1]", False, None, []),
139
+ (
140
+ "subdir[0-1]",
141
+ True,
142
+ None,
143
+ [
144
+ "subdir0/subfile1",
145
+ "subdir0/subfile2",
146
+ "subdir0/nesteddir/nestedfile",
147
+ "subdir1/subfile1",
148
+ "subdir1/subfile2",
149
+ "subdir1/nesteddir/nestedfile",
150
+ ],
151
+ ),
152
+ (
153
+ "subdir[0-1]/*fil[e]*",
154
+ False,
155
+ None,
156
+ [
157
+ "subdir0/subfile1",
158
+ "subdir0/subfile2",
159
+ "subdir1/subfile1",
160
+ "subdir1/subfile2",
161
+ ],
162
+ ),
163
+ (
164
+ "subdir[0-1]/*fil[e]*",
165
+ True,
166
+ None,
167
+ [
168
+ "subdir0/subfile1",
169
+ "subdir0/subfile2",
170
+ "subdir1/subfile1",
171
+ "subdir1/subfile2",
172
+ ],
173
+ ),
174
+ ],
175
+ }
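The table above is consumed through pytest parametrization; the copy/get/put modules below each apply it the same way. A small standalone illustration (the test name is hypothetical):

```python
# Minimal illustration of how the shared glob cases drive a parametrized test.
import pytest

from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS


@pytest.mark.parametrize(
    GLOB_EDGE_CASES_TESTS["argnames"],
    GLOB_EDGE_CASES_TESTS["argvalues"],
)
def test_case_shape(path, recursive, maxdepth, expected):
    # Each row pairs a glob pattern and options with the relative paths that a
    # cp/get/put of that pattern is expected to produce.
    assert isinstance(path, str)
    assert isinstance(expected, list)
```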
lib/python3.11/site-packages/fsspec/tests/abstract/copy.py ADDED
@@ -0,0 +1,543 @@
1
+ from hashlib import md5
2
+ from itertools import product
3
+
4
+ import pytest
5
+
6
+ from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS
7
+
8
+
9
+ class AbstractCopyTests:
10
+ def test_copy_file_to_existing_directory(
11
+ self,
12
+ fs,
13
+ fs_join,
14
+ fs_bulk_operations_scenario_0,
15
+ fs_target,
16
+ supports_empty_directories,
17
+ ):
18
+ # Copy scenario 1a
19
+ source = fs_bulk_operations_scenario_0
20
+
21
+ target = fs_target
22
+ fs.mkdir(target)
23
+ if not supports_empty_directories:
24
+ # Force target directory to exist by adding a dummy file
25
+ fs.touch(fs_join(target, "dummy"))
26
+ assert fs.isdir(target)
27
+
28
+ target_file2 = fs_join(target, "file2")
29
+ target_subfile1 = fs_join(target, "subfile1")
30
+
31
+ # Copy from source directory
32
+ fs.cp(fs_join(source, "file2"), target)
33
+ assert fs.isfile(target_file2)
34
+
35
+ # Copy from sub directory
36
+ fs.cp(fs_join(source, "subdir", "subfile1"), target)
37
+ assert fs.isfile(target_subfile1)
38
+
39
+ # Remove copied files
40
+ fs.rm([target_file2, target_subfile1])
41
+ assert not fs.exists(target_file2)
42
+ assert not fs.exists(target_subfile1)
43
+
44
+ # Repeat with trailing slash on target
45
+ fs.cp(fs_join(source, "file2"), target + "/")
46
+ assert fs.isdir(target)
47
+ assert fs.isfile(target_file2)
48
+
49
+ fs.cp(fs_join(source, "subdir", "subfile1"), target + "/")
50
+ assert fs.isfile(target_subfile1)
51
+
52
+ def test_copy_file_to_new_directory(
53
+ self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
54
+ ):
55
+ # Copy scenario 1b
56
+ source = fs_bulk_operations_scenario_0
57
+
58
+ target = fs_target
59
+ fs.mkdir(target)
60
+
61
+ fs.cp(
62
+ fs_join(source, "subdir", "subfile1"), fs_join(target, "newdir/")
63
+ ) # Note trailing slash
64
+ assert fs.isdir(target)
65
+ assert fs.isdir(fs_join(target, "newdir"))
66
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
67
+
68
+ def test_copy_file_to_file_in_existing_directory(
69
+ self,
70
+ fs,
71
+ fs_join,
72
+ fs_bulk_operations_scenario_0,
73
+ fs_target,
74
+ supports_empty_directories,
75
+ ):
76
+ # Copy scenario 1c
77
+ source = fs_bulk_operations_scenario_0
78
+
79
+ target = fs_target
80
+ fs.mkdir(target)
81
+ if not supports_empty_directories:
82
+ # Force target directory to exist by adding a dummy file
83
+ fs.touch(fs_join(target, "dummy"))
84
+ assert fs.isdir(target)
85
+
86
+ fs.cp(fs_join(source, "subdir", "subfile1"), fs_join(target, "newfile"))
87
+ assert fs.isfile(fs_join(target, "newfile"))
88
+
89
+ def test_copy_file_to_file_in_new_directory(
90
+ self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
91
+ ):
92
+ # Copy scenario 1d
93
+ source = fs_bulk_operations_scenario_0
94
+
95
+ target = fs_target
96
+ fs.mkdir(target)
97
+
98
+ fs.cp(
99
+ fs_join(source, "subdir", "subfile1"), fs_join(target, "newdir", "newfile")
100
+ )
101
+ assert fs.isdir(fs_join(target, "newdir"))
102
+ assert fs.isfile(fs_join(target, "newdir", "newfile"))
103
+
104
+ def test_copy_directory_to_existing_directory(
105
+ self,
106
+ fs,
107
+ fs_join,
108
+ fs_bulk_operations_scenario_0,
109
+ fs_target,
110
+ supports_empty_directories,
111
+ ):
112
+ # Copy scenario 1e
113
+ source = fs_bulk_operations_scenario_0
114
+
115
+ target = fs_target
116
+ fs.mkdir(target)
117
+ if not supports_empty_directories:
118
+ # Force target directory to exist by adding a dummy file
119
+ dummy = fs_join(target, "dummy")
120
+ fs.touch(dummy)
121
+ assert fs.isdir(target)
122
+
123
+ for source_slash, target_slash in zip([False, True], [False, True]):
124
+ s = fs_join(source, "subdir")
125
+ if source_slash:
126
+ s += "/"
127
+ t = target + "/" if target_slash else target
128
+
129
+ # Without recursive does nothing
130
+ fs.cp(s, t)
131
+ assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
132
+
133
+ # With recursive
134
+ fs.cp(s, t, recursive=True)
135
+ if source_slash:
136
+ assert fs.isfile(fs_join(target, "subfile1"))
137
+ assert fs.isfile(fs_join(target, "subfile2"))
138
+ assert fs.isdir(fs_join(target, "nesteddir"))
139
+ assert fs.isfile(fs_join(target, "nesteddir", "nestedfile"))
140
+ assert not fs.exists(fs_join(target, "subdir"))
141
+
142
+ fs.rm(
143
+ [
144
+ fs_join(target, "subfile1"),
145
+ fs_join(target, "subfile2"),
146
+ fs_join(target, "nesteddir"),
147
+ ],
148
+ recursive=True,
149
+ )
150
+ else:
151
+ assert fs.isdir(fs_join(target, "subdir"))
152
+ assert fs.isfile(fs_join(target, "subdir", "subfile1"))
153
+ assert fs.isfile(fs_join(target, "subdir", "subfile2"))
154
+ assert fs.isdir(fs_join(target, "subdir", "nesteddir"))
155
+ assert fs.isfile(fs_join(target, "subdir", "nesteddir", "nestedfile"))
156
+
157
+ fs.rm(fs_join(target, "subdir"), recursive=True)
158
+ assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
159
+
160
+ # Limit recursive by maxdepth
161
+ fs.cp(s, t, recursive=True, maxdepth=1)
162
+ if source_slash:
163
+ assert fs.isfile(fs_join(target, "subfile1"))
164
+ assert fs.isfile(fs_join(target, "subfile2"))
165
+ assert not fs.exists(fs_join(target, "nesteddir"))
166
+ assert not fs.exists(fs_join(target, "subdir"))
167
+
168
+ fs.rm(
169
+ [
170
+ fs_join(target, "subfile1"),
171
+ fs_join(target, "subfile2"),
172
+ ],
173
+ recursive=True,
174
+ )
175
+ else:
176
+ assert fs.isdir(fs_join(target, "subdir"))
177
+ assert fs.isfile(fs_join(target, "subdir", "subfile1"))
178
+ assert fs.isfile(fs_join(target, "subdir", "subfile2"))
179
+ assert not fs.exists(fs_join(target, "subdir", "nesteddir"))
180
+
181
+ fs.rm(fs_join(target, "subdir"), recursive=True)
182
+ assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
183
+
184
+ def test_copy_directory_to_new_directory(
185
+ self,
186
+ fs,
187
+ fs_join,
188
+ fs_bulk_operations_scenario_0,
189
+ fs_target,
190
+ supports_empty_directories,
191
+ ):
192
+ # Copy scenario 1f
193
+ source = fs_bulk_operations_scenario_0
194
+
195
+ target = fs_target
196
+ fs.mkdir(target)
197
+
198
+ for source_slash, target_slash in zip([False, True], [False, True]):
199
+ s = fs_join(source, "subdir")
200
+ if source_slash:
201
+ s += "/"
202
+ t = fs_join(target, "newdir")
203
+ if target_slash:
204
+ t += "/"
205
+
206
+ # Without recursive does nothing
207
+ fs.cp(s, t)
208
+ if supports_empty_directories:
209
+ assert fs.ls(target) == []
210
+ else:
211
+ with pytest.raises(FileNotFoundError):
212
+ fs.ls(target)
213
+
214
+ # With recursive
215
+ fs.cp(s, t, recursive=True)
216
+ assert fs.isdir(fs_join(target, "newdir"))
217
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
218
+ assert fs.isfile(fs_join(target, "newdir", "subfile2"))
219
+ assert fs.isdir(fs_join(target, "newdir", "nesteddir"))
220
+ assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile"))
221
+ assert not fs.exists(fs_join(target, "subdir"))
222
+
223
+ fs.rm(fs_join(target, "newdir"), recursive=True)
224
+ assert not fs.exists(fs_join(target, "newdir"))
225
+
226
+ # Limit recursive by maxdepth
227
+ fs.cp(s, t, recursive=True, maxdepth=1)
228
+ assert fs.isdir(fs_join(target, "newdir"))
229
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
230
+ assert fs.isfile(fs_join(target, "newdir", "subfile2"))
231
+ assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
232
+ assert not fs.exists(fs_join(target, "subdir"))
233
+
234
+ fs.rm(fs_join(target, "newdir"), recursive=True)
235
+ assert not fs.exists(fs_join(target, "newdir"))
236
+
237
+ def test_copy_glob_to_existing_directory(
238
+ self,
239
+ fs,
240
+ fs_join,
241
+ fs_bulk_operations_scenario_0,
242
+ fs_target,
243
+ supports_empty_directories,
244
+ ):
245
+ # Copy scenario 1g
246
+ source = fs_bulk_operations_scenario_0
247
+
248
+ target = fs_target
249
+ fs.mkdir(target)
250
+ if not supports_empty_directories:
251
+ # Force target directory to exist by adding a dummy file
252
+ dummy = fs_join(target, "dummy")
253
+ fs.touch(dummy)
254
+ assert fs.isdir(target)
255
+
256
+ for target_slash in [False, True]:
257
+ t = target + "/" if target_slash else target
258
+
259
+ # Without recursive
260
+ fs.cp(fs_join(source, "subdir", "*"), t)
261
+ assert fs.isfile(fs_join(target, "subfile1"))
262
+ assert fs.isfile(fs_join(target, "subfile2"))
263
+ assert not fs.isdir(fs_join(target, "nesteddir"))
264
+ assert not fs.exists(fs_join(target, "nesteddir", "nestedfile"))
265
+ assert not fs.exists(fs_join(target, "subdir"))
266
+
267
+ fs.rm(
268
+ [
269
+ fs_join(target, "subfile1"),
270
+ fs_join(target, "subfile2"),
271
+ ],
272
+ recursive=True,
273
+ )
274
+ assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
275
+
276
+ # With recursive
277
+ for glob, recursive in zip(["*", "**"], [True, False]):
278
+ fs.cp(fs_join(source, "subdir", glob), t, recursive=recursive)
279
+ assert fs.isfile(fs_join(target, "subfile1"))
280
+ assert fs.isfile(fs_join(target, "subfile2"))
281
+ assert fs.isdir(fs_join(target, "nesteddir"))
282
+ assert fs.isfile(fs_join(target, "nesteddir", "nestedfile"))
283
+ assert not fs.exists(fs_join(target, "subdir"))
284
+
285
+ fs.rm(
286
+ [
287
+ fs_join(target, "subfile1"),
288
+ fs_join(target, "subfile2"),
289
+ fs_join(target, "nesteddir"),
290
+ ],
291
+ recursive=True,
292
+ )
293
+ assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
294
+
295
+ # Limit recursive by maxdepth
296
+ fs.cp(
297
+ fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1
298
+ )
299
+ assert fs.isfile(fs_join(target, "subfile1"))
300
+ assert fs.isfile(fs_join(target, "subfile2"))
301
+ assert not fs.exists(fs_join(target, "nesteddir"))
302
+ assert not fs.exists(fs_join(target, "subdir"))
303
+
304
+ fs.rm(
305
+ [
306
+ fs_join(target, "subfile1"),
307
+ fs_join(target, "subfile2"),
308
+ ],
309
+ recursive=True,
310
+ )
311
+ assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
312
+
313
+ def test_copy_glob_to_new_directory(
314
+ self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
315
+ ):
316
+ # Copy scenario 1h
317
+ source = fs_bulk_operations_scenario_0
318
+
319
+ target = fs_target
320
+ fs.mkdir(target)
321
+
322
+ for target_slash in [False, True]:
323
+ t = fs_join(target, "newdir")
324
+ if target_slash:
325
+ t += "/"
326
+
327
+ # Without recursive
328
+ fs.cp(fs_join(source, "subdir", "*"), t)
329
+ assert fs.isdir(fs_join(target, "newdir"))
330
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
331
+ assert fs.isfile(fs_join(target, "newdir", "subfile2"))
332
+ assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
333
+ assert not fs.exists(fs_join(target, "newdir", "nesteddir", "nestedfile"))
334
+ assert not fs.exists(fs_join(target, "subdir"))
335
+ assert not fs.exists(fs_join(target, "newdir", "subdir"))
336
+
337
+ fs.rm(fs_join(target, "newdir"), recursive=True)
338
+ assert not fs.exists(fs_join(target, "newdir"))
339
+
340
+ # With recursive
341
+ for glob, recursive in zip(["*", "**"], [True, False]):
342
+ fs.cp(fs_join(source, "subdir", glob), t, recursive=recursive)
343
+ assert fs.isdir(fs_join(target, "newdir"))
344
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
345
+ assert fs.isfile(fs_join(target, "newdir", "subfile2"))
346
+ assert fs.isdir(fs_join(target, "newdir", "nesteddir"))
347
+ assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile"))
348
+ assert not fs.exists(fs_join(target, "subdir"))
349
+ assert not fs.exists(fs_join(target, "newdir", "subdir"))
350
+
351
+ fs.rm(fs_join(target, "newdir"), recursive=True)
352
+ assert not fs.exists(fs_join(target, "newdir"))
353
+
354
+ # Limit recursive by maxdepth
355
+ fs.cp(
356
+ fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1
357
+ )
358
+ assert fs.isdir(fs_join(target, "newdir"))
359
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
360
+ assert fs.isfile(fs_join(target, "newdir", "subfile2"))
361
+ assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
362
+ assert not fs.exists(fs_join(target, "subdir"))
363
+ assert not fs.exists(fs_join(target, "newdir", "subdir"))
364
+
365
+ fs.rm(fs_join(target, "newdir"), recursive=True)
366
+ assert not fs.exists(fs_join(target, "newdir"))
367
+
368
+ @pytest.mark.parametrize(
369
+ GLOB_EDGE_CASES_TESTS["argnames"],
370
+ GLOB_EDGE_CASES_TESTS["argvalues"],
371
+ )
372
+ def test_copy_glob_edge_cases(
373
+ self,
374
+ path,
375
+ recursive,
376
+ maxdepth,
377
+ expected,
378
+ fs,
379
+ fs_join,
380
+ fs_glob_edge_cases_files,
381
+ fs_target,
382
+ fs_sanitize_path,
383
+ ):
384
+ # Copy scenario 1g
385
+ source = fs_glob_edge_cases_files
386
+
387
+ target = fs_target
388
+
389
+ for new_dir, target_slash in product([True, False], [True, False]):
390
+ fs.mkdir(target)
391
+
392
+ t = fs_join(target, "newdir") if new_dir else target
393
+ t = t + "/" if target_slash else t
394
+
395
+ fs.copy(fs_join(source, path), t, recursive=recursive, maxdepth=maxdepth)
396
+
397
+ output = fs.find(target)
398
+ if new_dir:
399
+ prefixed_expected = [
400
+ fs_sanitize_path(fs_join(target, "newdir", p)) for p in expected
401
+ ]
402
+ else:
403
+ prefixed_expected = [
404
+ fs_sanitize_path(fs_join(target, p)) for p in expected
405
+ ]
406
+ assert sorted(output) == sorted(prefixed_expected)
407
+
408
+ try:
409
+ fs.rm(target, recursive=True)
410
+ except FileNotFoundError:
411
+ pass
412
+
413
+ def test_copy_list_of_files_to_existing_directory(
414
+ self,
415
+ fs,
416
+ fs_join,
417
+ fs_bulk_operations_scenario_0,
418
+ fs_target,
419
+ supports_empty_directories,
420
+ ):
421
+ # Copy scenario 2a
422
+ source = fs_bulk_operations_scenario_0
423
+
424
+ target = fs_target
425
+ fs.mkdir(target)
426
+ if not supports_empty_directories:
427
+ # Force target directory to exist by adding a dummy file
428
+ dummy = fs_join(target, "dummy")
429
+ fs.touch(dummy)
430
+ assert fs.isdir(target)
431
+
432
+ source_files = [
433
+ fs_join(source, "file1"),
434
+ fs_join(source, "file2"),
435
+ fs_join(source, "subdir", "subfile1"),
436
+ ]
437
+
438
+ for target_slash in [False, True]:
439
+ t = target + "/" if target_slash else target
440
+
441
+ fs.cp(source_files, t)
442
+ assert fs.isfile(fs_join(target, "file1"))
443
+ assert fs.isfile(fs_join(target, "file2"))
444
+ assert fs.isfile(fs_join(target, "subfile1"))
445
+
446
+ fs.rm(
447
+ [
448
+ fs_join(target, "file1"),
449
+ fs_join(target, "file2"),
450
+ fs_join(target, "subfile1"),
451
+ ],
452
+ recursive=True,
453
+ )
454
+ assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
455
+
456
+ def test_copy_list_of_files_to_new_directory(
457
+ self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
458
+ ):
459
+ # Copy scenario 2b
460
+ source = fs_bulk_operations_scenario_0
461
+
462
+ target = fs_target
463
+ fs.mkdir(target)
464
+
465
+ source_files = [
466
+ fs_join(source, "file1"),
467
+ fs_join(source, "file2"),
468
+ fs_join(source, "subdir", "subfile1"),
469
+ ]
470
+
471
+ fs.cp(source_files, fs_join(target, "newdir") + "/") # Note trailing slash
472
+ assert fs.isdir(fs_join(target, "newdir"))
473
+ assert fs.isfile(fs_join(target, "newdir", "file1"))
474
+ assert fs.isfile(fs_join(target, "newdir", "file2"))
475
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
476
+
477
+ def test_copy_two_files_new_directory(
478
+ self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
479
+ ):
480
+ # This is a duplicate of test_copy_list_of_files_to_new_directory and
481
+ # can eventually be removed.
482
+ source = fs_bulk_operations_scenario_0
483
+
484
+ target = fs_target
485
+ assert not fs.exists(target)
486
+ fs.cp([fs_join(source, "file1"), fs_join(source, "file2")], target)
487
+
488
+ assert fs.isdir(target)
489
+ assert fs.isfile(fs_join(target, "file1"))
490
+ assert fs.isfile(fs_join(target, "file2"))
491
+
492
+ def test_copy_directory_without_files_with_same_name_prefix(
493
+ self,
494
+ fs,
495
+ fs_join,
496
+ fs_target,
497
+ fs_dir_and_file_with_same_name_prefix,
498
+ supports_empty_directories,
499
+ ):
500
+ # Create the test dirs
501
+ source = fs_dir_and_file_with_same_name_prefix
502
+ target = fs_target
503
+
504
+ # Test without glob
505
+ fs.cp(fs_join(source, "subdir"), target, recursive=True)
506
+
507
+ assert fs.isfile(fs_join(target, "subfile.txt"))
508
+ assert not fs.isfile(fs_join(target, "subdir.txt"))
509
+
510
+ fs.rm([fs_join(target, "subfile.txt")])
511
+ if supports_empty_directories:
512
+ assert fs.ls(target) == []
513
+ else:
514
+ assert not fs.exists(target)
515
+
516
+ # Test with glob
517
+ fs.cp(fs_join(source, "subdir*"), target, recursive=True)
518
+
519
+ assert fs.isdir(fs_join(target, "subdir"))
520
+ assert fs.isfile(fs_join(target, "subdir", "subfile.txt"))
521
+ assert fs.isfile(fs_join(target, "subdir.txt"))
522
+
523
+ def test_copy_with_source_and_destination_as_list(
524
+ self, fs, fs_target, fs_join, fs_10_files_with_hashed_names
525
+ ):
526
+ # Create the test dir
527
+ source = fs_10_files_with_hashed_names
528
+ target = fs_target
529
+
530
+ # Create list of files for source and destination
531
+ source_files = []
532
+ destination_files = []
533
+ for i in range(10):
534
+ hashed_i = md5(str(i).encode("utf-8")).hexdigest()
535
+ source_files.append(fs_join(source, f"{hashed_i}.txt"))
536
+ destination_files.append(fs_join(target, f"{hashed_i}.txt"))
537
+
538
+ # Copy and assert order was kept
539
+ fs.copy(path1=source_files, path2=destination_files)
540
+
541
+ for i in range(10):
542
+ file_content = fs.cat(destination_files[i]).decode("utf-8")
543
+ assert file_content == str(i)
lib/python3.11/site-packages/fsspec/tests/abstract/get.py ADDED
@@ -0,0 +1,587 @@
1
+ from hashlib import md5
2
+ from itertools import product
3
+
4
+ import pytest
5
+
6
+ from fsspec.implementations.local import make_path_posix
7
+ from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS
8
+
9
+
10
+ class AbstractGetTests:
11
+ def test_get_file_to_existing_directory(
12
+ self,
13
+ fs,
14
+ fs_join,
15
+ fs_bulk_operations_scenario_0,
16
+ local_fs,
17
+ local_join,
18
+ local_target,
19
+ ):
20
+ # Copy scenario 1a
21
+ source = fs_bulk_operations_scenario_0
22
+
23
+ target = local_target
24
+ local_fs.mkdir(target)
25
+ assert local_fs.isdir(target)
26
+
27
+ target_file2 = local_join(target, "file2")
28
+ target_subfile1 = local_join(target, "subfile1")
29
+
30
+ # Copy from source directory
31
+ fs.get(fs_join(source, "file2"), target)
32
+ assert local_fs.isfile(target_file2)
33
+
34
+ # Copy from sub directory
35
+ fs.get(fs_join(source, "subdir", "subfile1"), target)
36
+ assert local_fs.isfile(target_subfile1)
37
+
38
+ # Remove copied files
39
+ local_fs.rm([target_file2, target_subfile1])
40
+ assert not local_fs.exists(target_file2)
41
+ assert not local_fs.exists(target_subfile1)
42
+
43
+ # Repeat with trailing slash on target
44
+ fs.get(fs_join(source, "file2"), target + "/")
45
+ assert local_fs.isdir(target)
46
+ assert local_fs.isfile(target_file2)
47
+
48
+ fs.get(fs_join(source, "subdir", "subfile1"), target + "/")
49
+ assert local_fs.isfile(target_subfile1)
50
+
51
+ def test_get_file_to_new_directory(
52
+ self,
53
+ fs,
54
+ fs_join,
55
+ fs_bulk_operations_scenario_0,
56
+ local_fs,
57
+ local_join,
58
+ local_target,
59
+ ):
60
+ # Copy scenario 1b
61
+ source = fs_bulk_operations_scenario_0
62
+
63
+ target = local_target
64
+ local_fs.mkdir(target)
65
+
66
+ fs.get(
67
+ fs_join(source, "subdir", "subfile1"), local_join(target, "newdir/")
68
+ ) # Note trailing slash
69
+
70
+ assert local_fs.isdir(target)
71
+ assert local_fs.isdir(local_join(target, "newdir"))
72
+ assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
73
+
74
+ def test_get_file_to_file_in_existing_directory(
75
+ self,
76
+ fs,
77
+ fs_join,
78
+ fs_bulk_operations_scenario_0,
79
+ local_fs,
80
+ local_join,
81
+ local_target,
82
+ ):
83
+ # Copy scenario 1c
84
+ source = fs_bulk_operations_scenario_0
85
+
86
+ target = local_target
87
+ local_fs.mkdir(target)
88
+
89
+ fs.get(fs_join(source, "subdir", "subfile1"), local_join(target, "newfile"))
90
+ assert local_fs.isfile(local_join(target, "newfile"))
91
+
92
+ def test_get_file_to_file_in_new_directory(
93
+ self,
94
+ fs,
95
+ fs_join,
96
+ fs_bulk_operations_scenario_0,
97
+ local_fs,
98
+ local_join,
99
+ local_target,
100
+ ):
101
+ # Copy scenario 1d
102
+ source = fs_bulk_operations_scenario_0
103
+
104
+ target = local_target
105
+ local_fs.mkdir(target)
106
+
107
+ fs.get(
108
+ fs_join(source, "subdir", "subfile1"),
109
+ local_join(target, "newdir", "newfile"),
110
+ )
111
+ assert local_fs.isdir(local_join(target, "newdir"))
112
+ assert local_fs.isfile(local_join(target, "newdir", "newfile"))
113
+
114
+ def test_get_directory_to_existing_directory(
115
+ self,
116
+ fs,
117
+ fs_join,
118
+ fs_bulk_operations_scenario_0,
119
+ local_fs,
120
+ local_join,
121
+ local_target,
122
+ ):
123
+ # Copy scenario 1e
124
+ source = fs_bulk_operations_scenario_0
125
+
126
+ target = local_target
127
+ local_fs.mkdir(target)
128
+ assert local_fs.isdir(target)
129
+
130
+ for source_slash, target_slash in zip([False, True], [False, True]):
131
+ s = fs_join(source, "subdir")
132
+ if source_slash:
133
+ s += "/"
134
+ t = target + "/" if target_slash else target
135
+
136
+ # Without recursive does nothing
137
+ fs.get(s, t)
138
+ assert local_fs.ls(target) == []
139
+
140
+ # With recursive
141
+ fs.get(s, t, recursive=True)
142
+ if source_slash:
143
+ assert local_fs.isfile(local_join(target, "subfile1"))
144
+ assert local_fs.isfile(local_join(target, "subfile2"))
145
+ assert local_fs.isdir(local_join(target, "nesteddir"))
146
+ assert local_fs.isfile(local_join(target, "nesteddir", "nestedfile"))
147
+ assert not local_fs.exists(local_join(target, "subdir"))
148
+
149
+ local_fs.rm(
150
+ [
151
+ local_join(target, "subfile1"),
152
+ local_join(target, "subfile2"),
153
+ local_join(target, "nesteddir"),
154
+ ],
155
+ recursive=True,
156
+ )
157
+ else:
158
+ assert local_fs.isdir(local_join(target, "subdir"))
159
+ assert local_fs.isfile(local_join(target, "subdir", "subfile1"))
160
+ assert local_fs.isfile(local_join(target, "subdir", "subfile2"))
161
+ assert local_fs.isdir(local_join(target, "subdir", "nesteddir"))
162
+ assert local_fs.isfile(
163
+ local_join(target, "subdir", "nesteddir", "nestedfile")
164
+ )
165
+
166
+ local_fs.rm(local_join(target, "subdir"), recursive=True)
167
+ assert local_fs.ls(target) == []
168
+
169
+ # Limit recursive by maxdepth
170
+ fs.get(s, t, recursive=True, maxdepth=1)
171
+ if source_slash:
172
+ assert local_fs.isfile(local_join(target, "subfile1"))
173
+ assert local_fs.isfile(local_join(target, "subfile2"))
174
+ assert not local_fs.exists(local_join(target, "nesteddir"))
175
+ assert not local_fs.exists(local_join(target, "subdir"))
176
+
177
+ local_fs.rm(
178
+ [
179
+ local_join(target, "subfile1"),
180
+ local_join(target, "subfile2"),
181
+ ],
182
+ recursive=True,
183
+ )
184
+ else:
185
+ assert local_fs.isdir(local_join(target, "subdir"))
186
+ assert local_fs.isfile(local_join(target, "subdir", "subfile1"))
187
+ assert local_fs.isfile(local_join(target, "subdir", "subfile2"))
188
+ assert not local_fs.exists(local_join(target, "subdir", "nesteddir"))
189
+
190
+ local_fs.rm(local_join(target, "subdir"), recursive=True)
191
+ assert local_fs.ls(target) == []
192
+
193
+ def test_get_directory_to_new_directory(
194
+ self,
195
+ fs,
196
+ fs_join,
197
+ fs_bulk_operations_scenario_0,
198
+ local_fs,
199
+ local_join,
200
+ local_target,
201
+ ):
202
+ # Copy scenario 1f
203
+ source = fs_bulk_operations_scenario_0
204
+
205
+ target = local_target
206
+ local_fs.mkdir(target)
207
+
208
+ for source_slash, target_slash in zip([False, True], [False, True]):
209
+ s = fs_join(source, "subdir")
210
+ if source_slash:
211
+ s += "/"
212
+ t = local_join(target, "newdir")
213
+ if target_slash:
214
+ t += "/"
215
+
216
+ # Without recursive does nothing
217
+ fs.get(s, t)
218
+ assert local_fs.ls(target) == []
219
+
220
+ # With recursive
221
+ fs.get(s, t, recursive=True)
222
+ assert local_fs.isdir(local_join(target, "newdir"))
223
+ assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
224
+ assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
225
+ assert local_fs.isdir(local_join(target, "newdir", "nesteddir"))
226
+ assert local_fs.isfile(
227
+ local_join(target, "newdir", "nesteddir", "nestedfile")
228
+ )
229
+ assert not local_fs.exists(local_join(target, "subdir"))
230
+
231
+ local_fs.rm(local_join(target, "newdir"), recursive=True)
232
+ assert local_fs.ls(target) == []
233
+
234
+ # Limit recursive by maxdepth
235
+ fs.get(s, t, recursive=True, maxdepth=1)
236
+ assert local_fs.isdir(local_join(target, "newdir"))
237
+ assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
238
+ assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
239
+ assert not local_fs.exists(local_join(target, "newdir", "nesteddir"))
240
+ assert not local_fs.exists(local_join(target, "subdir"))
241
+
242
+ local_fs.rm(local_join(target, "newdir"), recursive=True)
243
+ assert not local_fs.exists(local_join(target, "newdir"))
244
+
245
+ def test_get_glob_to_existing_directory(
246
+ self,
247
+ fs,
248
+ fs_join,
249
+ fs_bulk_operations_scenario_0,
250
+ local_fs,
251
+ local_join,
252
+ local_target,
253
+ ):
254
+ # Copy scenario 1g
255
+ source = fs_bulk_operations_scenario_0
256
+
257
+ target = local_target
258
+ local_fs.mkdir(target)
259
+
260
+ for target_slash in [False, True]:
261
+ t = target + "/" if target_slash else target
262
+
263
+ # Without recursive
264
+ fs.get(fs_join(source, "subdir", "*"), t)
265
+ assert local_fs.isfile(local_join(target, "subfile1"))
266
+ assert local_fs.isfile(local_join(target, "subfile2"))
267
+ assert not local_fs.isdir(local_join(target, "nesteddir"))
268
+ assert not local_fs.exists(local_join(target, "nesteddir", "nestedfile"))
269
+ assert not local_fs.exists(local_join(target, "subdir"))
270
+
271
+ local_fs.rm(
272
+ [
273
+ local_join(target, "subfile1"),
274
+ local_join(target, "subfile2"),
275
+ ],
276
+ recursive=True,
277
+ )
278
+ assert local_fs.ls(target) == []
279
+
280
+ # With recursive
281
+ for glob, recursive in zip(["*", "**"], [True, False]):
282
+ fs.get(fs_join(source, "subdir", glob), t, recursive=recursive)
283
+ assert local_fs.isfile(local_join(target, "subfile1"))
284
+ assert local_fs.isfile(local_join(target, "subfile2"))
285
+ assert local_fs.isdir(local_join(target, "nesteddir"))
286
+ assert local_fs.isfile(local_join(target, "nesteddir", "nestedfile"))
287
+ assert not local_fs.exists(local_join(target, "subdir"))
288
+
289
+ local_fs.rm(
290
+ [
291
+ local_join(target, "subfile1"),
292
+ local_join(target, "subfile2"),
293
+ local_join(target, "nesteddir"),
294
+ ],
295
+ recursive=True,
296
+ )
297
+ assert local_fs.ls(target) == []
298
+
299
+ # Limit recursive by maxdepth
300
+ fs.get(
301
+ fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1
302
+ )
303
+ assert local_fs.isfile(local_join(target, "subfile1"))
304
+ assert local_fs.isfile(local_join(target, "subfile2"))
305
+ assert not local_fs.exists(local_join(target, "nesteddir"))
306
+ assert not local_fs.exists(local_join(target, "subdir"))
307
+
308
+ local_fs.rm(
309
+ [
310
+ local_join(target, "subfile1"),
311
+ local_join(target, "subfile2"),
312
+ ],
313
+ recursive=True,
314
+ )
315
+ assert local_fs.ls(target) == []
316
+
317
+ def test_get_glob_to_new_directory(
318
+ self,
319
+ fs,
320
+ fs_join,
321
+ fs_bulk_operations_scenario_0,
322
+ local_fs,
323
+ local_join,
324
+ local_target,
325
+ ):
326
+ # Copy scenario 1h
327
+ source = fs_bulk_operations_scenario_0
328
+
329
+ target = local_target
330
+ local_fs.mkdir(target)
331
+
332
+ for target_slash in [False, True]:
333
+ t = fs_join(target, "newdir")
334
+ if target_slash:
335
+ t += "/"
336
+
337
+ # Without recursive
338
+ fs.get(fs_join(source, "subdir", "*"), t)
339
+ assert local_fs.isdir(local_join(target, "newdir"))
340
+ assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
341
+ assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
342
+ assert not local_fs.exists(local_join(target, "newdir", "nesteddir"))
343
+ assert not local_fs.exists(
344
+ local_join(target, "newdir", "nesteddir", "nestedfile")
345
+ )
346
+ assert not local_fs.exists(local_join(target, "subdir"))
347
+ assert not local_fs.exists(local_join(target, "newdir", "subdir"))
348
+
349
+ local_fs.rm(local_join(target, "newdir"), recursive=True)
350
+ assert local_fs.ls(target) == []
351
+
352
+ # With recursive
353
+ for glob, recursive in zip(["*", "**"], [True, False]):
354
+ fs.get(fs_join(source, "subdir", glob), t, recursive=recursive)
355
+ assert local_fs.isdir(local_join(target, "newdir"))
356
+ assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
357
+ assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
358
+ assert local_fs.isdir(local_join(target, "newdir", "nesteddir"))
359
+ assert local_fs.isfile(
360
+ local_join(target, "newdir", "nesteddir", "nestedfile")
361
+ )
362
+ assert not local_fs.exists(local_join(target, "subdir"))
363
+ assert not local_fs.exists(local_join(target, "newdir", "subdir"))
364
+
365
+ local_fs.rm(local_join(target, "newdir"), recursive=True)
366
+ assert not local_fs.exists(local_join(target, "newdir"))
367
+
368
+ # Limit recursive by maxdepth
369
+ fs.get(
370
+ fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1
371
+ )
372
+ assert local_fs.isdir(local_join(target, "newdir"))
373
+ assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
374
+ assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
375
+ assert not local_fs.exists(local_join(target, "newdir", "nesteddir"))
376
+ assert not local_fs.exists(local_join(target, "subdir"))
377
+ assert not local_fs.exists(local_join(target, "newdir", "subdir"))
378
+
379
+ local_fs.rm(local_fs.ls(target, detail=False), recursive=True)
380
+ assert not local_fs.exists(local_join(target, "newdir"))
381
+
382
+ @pytest.mark.parametrize(
383
+ GLOB_EDGE_CASES_TESTS["argnames"],
384
+ GLOB_EDGE_CASES_TESTS["argvalues"],
385
+ )
386
+ def test_get_glob_edge_cases(
387
+ self,
388
+ path,
389
+ recursive,
390
+ maxdepth,
391
+ expected,
392
+ fs,
393
+ fs_join,
394
+ fs_glob_edge_cases_files,
395
+ local_fs,
396
+ local_join,
397
+ local_target,
398
+ ):
399
+ # Copy scenario 1g
400
+ source = fs_glob_edge_cases_files
401
+
402
+ target = local_target
403
+
404
+ for new_dir, target_slash in product([True, False], [True, False]):
405
+ local_fs.mkdir(target)
406
+
407
+ t = local_join(target, "newdir") if new_dir else target
408
+ t = t + "/" if target_slash else t
409
+
410
+ fs.get(fs_join(source, path), t, recursive=recursive, maxdepth=maxdepth)
411
+
412
+ output = local_fs.find(target)
413
+ if new_dir:
414
+ prefixed_expected = [
415
+ make_path_posix(local_join(target, "newdir", p)) for p in expected
416
+ ]
417
+ else:
418
+ prefixed_expected = [
419
+ make_path_posix(local_join(target, p)) for p in expected
420
+ ]
421
+ assert sorted(output) == sorted(prefixed_expected)
422
+
423
+ try:
424
+ local_fs.rm(target, recursive=True)
425
+ except FileNotFoundError:
426
+ pass
427
+
428
+ def test_get_list_of_files_to_existing_directory(
429
+ self,
430
+ fs,
431
+ fs_join,
432
+ fs_bulk_operations_scenario_0,
433
+ local_fs,
434
+ local_join,
435
+ local_target,
436
+ ):
437
+ # Copy scenario 2a
438
+ source = fs_bulk_operations_scenario_0
439
+
440
+ target = local_target
441
+ local_fs.mkdir(target)
442
+
443
+ source_files = [
444
+ fs_join(source, "file1"),
445
+ fs_join(source, "file2"),
446
+ fs_join(source, "subdir", "subfile1"),
447
+ ]
448
+
449
+ for target_slash in [False, True]:
450
+ t = target + "/" if target_slash else target
451
+
452
+ fs.get(source_files, t)
453
+ assert local_fs.isfile(local_join(target, "file1"))
454
+ assert local_fs.isfile(local_join(target, "file2"))
455
+ assert local_fs.isfile(local_join(target, "subfile1"))
456
+
457
+ local_fs.rm(
458
+ [
459
+ local_join(target, "file1"),
460
+ local_join(target, "file2"),
461
+ local_join(target, "subfile1"),
462
+ ],
463
+ recursive=True,
464
+ )
465
+ assert local_fs.ls(target) == []
466
+
467
+ def test_get_list_of_files_to_new_directory(
468
+ self,
469
+ fs,
470
+ fs_join,
471
+ fs_bulk_operations_scenario_0,
472
+ local_fs,
473
+ local_join,
474
+ local_target,
475
+ ):
476
+ # Copy scenario 2b
477
+ source = fs_bulk_operations_scenario_0
478
+
479
+ target = local_target
480
+ local_fs.mkdir(target)
481
+
482
+ source_files = [
483
+ fs_join(source, "file1"),
484
+ fs_join(source, "file2"),
485
+ fs_join(source, "subdir", "subfile1"),
486
+ ]
487
+
488
+ fs.get(source_files, local_join(target, "newdir") + "/") # Note trailing slash
489
+ assert local_fs.isdir(local_join(target, "newdir"))
490
+ assert local_fs.isfile(local_join(target, "newdir", "file1"))
491
+ assert local_fs.isfile(local_join(target, "newdir", "file2"))
492
+ assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
493
+
494
+ def test_get_directory_recursive(
495
+ self, fs, fs_join, fs_path, local_fs, local_join, local_target
496
+ ):
497
+ # https://github.com/fsspec/filesystem_spec/issues/1062
498
+ # Recursive cp/get/put of source directory into non-existent target directory.
499
+ src = fs_join(fs_path, "src")
500
+ src_file = fs_join(src, "file")
501
+ fs.mkdir(src)
502
+ fs.touch(src_file)
503
+
504
+ target = local_target
505
+
506
+ # get without slash
507
+ assert not local_fs.exists(target)
508
+ for loop in range(2):
509
+ fs.get(src, target, recursive=True)
510
+ assert local_fs.isdir(target)
511
+
512
+ if loop == 0:
513
+ assert local_fs.isfile(local_join(target, "file"))
514
+ assert not local_fs.exists(local_join(target, "src"))
515
+ else:
516
+ assert local_fs.isfile(local_join(target, "file"))
517
+ assert local_fs.isdir(local_join(target, "src"))
518
+ assert local_fs.isfile(local_join(target, "src", "file"))
519
+
520
+ local_fs.rm(target, recursive=True)
521
+
522
+ # get with slash
523
+ assert not local_fs.exists(target)
524
+ for loop in range(2):
525
+ fs.get(src + "/", target, recursive=True)
526
+ assert local_fs.isdir(target)
527
+ assert local_fs.isfile(local_join(target, "file"))
528
+ assert not local_fs.exists(local_join(target, "src"))
529
+
530
+ def test_get_directory_without_files_with_same_name_prefix(
531
+ self,
532
+ fs,
533
+ fs_join,
534
+ local_fs,
535
+ local_join,
536
+ local_target,
537
+ fs_dir_and_file_with_same_name_prefix,
538
+ ):
539
+ # Create the test dirs
540
+ source = fs_dir_and_file_with_same_name_prefix
541
+ target = local_target
542
+
543
+ # Test without glob
544
+ fs.get(fs_join(source, "subdir"), target, recursive=True)
545
+
546
+ assert local_fs.isfile(local_join(target, "subfile.txt"))
547
+ assert not local_fs.isfile(local_join(target, "subdir.txt"))
548
+
549
+ local_fs.rm([local_join(target, "subfile.txt")])
550
+ assert local_fs.ls(target) == []
551
+
552
+ # Test with glob
553
+ fs.get(fs_join(source, "subdir*"), target, recursive=True)
554
+
555
+ assert local_fs.isdir(local_join(target, "subdir"))
556
+ assert local_fs.isfile(local_join(target, "subdir", "subfile.txt"))
557
+ assert local_fs.isfile(local_join(target, "subdir.txt"))
558
+
559
+ def test_get_with_source_and_destination_as_list(
560
+ self,
561
+ fs,
562
+ fs_join,
563
+ local_fs,
564
+ local_join,
565
+ local_target,
566
+ fs_10_files_with_hashed_names,
567
+ ):
568
+ # Create the test dir
569
+ source = fs_10_files_with_hashed_names
570
+ target = local_target
571
+
572
+ # Create list of files for source and destination
573
+ source_files = []
574
+ destination_files = []
575
+ for i in range(10):
576
+ hashed_i = md5(str(i).encode("utf-8")).hexdigest()
577
+ source_files.append(fs_join(source, f"{hashed_i}.txt"))
578
+ destination_files.append(
579
+ make_path_posix(local_join(target, f"{hashed_i}.txt"))
580
+ )
581
+
582
+ # Copy and assert order was kept
583
+ fs.get(rpath=source_files, lpath=destination_files)
584
+
585
+ for i in range(10):
586
+ file_content = local_fs.cat(destination_files[i]).decode("utf-8")
587
+ assert file_content == str(i)
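The list-to-list form of get() exercised by the test above pairs each remote path with the local path at the same index. A minimal sketch of that behaviour outside the test suite, using the in-memory filesystem; all paths are illustrative:

import os
import tempfile

import fsspec

fs = fsspec.filesystem("memory")
fs.pipe("/src/a.txt", b"0")          # create two small remote files
fs.pipe("/src/b.txt", b"1")

local_dir = tempfile.mkdtemp()
rpaths = ["/src/a.txt", "/src/b.txt"]
lpaths = [os.path.join(local_dir, "a.txt"), os.path.join(local_dir, "b.txt")]

fs.get(rpaths, lpaths)               # rpaths[i] is downloaded to lpaths[i]
assert open(lpaths[1], "rb").read() == b"1"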
lib/python3.11/site-packages/fsspec/tests/abstract/put.py ADDED
@@ -0,0 +1,577 @@
1
+ from hashlib import md5
2
+ from itertools import product
3
+
4
+ import pytest
5
+
6
+ from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS
7
+
8
+
9
+ class AbstractPutTests:
10
+ def test_put_file_to_existing_directory(
11
+ self,
12
+ fs,
13
+ fs_join,
14
+ fs_target,
15
+ local_join,
16
+ local_bulk_operations_scenario_0,
17
+ supports_empty_directories,
18
+ ):
19
+ # Copy scenario 1a
20
+ source = local_bulk_operations_scenario_0
21
+
22
+ target = fs_target
23
+ fs.mkdir(target)
24
+ if not supports_empty_directories:
25
+ # Force target directory to exist by adding a dummy file
26
+ fs.touch(fs_join(target, "dummy"))
27
+ assert fs.isdir(target)
28
+
29
+ target_file2 = fs_join(target, "file2")
30
+ target_subfile1 = fs_join(target, "subfile1")
31
+
32
+ # Copy from source directory
33
+ fs.put(local_join(source, "file2"), target)
34
+ assert fs.isfile(target_file2)
35
+
36
+ # Copy from sub directory
37
+ fs.put(local_join(source, "subdir", "subfile1"), target)
38
+ assert fs.isfile(target_subfile1)
39
+
40
+ # Remove copied files
41
+ fs.rm([target_file2, target_subfile1])
42
+ assert not fs.exists(target_file2)
43
+ assert not fs.exists(target_subfile1)
44
+
45
+ # Repeat with trailing slash on target
46
+ fs.put(local_join(source, "file2"), target + "/")
47
+ assert fs.isdir(target)
48
+ assert fs.isfile(target_file2)
49
+
50
+ fs.put(local_join(source, "subdir", "subfile1"), target + "/")
51
+ assert fs.isfile(target_subfile1)
52
+
53
+ def test_put_file_to_new_directory(
54
+ self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0
55
+ ):
56
+ # Copy scenario 1b
57
+ source = local_bulk_operations_scenario_0
58
+
59
+ target = fs_target
60
+ fs.mkdir(target)
61
+
62
+ fs.put(
63
+ local_join(source, "subdir", "subfile1"), fs_join(target, "newdir/")
64
+ ) # Note trailing slash
65
+ assert fs.isdir(target)
66
+ assert fs.isdir(fs_join(target, "newdir"))
67
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
68
+
69
+ def test_put_file_to_file_in_existing_directory(
70
+ self,
71
+ fs,
72
+ fs_join,
73
+ fs_target,
74
+ local_join,
75
+ supports_empty_directories,
76
+ local_bulk_operations_scenario_0,
77
+ ):
78
+ # Copy scenario 1c
79
+ source = local_bulk_operations_scenario_0
80
+
81
+ target = fs_target
82
+ fs.mkdir(target)
83
+ if not supports_empty_directories:
84
+ # Force target directory to exist by adding a dummy file
85
+ fs.touch(fs_join(target, "dummy"))
86
+ assert fs.isdir(target)
87
+
88
+ fs.put(local_join(source, "subdir", "subfile1"), fs_join(target, "newfile"))
89
+ assert fs.isfile(fs_join(target, "newfile"))
90
+
91
+ def test_put_file_to_file_in_new_directory(
92
+ self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0
93
+ ):
94
+ # Copy scenario 1d
95
+ source = local_bulk_operations_scenario_0
96
+
97
+ target = fs_target
98
+ fs.mkdir(target)
99
+
100
+ fs.put(
101
+ local_join(source, "subdir", "subfile1"),
102
+ fs_join(target, "newdir", "newfile"),
103
+ )
104
+ assert fs.isdir(fs_join(target, "newdir"))
105
+ assert fs.isfile(fs_join(target, "newdir", "newfile"))
106
+
107
+ def test_put_directory_to_existing_directory(
108
+ self,
109
+ fs,
110
+ fs_join,
111
+ fs_target,
112
+ local_bulk_operations_scenario_0,
113
+ supports_empty_directories,
114
+ ):
115
+ # Copy scenario 1e
116
+ source = local_bulk_operations_scenario_0
117
+
118
+ target = fs_target
119
+ fs.mkdir(target)
120
+ if not supports_empty_directories:
121
+ # Force target directory to exist by adding a dummy file
122
+ dummy = fs_join(target, "dummy")
123
+ fs.touch(dummy)
124
+ assert fs.isdir(target)
125
+
126
+ for source_slash, target_slash in zip([False, True], [False, True]):
127
+ s = fs_join(source, "subdir")
128
+ if source_slash:
129
+ s += "/"
130
+ t = target + "/" if target_slash else target
131
+
132
+ # Without recursive does nothing
133
+ fs.put(s, t)
134
+ assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
135
+
136
+ # With recursive
137
+ fs.put(s, t, recursive=True)
138
+ if source_slash:
139
+ assert fs.isfile(fs_join(target, "subfile1"))
140
+ assert fs.isfile(fs_join(target, "subfile2"))
141
+ assert fs.isdir(fs_join(target, "nesteddir"))
142
+ assert fs.isfile(fs_join(target, "nesteddir", "nestedfile"))
143
+ assert not fs.exists(fs_join(target, "subdir"))
144
+
145
+ fs.rm(
146
+ [
147
+ fs_join(target, "subfile1"),
148
+ fs_join(target, "subfile2"),
149
+ fs_join(target, "nesteddir"),
150
+ ],
151
+ recursive=True,
152
+ )
153
+ else:
154
+ assert fs.isdir(fs_join(target, "subdir"))
155
+ assert fs.isfile(fs_join(target, "subdir", "subfile1"))
156
+ assert fs.isfile(fs_join(target, "subdir", "subfile2"))
157
+ assert fs.isdir(fs_join(target, "subdir", "nesteddir"))
158
+ assert fs.isfile(fs_join(target, "subdir", "nesteddir", "nestedfile"))
159
+
160
+ fs.rm(fs_join(target, "subdir"), recursive=True)
161
+ assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
162
+
163
+ # Limit recursive by maxdepth
164
+ fs.put(s, t, recursive=True, maxdepth=1)
165
+ if source_slash:
166
+ assert fs.isfile(fs_join(target, "subfile1"))
167
+ assert fs.isfile(fs_join(target, "subfile2"))
168
+ assert not fs.exists(fs_join(target, "nesteddir"))
169
+ assert not fs.exists(fs_join(target, "subdir"))
170
+
171
+ fs.rm(
172
+ [
173
+ fs_join(target, "subfile1"),
174
+ fs_join(target, "subfile2"),
175
+ ],
176
+ recursive=True,
177
+ )
178
+ else:
179
+ assert fs.isdir(fs_join(target, "subdir"))
180
+ assert fs.isfile(fs_join(target, "subdir", "subfile1"))
181
+ assert fs.isfile(fs_join(target, "subdir", "subfile2"))
182
+ assert not fs.exists(fs_join(target, "subdir", "nesteddir"))
183
+
184
+ fs.rm(fs_join(target, "subdir"), recursive=True)
185
+ assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
186
+
187
+ def test_put_directory_to_new_directory(
188
+ self,
189
+ fs,
190
+ fs_join,
191
+ fs_target,
192
+ local_bulk_operations_scenario_0,
193
+ supports_empty_directories,
194
+ ):
195
+ # Copy scenario 1f
196
+ source = local_bulk_operations_scenario_0
197
+
198
+ target = fs_target
199
+ fs.mkdir(target)
200
+
201
+ for source_slash, target_slash in zip([False, True], [False, True]):
202
+ s = fs_join(source, "subdir")
203
+ if source_slash:
204
+ s += "/"
205
+ t = fs_join(target, "newdir")
206
+ if target_slash:
207
+ t += "/"
208
+
209
+ # Without recursive does nothing
210
+ fs.put(s, t)
211
+ if supports_empty_directories:
212
+ assert fs.ls(target) == []
213
+ else:
214
+ with pytest.raises(FileNotFoundError):
215
+ fs.ls(target)
216
+
217
+ # With recursive
218
+ fs.put(s, t, recursive=True)
219
+ assert fs.isdir(fs_join(target, "newdir"))
220
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
221
+ assert fs.isfile(fs_join(target, "newdir", "subfile2"))
222
+ assert fs.isdir(fs_join(target, "newdir", "nesteddir"))
223
+ assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile"))
224
+ assert not fs.exists(fs_join(target, "subdir"))
225
+
226
+ fs.rm(fs_join(target, "newdir"), recursive=True)
227
+ assert not fs.exists(fs_join(target, "newdir"))
228
+
229
+ # Limit recursive by maxdepth
230
+ fs.put(s, t, recursive=True, maxdepth=1)
231
+ assert fs.isdir(fs_join(target, "newdir"))
232
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
233
+ assert fs.isfile(fs_join(target, "newdir", "subfile2"))
234
+ assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
235
+ assert not fs.exists(fs_join(target, "subdir"))
236
+
237
+ fs.rm(fs_join(target, "newdir"), recursive=True)
238
+ assert not fs.exists(fs_join(target, "newdir"))
239
+
240
+ def test_put_glob_to_existing_directory(
241
+ self,
242
+ fs,
243
+ fs_join,
244
+ fs_target,
245
+ local_join,
246
+ supports_empty_directories,
247
+ local_bulk_operations_scenario_0,
248
+ ):
249
+ # Copy scenario 1g
250
+ source = local_bulk_operations_scenario_0
251
+
252
+ target = fs_target
253
+ fs.mkdir(target)
254
+ if not supports_empty_directories:
255
+ # Force target directory to exist by adding a dummy file
256
+ dummy = fs_join(target, "dummy")
257
+ fs.touch(dummy)
258
+ assert fs.isdir(target)
259
+
260
+ for target_slash in [False, True]:
261
+ t = target + "/" if target_slash else target
262
+
263
+ # Without recursive
264
+ fs.put(local_join(source, "subdir", "*"), t)
265
+ assert fs.isfile(fs_join(target, "subfile1"))
266
+ assert fs.isfile(fs_join(target, "subfile2"))
267
+ assert not fs.isdir(fs_join(target, "nesteddir"))
268
+ assert not fs.exists(fs_join(target, "nesteddir", "nestedfile"))
269
+ assert not fs.exists(fs_join(target, "subdir"))
270
+
271
+ fs.rm(
272
+ [
273
+ fs_join(target, "subfile1"),
274
+ fs_join(target, "subfile2"),
275
+ ],
276
+ recursive=True,
277
+ )
278
+ assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
279
+
280
+ # With recursive
281
+ for glob, recursive in zip(["*", "**"], [True, False]):
282
+ fs.put(local_join(source, "subdir", glob), t, recursive=recursive)
283
+ assert fs.isfile(fs_join(target, "subfile1"))
284
+ assert fs.isfile(fs_join(target, "subfile2"))
285
+ assert fs.isdir(fs_join(target, "nesteddir"))
286
+ assert fs.isfile(fs_join(target, "nesteddir", "nestedfile"))
287
+ assert not fs.exists(fs_join(target, "subdir"))
288
+
289
+ fs.rm(
290
+ [
291
+ fs_join(target, "subfile1"),
292
+ fs_join(target, "subfile2"),
293
+ fs_join(target, "nesteddir"),
294
+ ],
295
+ recursive=True,
296
+ )
297
+ assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
298
+
299
+ # Limit recursive by maxdepth
300
+ fs.put(
301
+ local_join(source, "subdir", glob),
302
+ t,
303
+ recursive=recursive,
304
+ maxdepth=1,
305
+ )
306
+ assert fs.isfile(fs_join(target, "subfile1"))
307
+ assert fs.isfile(fs_join(target, "subfile2"))
308
+ assert not fs.exists(fs_join(target, "nesteddir"))
309
+ assert not fs.exists(fs_join(target, "subdir"))
310
+
311
+ fs.rm(
312
+ [
313
+ fs_join(target, "subfile1"),
314
+ fs_join(target, "subfile2"),
315
+ ],
316
+ recursive=True,
317
+ )
318
+ assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
319
+
320
+ def test_put_glob_to_new_directory(
321
+ self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0
322
+ ):
323
+ # Copy scenario 1h
324
+ source = local_bulk_operations_scenario_0
325
+
326
+ target = fs_target
327
+ fs.mkdir(target)
328
+
329
+ for target_slash in [False, True]:
330
+ t = fs_join(target, "newdir")
331
+ if target_slash:
332
+ t += "/"
333
+
334
+ # Without recursive
335
+ fs.put(local_join(source, "subdir", "*"), t)
336
+ assert fs.isdir(fs_join(target, "newdir"))
337
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
338
+ assert fs.isfile(fs_join(target, "newdir", "subfile2"))
339
+ assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
340
+ assert not fs.exists(fs_join(target, "newdir", "nesteddir", "nestedfile"))
341
+ assert not fs.exists(fs_join(target, "subdir"))
342
+ assert not fs.exists(fs_join(target, "newdir", "subdir"))
343
+
344
+ fs.rm(fs_join(target, "newdir"), recursive=True)
345
+ assert not fs.exists(fs_join(target, "newdir"))
346
+
347
+ # With recursive
348
+ for glob, recursive in zip(["*", "**"], [True, False]):
349
+ fs.put(local_join(source, "subdir", glob), t, recursive=recursive)
350
+ assert fs.isdir(fs_join(target, "newdir"))
351
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
352
+ assert fs.isfile(fs_join(target, "newdir", "subfile2"))
353
+ assert fs.isdir(fs_join(target, "newdir", "nesteddir"))
354
+ assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile"))
355
+ assert not fs.exists(fs_join(target, "subdir"))
356
+ assert not fs.exists(fs_join(target, "newdir", "subdir"))
357
+
358
+ fs.rm(fs_join(target, "newdir"), recursive=True)
359
+ assert not fs.exists(fs_join(target, "newdir"))
360
+
361
+ # Limit recursive by maxdepth
362
+ fs.put(
363
+ local_join(source, "subdir", glob),
364
+ t,
365
+ recursive=recursive,
366
+ maxdepth=1,
367
+ )
368
+ assert fs.isdir(fs_join(target, "newdir"))
369
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
370
+ assert fs.isfile(fs_join(target, "newdir", "subfile2"))
371
+ assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
372
+ assert not fs.exists(fs_join(target, "subdir"))
373
+ assert not fs.exists(fs_join(target, "newdir", "subdir"))
374
+
375
+ fs.rm(fs_join(target, "newdir"), recursive=True)
376
+ assert not fs.exists(fs_join(target, "newdir"))
377
+
378
+ @pytest.mark.parametrize(
379
+ GLOB_EDGE_CASES_TESTS["argnames"],
380
+ GLOB_EDGE_CASES_TESTS["argvalues"],
381
+ )
382
+ def test_put_glob_edge_cases(
383
+ self,
384
+ path,
385
+ recursive,
386
+ maxdepth,
387
+ expected,
388
+ fs,
389
+ fs_join,
390
+ fs_target,
391
+ local_glob_edge_cases_files,
392
+ local_join,
393
+ fs_sanitize_path,
394
+ ):
395
+ # Copy scenario 1g
396
+ source = local_glob_edge_cases_files
397
+
398
+ target = fs_target
399
+
400
+ for new_dir, target_slash in product([True, False], [True, False]):
401
+ fs.mkdir(target)
402
+
403
+ t = fs_join(target, "newdir") if new_dir else target
404
+ t = t + "/" if target_slash else t
405
+
406
+ fs.put(local_join(source, path), t, recursive=recursive, maxdepth=maxdepth)
407
+
408
+ output = fs.find(target)
409
+ if new_dir:
410
+ prefixed_expected = [
411
+ fs_sanitize_path(fs_join(target, "newdir", p)) for p in expected
412
+ ]
413
+ else:
414
+ prefixed_expected = [
415
+ fs_sanitize_path(fs_join(target, p)) for p in expected
416
+ ]
417
+ assert sorted(output) == sorted(prefixed_expected)
418
+
419
+ try:
420
+ fs.rm(target, recursive=True)
421
+ except FileNotFoundError:
422
+ pass
423
+
424
+ def test_put_list_of_files_to_existing_directory(
425
+ self,
426
+ fs,
427
+ fs_join,
428
+ fs_target,
429
+ local_join,
430
+ local_bulk_operations_scenario_0,
431
+ supports_empty_directories,
432
+ ):
433
+ # Copy scenario 2a
434
+ source = local_bulk_operations_scenario_0
435
+
436
+ target = fs_target
437
+ fs.mkdir(target)
438
+ if not supports_empty_directories:
439
+ # Force target directory to exist by adding a dummy file
440
+ dummy = fs_join(target, "dummy")
441
+ fs.touch(dummy)
442
+ assert fs.isdir(target)
443
+
444
+ source_files = [
445
+ local_join(source, "file1"),
446
+ local_join(source, "file2"),
447
+ local_join(source, "subdir", "subfile1"),
448
+ ]
449
+
450
+ for target_slash in [False, True]:
451
+ t = target + "/" if target_slash else target
452
+
453
+ fs.put(source_files, t)
454
+ assert fs.isfile(fs_join(target, "file1"))
455
+ assert fs.isfile(fs_join(target, "file2"))
456
+ assert fs.isfile(fs_join(target, "subfile1"))
457
+
458
+ fs.rm(
459
+ [
460
+ fs_join(target, "file1"),
461
+ fs_join(target, "file2"),
462
+ fs_join(target, "subfile1"),
463
+ ],
464
+ recursive=True,
465
+ )
466
+ assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
467
+
468
+ def test_put_list_of_files_to_new_directory(
469
+ self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0
470
+ ):
471
+ # Copy scenario 2b
472
+ source = local_bulk_operations_scenario_0
473
+
474
+ target = fs_target
475
+ fs.mkdir(target)
476
+
477
+ source_files = [
478
+ local_join(source, "file1"),
479
+ local_join(source, "file2"),
480
+ local_join(source, "subdir", "subfile1"),
481
+ ]
482
+
483
+ fs.put(source_files, fs_join(target, "newdir") + "/") # Note trailing slash
484
+ assert fs.isdir(fs_join(target, "newdir"))
485
+ assert fs.isfile(fs_join(target, "newdir", "file1"))
486
+ assert fs.isfile(fs_join(target, "newdir", "file2"))
487
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
488
+
489
+ def test_put_directory_recursive(
490
+ self, fs, fs_join, fs_target, local_fs, local_join, local_path
491
+ ):
492
+ # https://github.com/fsspec/filesystem_spec/issues/1062
493
+ # Recursive cp/get/put of source directory into non-existent target directory.
494
+ src = local_join(local_path, "src")
495
+ src_file = local_join(src, "file")
496
+ local_fs.mkdir(src)
497
+ local_fs.touch(src_file)
498
+
499
+ target = fs_target
500
+
501
+ # put without slash
502
+ assert not fs.exists(target)
503
+ for loop in range(2):
504
+ fs.put(src, target, recursive=True)
505
+ assert fs.isdir(target)
506
+
507
+ if loop == 0:
508
+ assert fs.isfile(fs_join(target, "file"))
509
+ assert not fs.exists(fs_join(target, "src"))
510
+ else:
511
+ assert fs.isfile(fs_join(target, "file"))
512
+ assert fs.isdir(fs_join(target, "src"))
513
+ assert fs.isfile(fs_join(target, "src", "file"))
514
+
515
+ fs.rm(target, recursive=True)
516
+
517
+ # put with slash
518
+ assert not fs.exists(target)
519
+ for loop in range(2):
520
+ fs.put(src + "/", target, recursive=True)
521
+ assert fs.isdir(target)
522
+ assert fs.isfile(fs_join(target, "file"))
523
+ assert not fs.exists(fs_join(target, "src"))
524
+
525
+ def test_put_directory_without_files_with_same_name_prefix(
526
+ self,
527
+ fs,
528
+ fs_join,
529
+ fs_target,
530
+ local_join,
531
+ local_dir_and_file_with_same_name_prefix,
532
+ supports_empty_directories,
533
+ ):
534
+ # Create the test dirs
535
+ source = local_dir_and_file_with_same_name_prefix
536
+ target = fs_target
537
+
538
+ # Test without glob
539
+ fs.put(local_join(source, "subdir"), fs_target, recursive=True)
540
+
541
+ assert fs.isfile(fs_join(fs_target, "subfile.txt"))
542
+ assert not fs.isfile(fs_join(fs_target, "subdir.txt"))
543
+
544
+ fs.rm([fs_join(target, "subfile.txt")])
545
+ if supports_empty_directories:
546
+ assert fs.ls(target) == []
547
+ else:
548
+ assert not fs.exists(target)
549
+
550
+ # Test with glob
551
+ fs.put(local_join(source, "subdir*"), fs_target, recursive=True)
552
+
553
+ assert fs.isdir(fs_join(fs_target, "subdir"))
554
+ assert fs.isfile(fs_join(fs_target, "subdir", "subfile.txt"))
555
+ assert fs.isfile(fs_join(fs_target, "subdir.txt"))
556
+
557
+ def test_copy_with_source_and_destination_as_list(
558
+ self, fs, fs_target, fs_join, local_join, local_10_files_with_hashed_names
559
+ ):
560
+ # Create the test dir
561
+ source = local_10_files_with_hashed_names
562
+ target = fs_target
563
+
564
+ # Create list of files for source and destination
565
+ source_files = []
566
+ destination_files = []
567
+ for i in range(10):
568
+ hashed_i = md5(str(i).encode("utf-8")).hexdigest()
569
+ source_files.append(local_join(source, f"{hashed_i}.txt"))
570
+ destination_files.append(fs_join(target, f"{hashed_i}.txt"))
571
+
572
+ # Copy and assert order was kept
573
+ fs.put(lpath=source_files, rpath=destination_files)
574
+
575
+ for i in range(10):
576
+ file_content = fs.cat(destination_files[i]).decode("utf-8")
577
+ assert file_content == str(i)
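As with get(), the trailing-slash convention checked throughout these put tests means "copy into this directory". A minimal sketch of the semantics, using the in-memory filesystem with illustrative paths:

import os
import tempfile

import fsspec

fs = fsspec.filesystem("memory")

src_dir = tempfile.mkdtemp()
src_file = os.path.join(src_dir, "subfile1")
with open(src_file, "w") as f:
    f.write("data")

fs.put(src_file, "/target/newdir/")      # trailing slash: keep the file's own name
assert fs.isfile("/target/newdir/subfile1")

fs.put(src_file, "/target/renamed")      # no slash: the target is the file name
assert fs.isfile("/target/renamed")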
lib/python3.11/site-packages/fsspec/transaction.py ADDED
@@ -0,0 +1,85 @@
1
+ from collections import deque
2
+
3
+
4
+ class Transaction:
5
+ """Filesystem transaction write context
6
+
7
+ Gathers files for deferred commit or discard, so that several write
8
+ operations can be finalized semi-atomically. This works by having this
9
+ instance as the ``.transaction`` attribute of the given filesystem
10
+ """
11
+
12
+ def __init__(self, fs):
13
+ """
14
+ Parameters
15
+ ----------
16
+ fs: FileSystem instance
17
+ """
18
+ self.fs = fs
19
+ self.files = deque()
20
+
21
+ def __enter__(self):
22
+ self.start()
23
+ return self
24
+
25
+ def __exit__(self, exc_type, exc_val, exc_tb):
26
+ """End transaction and commit, if exit is not due to exception"""
27
+ # only commit if there was no exception
28
+ self.complete(commit=exc_type is None)
29
+ self.fs._intrans = False
30
+ self.fs._transaction = None
31
+
32
+ def start(self):
33
+ """Start a transaction on this FileSystem"""
34
+ self.files = deque() # clean up after previous failed completions
35
+ self.fs._intrans = True
36
+
37
+ def complete(self, commit=True):
38
+ """Finish transaction: commit or discard all deferred files"""
39
+ while self.files:
40
+ f = self.files.popleft()
41
+ if commit:
42
+ f.commit()
43
+ else:
44
+ f.discard()
45
+ self.fs._intrans = False
46
+
47
+
48
+ class FileActor:
49
+ def __init__(self):
50
+ self.files = []
51
+
52
+ def commit(self):
53
+ for f in self.files:
54
+ f.commit()
55
+ self.files.clear()
56
+
57
+ def discard(self):
58
+ for f in self.files:
59
+ f.discard()
60
+ self.files.clear()
61
+
62
+ def append(self, f):
63
+ self.files.append(f)
64
+
65
+
66
+ class DaskTransaction(Transaction):
67
+ def __init__(self, fs):
68
+ """
69
+ Parameters
70
+ ----------
71
+ fs: FileSystem instance
72
+ """
73
+ import distributed
74
+
75
+ super().__init__(fs)
76
+ client = distributed.default_client()
77
+ self.files = client.submit(FileActor, actor=True).result()
78
+
79
+ def complete(self, commit=True):
80
+ """Finish transaction: commit or discard all deferred files"""
81
+ if commit:
82
+ self.files.commit().result()
83
+ else:
84
+ self.files.discard().result()
85
+ self.fs._intrans = False
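A short sketch of how this class is normally reached in practice: through the filesystem's ``transaction`` attribute used as a context manager (in-memory filesystem and paths are illustrative):

import fsspec

fs = fsspec.filesystem("memory")

with fs.transaction:
    # files opened for writing inside the block are committed together
    # when the block exits cleanly, and discarded if it raises
    with fs.open("/staging/part-0.txt", "wb") as f:
        f.write(b"hello")

assert fs.cat("/staging/part-0.txt") == b"hello"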
lib/python3.11/site-packages/fsspec/utils.py ADDED
@@ -0,0 +1,742 @@
1
+ from __future__ import annotations
2
+
3
+ import contextlib
4
+ import logging
5
+ import math
6
+ import os
7
+ import pathlib
8
+ import re
9
+ import sys
10
+ import tempfile
11
+ from functools import partial
12
+ from hashlib import md5
13
+ from importlib.metadata import version
14
+ from typing import (
15
+ IO,
16
+ TYPE_CHECKING,
17
+ Any,
18
+ Callable,
19
+ Iterable,
20
+ Iterator,
21
+ Sequence,
22
+ TypeVar,
23
+ )
24
+ from urllib.parse import urlsplit
25
+
26
+ if TYPE_CHECKING:
27
+ from typing_extensions import TypeGuard
28
+
29
+ from fsspec.spec import AbstractFileSystem
30
+
31
+
32
+ DEFAULT_BLOCK_SIZE = 5 * 2**20
33
+
34
+ T = TypeVar("T")
35
+
36
+
37
+ def infer_storage_options(
38
+ urlpath: str, inherit_storage_options: dict[str, Any] | None = None
39
+ ) -> dict[str, Any]:
40
+ """Infer storage options from URL path and merge it with existing storage
41
+ options.
42
+
43
+ Parameters
44
+ ----------
45
+ urlpath: str or unicode
46
+ Either local absolute file path or URL (hdfs://namenode:8020/file.csv)
47
+ inherit_storage_options: dict (optional)
48
+ Its contents will get merged with the inferred information from the
49
+ given path
50
+
51
+ Returns
52
+ -------
53
+ Storage options dict.
54
+
55
+ Examples
56
+ --------
57
+ >>> infer_storage_options('/mnt/datasets/test.csv') # doctest: +SKIP
58
+ {"protocol": "file", "path", "/mnt/datasets/test.csv"}
59
+ >>> infer_storage_options(
60
+ ... 'hdfs://username:pwd@node:123/mnt/datasets/test.csv?q=1',
61
+ ... inherit_storage_options={'extra': 'value'},
62
+ ... ) # doctest: +SKIP
63
+ {"protocol": "hdfs", "username": "username", "password": "pwd",
64
+ "host": "node", "port": 123, "path": "/mnt/datasets/test.csv",
65
+ "url_query": "q=1", "extra": "value"}
66
+ """
67
+ # Handle Windows paths including disk name in this special case
68
+ if (
69
+ re.match(r"^[a-zA-Z]:[\\/]", urlpath)
70
+ or re.match(r"^[a-zA-Z0-9]+://", urlpath) is None
71
+ ):
72
+ return {"protocol": "file", "path": urlpath}
73
+
74
+ parsed_path = urlsplit(urlpath)
75
+ protocol = parsed_path.scheme or "file"
76
+ if parsed_path.fragment:
77
+ path = "#".join([parsed_path.path, parsed_path.fragment])
78
+ else:
79
+ path = parsed_path.path
80
+ if protocol == "file":
81
+ # Special case parsing file protocol URL on Windows according to:
82
+ # https://msdn.microsoft.com/en-us/library/jj710207.aspx
83
+ windows_path = re.match(r"^/([a-zA-Z])[:|]([\\/].*)$", path)
84
+ if windows_path:
85
+ path = "%s:%s" % windows_path.groups()
86
+
87
+ if protocol in ["http", "https"]:
88
+ # for HTTP, we don't want to parse, as requests will anyway
89
+ return {"protocol": protocol, "path": urlpath}
90
+
91
+ options: dict[str, Any] = {"protocol": protocol, "path": path}
92
+
93
+ if parsed_path.netloc:
94
+ # Parse `hostname` from netloc manually because `parsed_path.hostname`
95
+ # lowercases the hostname which is not always desirable (e.g. in S3):
96
+ # https://github.com/dask/dask/issues/1417
97
+ options["host"] = parsed_path.netloc.rsplit("@", 1)[-1].rsplit(":", 1)[0]
98
+
99
+ if protocol in ("s3", "s3a", "gcs", "gs"):
100
+ options["path"] = options["host"] + options["path"]
101
+ else:
102
+ options["host"] = options["host"]
103
+ if parsed_path.port:
104
+ options["port"] = parsed_path.port
105
+ if parsed_path.username:
106
+ options["username"] = parsed_path.username
107
+ if parsed_path.password:
108
+ options["password"] = parsed_path.password
109
+
110
+ if parsed_path.query:
111
+ options["url_query"] = parsed_path.query
112
+ if parsed_path.fragment:
113
+ options["url_fragment"] = parsed_path.fragment
114
+
115
+ if inherit_storage_options:
116
+ update_storage_options(options, inherit_storage_options)
117
+
118
+ return options
119
+
120
+
121
+ def update_storage_options(
122
+ options: dict[str, Any], inherited: dict[str, Any] | None = None
123
+ ) -> None:
124
+ if not inherited:
125
+ inherited = {}
126
+ collisions = set(options) & set(inherited)
127
+ if collisions:
128
+ for collision in collisions:
129
+ if options.get(collision) != inherited.get(collision):
130
+ raise KeyError(
131
+ f"Collision between inferred and specified storage "
132
+ f"option:\n{collision}"
133
+ )
134
+ options.update(inherited)
135
+
136
+
137
+ # Compression extensions registered via fsspec.compression.register_compression
138
+ compressions: dict[str, str] = {}
139
+
140
+
141
+ def infer_compression(filename: str) -> str | None:
142
+ """Infer compression, if available, from filename.
143
+
144
+ Infer a named compression type, if registered and available, from filename
145
+ extension. This includes builtin (gz, bz2, zip) compressions, as well as
146
+ optional compressions. See fsspec.compression.register_compression.
147
+ """
148
+ extension = os.path.splitext(filename)[-1].strip(".").lower()
149
+ if extension in compressions:
150
+ return compressions[extension]
151
+ return None
152
+
153
+
154
+ def build_name_function(max_int: float) -> Callable[[int], str]:
155
+ """Returns a function that receives a single integer
156
+ and returns it as a string padded by enough zero characters
157
+ to align with maximum possible integer
158
+
159
+ >>> name_f = build_name_function(57)
160
+
161
+ >>> name_f(7)
162
+ '07'
163
+ >>> name_f(31)
164
+ '31'
165
+ >>> build_name_function(1000)(42)
166
+ '0042'
167
+ >>> build_name_function(999)(42)
168
+ '042'
169
+ >>> build_name_function(0)(0)
170
+ '0'
171
+ """
172
+ # handle corner cases max_int is 0 or exact power of 10
173
+ max_int += 1e-8
174
+
175
+ pad_length = int(math.ceil(math.log10(max_int)))
176
+
177
+ def name_function(i: int) -> str:
178
+ return str(i).zfill(pad_length)
179
+
180
+ return name_function
181
+
182
+
183
+ def seek_delimiter(file: IO[bytes], delimiter: bytes, blocksize: int) -> bool:
184
+ r"""Seek current file to file start, file end, or byte after delimiter seq.
185
+
186
+ Seeks file to next chunk delimiter, where chunks are defined on file start,
187
+ a delimiting sequence, and file end. Use file.tell() to see location afterwards.
188
+ Note that file start is a valid split, so must be at offset > 0 to seek for
189
+ delimiter.
190
+
191
+ Parameters
192
+ ----------
193
+ file: a file
194
+ delimiter: bytes
195
+ a delimiter like ``b'\n'`` or message sentinel, matching file .read() type
196
+ blocksize: int
197
+ Number of bytes to read from the file at once.
198
+
199
+
200
+ Returns
201
+ -------
202
+ Returns True if a delimiter was found, False if at file start or end.
203
+
204
+ """
205
+
206
+ if file.tell() == 0:
207
+ # beginning-of-file, return without seek
208
+ return False
209
+
210
+ # Interface is for binary IO, with delimiter as bytes, but initialize last
211
+ # with result of file.read to preserve compatibility with text IO.
212
+ last: bytes | None = None
213
+ while True:
214
+ current = file.read(blocksize)
215
+ if not current:
216
+ # end-of-file without delimiter
217
+ return False
218
+ full = last + current if last else current
219
+ try:
220
+ if delimiter in full:
221
+ i = full.index(delimiter)
222
+ file.seek(file.tell() - (len(full) - i) + len(delimiter))
223
+ return True
224
+ elif len(current) < blocksize:
225
+ # end-of-file without delimiter
226
+ return False
227
+ except (OSError, ValueError):
228
+ pass
229
+ last = full[-len(delimiter) :]
230
+
231
+
232
+ def read_block(
233
+ f: IO[bytes],
234
+ offset: int,
235
+ length: int | None,
236
+ delimiter: bytes | None = None,
237
+ split_before: bool = False,
238
+ ) -> bytes:
239
+ """Read a block of bytes from a file
240
+
241
+ Parameters
242
+ ----------
243
+ f: File
244
+ Open file
245
+ offset: int
246
+ Byte offset to start read
247
+ length: int
248
+ Number of bytes to read, read through end of file if None
249
+ delimiter: bytes (optional)
250
+ Ensure reading starts and stops at delimiter bytestring
251
+ split_before: bool (optional)
252
+ Start/stop read *before* delimiter bytestring.
253
+
254
+
255
+ If using the ``delimiter=`` keyword argument we ensure that the read
256
+ starts and stops at delimiter boundaries that follow the locations
257
+ ``offset`` and ``offset + length``. If ``offset`` is zero then we
258
+ start at zero, regardless of delimiter. The bytestring returned WILL
259
+ include the terminating delimiter string.
260
+
261
+ Examples
262
+ --------
263
+
264
+ >>> from io import BytesIO # doctest: +SKIP
265
+ >>> f = BytesIO(b'Alice, 100\\nBob, 200\\nCharlie, 300') # doctest: +SKIP
266
+ >>> read_block(f, 0, 13) # doctest: +SKIP
267
+ b'Alice, 100\\nBo'
268
+
269
+ >>> read_block(f, 0, 13, delimiter=b'\\n') # doctest: +SKIP
270
+ b'Alice, 100\\nBob, 200\\n'
271
+
272
+ >>> read_block(f, 10, 10, delimiter=b'\\n') # doctest: +SKIP
273
+ b'Bob, 200\\nCharlie, 300'
274
+ """
275
+ if delimiter:
276
+ f.seek(offset)
277
+ found_start_delim = seek_delimiter(f, delimiter, 2**16)
278
+ if length is None:
279
+ return f.read()
280
+ start = f.tell()
281
+ length -= start - offset
282
+
283
+ f.seek(start + length)
284
+ found_end_delim = seek_delimiter(f, delimiter, 2**16)
285
+ end = f.tell()
286
+
287
+ # Adjust split location to before delimiter iff seek found the
288
+ # delimiter sequence, not start or end of file.
289
+ if found_start_delim and split_before:
290
+ start -= len(delimiter)
291
+
292
+ if found_end_delim and split_before:
293
+ end -= len(delimiter)
294
+
295
+ offset = start
296
+ length = end - start
297
+
298
+ f.seek(offset)
299
+
300
+ # TODO: allow length to be None and read to the end of the file?
301
+ assert length is not None
302
+ b = f.read(length)
303
+ return b
304
+
305
+
306
+ def tokenize(*args: Any, **kwargs: Any) -> str:
307
+ """Deterministic token
308
+
309
+ (modified from dask.base)
310
+
311
+ >>> tokenize([1, 2, '3'])
312
+ '9d71491b50023b06fc76928e6eddb952'
313
+
314
+ >>> tokenize('Hello') == tokenize('Hello')
315
+ True
316
+ """
317
+ if kwargs:
318
+ args += (kwargs,)
319
+ try:
320
+ h = md5(str(args).encode())
321
+ except ValueError:
322
+ # FIPS systems: https://github.com/fsspec/filesystem_spec/issues/380
323
+ h = md5(str(args).encode(), usedforsecurity=False)
324
+ return h.hexdigest()
325
+
326
+
327
+ def stringify_path(filepath: str | os.PathLike[str] | pathlib.Path) -> str:
328
+ """Attempt to convert a path-like object to a string.
329
+
330
+ Parameters
331
+ ----------
332
+ filepath: object to be converted
333
+
334
+ Returns
335
+ -------
336
+ filepath_str: maybe a string version of the object
337
+
338
+ Notes
339
+ -----
340
+ Objects supporting the fspath protocol are coerced according to its
341
+ __fspath__ method.
342
+
343
+ For backwards compatibility with older Python versions, pathlib.Path
344
+ objects are specially coerced.
345
+
346
+ Any other object is passed through unchanged, which includes bytes,
347
+ strings, buffers, or anything else that's not even path-like.
348
+ """
349
+ if isinstance(filepath, str):
350
+ return filepath
351
+ elif hasattr(filepath, "__fspath__"):
352
+ return filepath.__fspath__()
353
+ elif isinstance(filepath, pathlib.Path):
354
+ return str(filepath)
355
+ elif hasattr(filepath, "path"):
356
+ return filepath.path
357
+ else:
358
+ return filepath # type: ignore[return-value]
359
+
360
+
361
+ def make_instance(
362
+ cls: Callable[..., T], args: Sequence[Any], kwargs: dict[str, Any]
363
+ ) -> T:
364
+ inst = cls(*args, **kwargs)
365
+ inst._determine_worker() # type: ignore[attr-defined]
366
+ return inst
367
+
368
+
369
+ def common_prefix(paths: Iterable[str]) -> str:
370
+ """For a list of paths, find the shortest prefix common to all"""
371
+ parts = [p.split("/") for p in paths]
372
+ lmax = min(len(p) for p in parts)
373
+ end = 0
374
+ for i in range(lmax):
375
+ end = all(p[i] == parts[0][i] for p in parts)
376
+ if not end:
377
+ break
378
+ i += end
379
+ return "/".join(parts[0][:i])
380
+
381
+
382
+ def other_paths(
383
+ paths: list[str],
384
+ path2: str | list[str],
385
+ exists: bool = False,
386
+ flatten: bool = False,
387
+ ) -> list[str]:
388
+ """In bulk file operations, construct a new file tree from a list of files
389
+
390
+ Parameters
391
+ ----------
392
+ paths: list of str
393
+ The input file tree
394
+ path2: str or list of str
395
+ Root to construct the new list in. If this is already a list of str, we just
396
+ assert it has the right number of elements.
397
+ exists: bool (optional)
398
+ For a str destination, if it already exists (and is a dir), files should
399
+ end up inside.
400
+ flatten: bool (optional)
401
+ Whether to flatten the input directory tree structure so that the output files
402
+ are in the same directory.
403
+
404
+ Returns
405
+ -------
406
+ list of str
407
+ """
408
+
409
+ if isinstance(path2, str):
410
+ path2 = path2.rstrip("/")
411
+
412
+ if flatten:
413
+ path2 = ["/".join((path2, p.split("/")[-1])) for p in paths]
414
+ else:
415
+ cp = common_prefix(paths)
416
+ if exists:
417
+ cp = cp.rsplit("/", 1)[0]
418
+ if not cp and all(not s.startswith("/") for s in paths):
419
+ path2 = ["/".join([path2, p]) for p in paths]
420
+ else:
421
+ path2 = [p.replace(cp, path2, 1) for p in paths]
422
+ else:
423
+ assert len(paths) == len(path2)
424
+ return path2
425
+
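A small worked example of how ``other_paths`` rewrites a source tree under a new root (paths are illustrative):

from fsspec.utils import other_paths

srcs = ["bucket/data/a.csv", "bucket/data/sub/b.csv"]

other_paths(srcs, "dest")
# -> ['dest/a.csv', 'dest/sub/b.csv']  (the common prefix "bucket/data" is replaced)

other_paths(srcs, "dest", flatten=True)
# -> ['dest/a.csv', 'dest/b.csv']      (directory structure collapsed)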
426
+
427
+ def is_exception(obj: Any) -> bool:
428
+ return isinstance(obj, BaseException)
429
+
430
+
431
+ def isfilelike(f: Any) -> TypeGuard[IO[bytes]]:
432
+ for attr in ["read", "close", "tell"]:
433
+ if not hasattr(f, attr):
434
+ return False
435
+ return True
436
+
437
+
438
+ def get_protocol(url: str) -> str:
439
+ url = stringify_path(url)
440
+ parts = re.split(r"(\:\:|\://)", url, 1)
441
+ if len(parts) > 1:
442
+ return parts[0]
443
+ return "file"
444
+
445
+
446
+ def can_be_local(path: str) -> bool:
447
+ """Can the given URL be used with open_local?"""
448
+ from fsspec import get_filesystem_class
449
+
450
+ try:
451
+ return getattr(get_filesystem_class(get_protocol(path)), "local_file", False)
452
+ except (ValueError, ImportError):
453
+ # not in registry or import failed
454
+ return False
455
+
456
+
457
+ def get_package_version_without_import(name: str) -> str | None:
458
+ """For given package name, try to find the version without importing it
459
+
460
+ Import and package.__version__ is still the backup here, so an import
461
+ *might* happen.
462
+
463
+ Returns either the version string, or None if the package
464
+ or the version was not readily found.
465
+ """
466
+ if name in sys.modules:
467
+ mod = sys.modules[name]
468
+ if hasattr(mod, "__version__"):
469
+ return mod.__version__
470
+ try:
471
+ return version(name)
472
+ except: # noqa: E722
473
+ pass
474
+ try:
475
+ import importlib
476
+
477
+ mod = importlib.import_module(name)
478
+ return mod.__version__
479
+ except (ImportError, AttributeError):
480
+ return None
481
+
482
+
483
+ def setup_logging(
484
+ logger: logging.Logger | None = None,
485
+ logger_name: str | None = None,
486
+ level: str = "DEBUG",
487
+ clear: bool = True,
488
+ ) -> logging.Logger:
489
+ if logger is None and logger_name is None:
490
+ raise ValueError("Provide either logger object or logger name")
491
+ logger = logger or logging.getLogger(logger_name)
492
+ handle = logging.StreamHandler()
493
+ formatter = logging.Formatter(
494
+ "%(asctime)s - %(name)s - %(levelname)s - %(funcName)s -- %(message)s"
495
+ )
496
+ handle.setFormatter(formatter)
497
+ if clear:
498
+ logger.handlers.clear()
499
+ logger.addHandler(handle)
500
+ logger.setLevel(level)
501
+ return logger
502
+
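For example, verbose logging for a single backend can be switched on without touching the root logger; the logger name below is illustrative:

from fsspec.utils import setup_logging

setup_logging(logger_name="fsspec.http", level="DEBUG")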
503
+
504
+ def _unstrip_protocol(name: str, fs: AbstractFileSystem) -> str:
505
+ return fs.unstrip_protocol(name)
506
+
507
+
508
+ def mirror_from(
509
+ origin_name: str, methods: Iterable[str]
510
+ ) -> Callable[[type[T]], type[T]]:
511
+ """Mirror attributes and methods from the given
512
+ origin_name attribute of the instance to the
513
+ decorated class"""
514
+
515
+ def origin_getter(method: str, self: Any) -> Any:
516
+ origin = getattr(self, origin_name)
517
+ return getattr(origin, method)
518
+
519
+ def wrapper(cls: type[T]) -> type[T]:
520
+ for method in methods:
521
+ wrapped_method = partial(origin_getter, method)
522
+ setattr(cls, method, property(wrapped_method))
523
+ return cls
524
+
525
+ return wrapper
526
+
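A minimal sketch of the decorator in use: selected attributes of an object stored on ``self.raw`` are exposed as read-only properties (the class, attribute names, and file path are illustrative):

from fsspec.utils import mirror_from

@mirror_from("raw", ["closed", "mode", "name"])
class Wrapper:
    def __init__(self, raw):
        self.raw = raw

w = Wrapper(open("example.txt", "w"))
print(w.mode)   # "w", forwarded from the wrapped file object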
527
+
528
+ @contextlib.contextmanager
529
+ def nullcontext(obj: T) -> Iterator[T]:
530
+ yield obj
531
+
532
+
533
+ def merge_offset_ranges(
534
+ paths: list[str],
535
+ starts: list[int] | int,
536
+ ends: list[int] | int,
537
+ max_gap: int = 0,
538
+ max_block: int | None = None,
539
+ sort: bool = True,
540
+ ) -> tuple[list[str], list[int], list[int]]:
541
+ """Merge adjacent byte-offset ranges when the inter-range
542
+ gap is <= `max_gap`, and when the merged byte range does not
543
+ exceed `max_block` (if specified). By default, this function
544
+ will re-order the input paths and byte ranges to ensure sorted
545
+ order. If the user can guarantee that the inputs are already
546
+ sorted, passing `sort=False` will skip the re-ordering.
547
+ """
548
+ # Check input
549
+ if not isinstance(paths, list):
550
+ raise TypeError
551
+ if not isinstance(starts, list):
552
+ starts = [starts] * len(paths)
553
+ if not isinstance(ends, list):
554
+ ends = [ends] * len(paths)
555
+ if len(starts) != len(paths) or len(ends) != len(paths):
556
+ raise ValueError
557
+
558
+ # Early Return
559
+ if len(starts) <= 1:
560
+ return paths, starts, ends
561
+
562
+ starts = [s or 0 for s in starts]
563
+ # Sort by paths and then ranges if `sort=True`
564
+ if sort:
565
+ paths, starts, ends = (
566
+ list(v)
567
+ for v in zip(
568
+ *sorted(
569
+ zip(paths, starts, ends),
570
+ )
571
+ )
572
+ )
573
+
574
+ if paths:
575
+ # Loop through the coupled `paths`, `starts`, and
576
+ # `ends`, and merge adjacent blocks when appropriate
577
+ new_paths = paths[:1]
578
+ new_starts = starts[:1]
579
+ new_ends = ends[:1]
580
+ for i in range(1, len(paths)):
581
+ if paths[i] == paths[i - 1] and new_ends[-1] is None:
582
+ continue
583
+ elif (
584
+ paths[i] != paths[i - 1]
585
+ or ((starts[i] - new_ends[-1]) > max_gap)
586
+ or (max_block is not None and (ends[i] - new_starts[-1]) > max_block)
587
+ ):
588
+ # Cannot merge with previous block.
589
+ # Add new `paths`, `starts`, and `ends` elements
590
+ new_paths.append(paths[i])
591
+ new_starts.append(starts[i])
592
+ new_ends.append(ends[i])
593
+ else:
594
+ # Merge with previous block by updating the
595
+ # last element of `ends`
596
+ new_ends[-1] = ends[i]
597
+ return new_paths, new_starts, new_ends
598
+
599
+ # `paths` is empty. Just return input lists
600
+ return paths, starts, ends
601
+
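A worked example of the merging rule (the file name is illustrative): the first two ranges are 16 bytes apart, which is within ``max_gap``, so they merge; the next gap is 32 bytes, so the third range stays separate.

from fsspec.utils import merge_offset_ranges

paths = ["part-0.parquet"] * 3
merge_offset_ranges(paths, [0, 32, 96], [16, 64, 128], max_gap=16)
# -> (['part-0.parquet', 'part-0.parquet'], [0, 96], [64, 128])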
602
+
603
+ def file_size(filelike: IO[bytes]) -> int:
604
+ """Find length of any open read-mode file-like"""
605
+ pos = filelike.tell()
606
+ try:
607
+ return filelike.seek(0, 2)
608
+ finally:
609
+ filelike.seek(pos)
610
+
611
+
612
+ @contextlib.contextmanager
613
+ def atomic_write(path: str, mode: str = "wb"):
614
+ """
615
+ A context manager that opens a temporary file next to `path` and, on exit,
616
+ replaces `path` with the temporary file, thereby updating `path`
617
+ atomically.
618
+ """
619
+ fd, fn = tempfile.mkstemp(
620
+ dir=os.path.dirname(path), prefix=os.path.basename(path) + "-"
621
+ )
622
+ try:
623
+ with open(fd, mode) as fp:
624
+ yield fp
625
+ except BaseException:
626
+ with contextlib.suppress(FileNotFoundError):
627
+ os.unlink(fn)
628
+ raise
629
+ else:
630
+ os.replace(fn, path)
631
+
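A sketch of the intended use, e.g. rewriting a small metadata file so that readers never see a half-written version (the path is illustrative and its directory must already exist):

from fsspec.utils import atomic_write

with atomic_write("cache/metadata.json", mode="w") as f:
    f.write('{"entries": 0}')
# on clean exit the temporary file has atomically replaced cache/metadata.json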
632
+
633
+ def _translate(pat, STAR, QUESTION_MARK):
634
+ # Copied from: https://github.com/python/cpython/pull/106703.
635
+ res: list[str] = []
636
+ add = res.append
637
+ i, n = 0, len(pat)
638
+ while i < n:
639
+ c = pat[i]
640
+ i = i + 1
641
+ if c == "*":
642
+ # compress consecutive `*` into one
643
+ if (not res) or res[-1] is not STAR:
644
+ add(STAR)
645
+ elif c == "?":
646
+ add(QUESTION_MARK)
647
+ elif c == "[":
648
+ j = i
649
+ if j < n and pat[j] == "!":
650
+ j = j + 1
651
+ if j < n and pat[j] == "]":
652
+ j = j + 1
653
+ while j < n and pat[j] != "]":
654
+ j = j + 1
655
+ if j >= n:
656
+ add("\\[")
657
+ else:
658
+ stuff = pat[i:j]
659
+ if "-" not in stuff:
660
+ stuff = stuff.replace("\\", r"\\")
661
+ else:
662
+ chunks = []
663
+ k = i + 2 if pat[i] == "!" else i + 1
664
+ while True:
665
+ k = pat.find("-", k, j)
666
+ if k < 0:
667
+ break
668
+ chunks.append(pat[i:k])
669
+ i = k + 1
670
+ k = k + 3
671
+ chunk = pat[i:j]
672
+ if chunk:
673
+ chunks.append(chunk)
674
+ else:
675
+ chunks[-1] += "-"
676
+ # Remove empty ranges -- invalid in RE.
677
+ for k in range(len(chunks) - 1, 0, -1):
678
+ if chunks[k - 1][-1] > chunks[k][0]:
679
+ chunks[k - 1] = chunks[k - 1][:-1] + chunks[k][1:]
680
+ del chunks[k]
681
+ # Escape backslashes and hyphens for set difference (--).
682
+ # Hyphens that create ranges shouldn't be escaped.
683
+ stuff = "-".join(
684
+ s.replace("\\", r"\\").replace("-", r"\-") for s in chunks
685
+ )
686
+ # Escape set operations (&&, ~~ and ||).
687
+ stuff = re.sub(r"([&~|])", r"\\\1", stuff)
688
+ i = j + 1
689
+ if not stuff:
690
+ # Empty range: never match.
691
+ add("(?!)")
692
+ elif stuff == "!":
693
+ # Negated empty range: match any character.
694
+ add(".")
695
+ else:
696
+ if stuff[0] == "!":
697
+ stuff = "^" + stuff[1:]
698
+ elif stuff[0] in ("^", "["):
699
+ stuff = "\\" + stuff
700
+ add(f"[{stuff}]")
701
+ else:
702
+ add(re.escape(c))
703
+ assert i == n
704
+ return res
705
+
706
+
707
+ def glob_translate(pat):
708
+ # Copied from: https://github.com/python/cpython/pull/106703.
709
+ # The keyword parameters' values are fixed to:
710
+ # recursive=True, include_hidden=True, seps=None
711
+ """Translate a pathname with shell wildcards to a regular expression."""
712
+ if os.path.altsep:
713
+ seps = os.path.sep + os.path.altsep
714
+ else:
715
+ seps = os.path.sep
716
+ escaped_seps = "".join(map(re.escape, seps))
717
+ any_sep = f"[{escaped_seps}]" if len(seps) > 1 else escaped_seps
718
+ not_sep = f"[^{escaped_seps}]"
719
+ one_last_segment = f"{not_sep}+"
720
+ one_segment = f"{one_last_segment}{any_sep}"
721
+ any_segments = f"(?:.+{any_sep})?"
722
+ any_last_segments = ".*"
723
+ results = []
724
+ parts = re.split(any_sep, pat)
725
+ last_part_idx = len(parts) - 1
726
+ for idx, part in enumerate(parts):
727
+ if part == "*":
728
+ results.append(one_segment if idx < last_part_idx else one_last_segment)
729
+ continue
730
+ if part == "**":
731
+ results.append(any_segments if idx < last_part_idx else any_last_segments)
732
+ continue
733
+ elif "**" in part:
734
+ raise ValueError(
735
+ "Invalid pattern: '**' can only be an entire path component"
736
+ )
737
+ if part:
738
+ results.extend(_translate(part, f"{not_sep}*", not_sep))
739
+ if idx < last_part_idx:
740
+ results.append(any_sep)
741
+ res = "".join(results)
742
+ return rf"(?s:{res})\Z"
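For instance, with "/" as the separator, a recursive pattern compiles to a regex that matches CSV files at any depth (a sketch; the paths are illustrative):

import re
from fsspec.utils import glob_translate

pat = re.compile(glob_translate("data/**/*.csv"))
assert pat.match("data/2021/03/part-0.csv")
assert not pat.match("data/2021/notes.txt")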
lib/python3.11/site-packages/functorch/_C.cpython-311-darwin.so ADDED
Binary file (332 kB).
 
lib/python3.11/site-packages/functorch/__init__.py ADDED
@@ -0,0 +1,38 @@
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+ import torch
7
+
8
+ from torch._functorch.deprecated import (
9
+ combine_state_for_ensemble,
10
+ functionalize,
11
+ grad,
12
+ grad_and_value,
13
+ hessian,
14
+ jacfwd,
15
+ jacrev,
16
+ jvp,
17
+ make_functional,
18
+ make_functional_with_buffers,
19
+ vjp,
20
+ vmap,
21
+ )
22
+
23
+ # utilities. Maybe these should go in their own namespace in the future?
24
+ from torch._functorch.make_functional import (
25
+ FunctionalModule,
26
+ FunctionalModuleWithBuffers,
27
+ )
28
+
29
+ # Top-level APIs. Please think carefully before adding something to the
30
+ # top-level namespace:
31
+ # - private helper functions should go into torch._functorch
32
+ # - very experimental things should go into functorch.experimental
33
+ # - compilation related things should go into functorch.compile
34
+
35
+ # Was never documented
36
+ from torch._functorch.python_key import make_fx
37
+
38
+ __version__ = torch.__version__
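These names are re-exports of ``torch._functorch``; a minimal sketch of two of the deprecated aliases (the same functionality now lives under ``torch.func``):

import torch
from functorch import grad, vmap

def loss(x):
    return (x ** 2).sum()

x = torch.tensor([1.0, 2.0, 3.0])
print(grad(loss)(x))        # tensor([2., 4., 6.])
print(vmap(torch.sin)(x))   # applies sin over the leading batch dimension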
lib/python3.11/site-packages/functorch/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (947 Bytes).
 
lib/python3.11/site-packages/functorch/_src/__init__.py ADDED
File without changes
lib/python3.11/site-packages/functorch/_src/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (229 Bytes).
 
lib/python3.11/site-packages/functorch/_src/aot_autograd/__init__.py ADDED
@@ -0,0 +1,8 @@
1
+ # This file has moved to under torch/_functorch. It is not public API.
2
+ # If you are not a PyTorch developer and you are relying on the following
3
+ # imports, please file an issue.
4
+ from torch._functorch.aot_autograd import (
5
+ aot_autograd_decompositions,
6
+ KNOWN_TYPES,
7
+ PytreeThunk,
8
+ )
lib/python3.11/site-packages/functorch/_src/aot_autograd/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (417 Bytes).
 
lib/python3.11/site-packages/functorch/_src/eager_transforms/__init__.py ADDED
@@ -0,0 +1,7 @@
1
+ # This file has moved to under torch/_functorch. It is not public API.
2
+ # If you are not a PyTorch developer and you are relying on the following
3
+ # imports, please file an issue.
4
+ from torch._functorch.eager_transforms import (
5
+ _assert_wrapped_functional,
6
+ _unwrap_functional_tensor,
7
+ )
lib/python3.11/site-packages/functorch/_src/eager_transforms/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (406 Bytes).
 
lib/python3.11/site-packages/functorch/_src/make_functional/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ # This file has moved to under torch/_functorch. It is not public API.
2
+ # If you are not a PyTorch developer and you are relying on the following
3
+ # imports, please file an issue.
4
+ from torch._functorch.make_functional import _swap_state
lib/python3.11/site-packages/functorch/_src/make_functional/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (329 Bytes).
 
lib/python3.11/site-packages/functorch/_src/vmap/__init__.py ADDED
@@ -0,0 +1,16 @@
1
+ # This file has moved to under torch/_functorch. It is not public API.
2
+ # If you are not a PyTorch developer and you are relying on the following
3
+ # imports, please file an issue.
4
+ from torch._functorch.vmap import (
5
+ _add_batch_dim,
6
+ _broadcast_to_and_flatten,
7
+ _create_batched_inputs,
8
+ _get_name,
9
+ _process_batched_inputs,
10
+ _remove_batch_dim,
11
+ _unwrap_batched,
12
+ _validate_and_get_batch_size,
13
+ Tensor,
14
+ tree_flatten,
15
+ tree_unflatten,
16
+ )
lib/python3.11/site-packages/functorch/_src/vmap/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (705 Bytes). View file
 
lib/python3.11/site-packages/functorch/compile/__init__.py ADDED
@@ -0,0 +1,31 @@
1
+ from torch._functorch import config
2
+ from torch._functorch.aot_autograd import (
3
+ aot_function,
4
+ aot_module,
5
+ aot_module_simplified,
6
+ compiled_function,
7
+ compiled_module,
8
+ get_aot_compilation_context,
9
+ get_aot_graph_name,
10
+ get_graph_being_compiled,
11
+ make_boxed_compiler,
12
+ make_boxed_func,
13
+ )
14
+ from torch._functorch.compilers import (
15
+ debug_compile,
16
+ default_decompositions,
17
+ draw_graph_compile,
18
+ memory_efficient_fusion,
19
+ nnc_jit,
20
+ nop,
21
+ print_compile,
22
+ ts_compile,
23
+ )
24
+ from torch._functorch.fx_minifier import minifier
25
+ from torch._functorch.partitioners import (
26
+ default_partition,
27
+ draw_graph,
28
+ draw_joint_graph,
29
+ min_cut_rematerialization_partition,
30
+ )
31
+ from torch._functorch.python_key import pythonkey_decompose
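A minimal sketch of the AOTAutograd entry point re-exported here, assuming the usual boxed-compiler convention; the compiler below simply prints the captured forward and backward FX graphs:

import torch
from functorch.compile import aot_function, make_boxed_func

def show_graph(gm, example_inputs):
    print(gm.code)                     # inspect the captured FX graph
    return make_boxed_func(gm.forward)

def f(x):
    return torch.sin(x).sum()

aot_f = aot_function(f, fw_compiler=show_graph)
aot_f(torch.randn(4, requires_grad=True)).backward()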
lib/python3.11/site-packages/functorch/compile/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (1.47 kB).
 
lib/python3.11/site-packages/functorch/dim/__init__.py ADDED
@@ -0,0 +1,179 @@
1
+ import dis
2
+ import inspect
3
+ from typing import Sequence, Union
4
+
5
+ import torch
6
+
7
+ import functorch._C
8
+ from functorch._C import dim as _C
9
+ from .tree_map import tree_flatten, tree_map
10
+ from .wrap_type import wrap_type
11
+
12
+ _C._patch_tensor_class()
13
+ dims, DimList, dimlists = _C.dims, _C.DimList, _C.dimlists
14
+
15
+
16
+ class DimensionMismatchError(Exception):
17
+ pass
18
+
19
+
20
+ class DimensionBindError(Exception):
21
+ pass
22
+
23
+
24
+ from . import op_properties
25
+
26
+ # use dict to avoid writing C++ bindings for set
27
+ pointwise = {t: True for t in op_properties.pointwise}
28
+
29
+ use_c = True
30
+ if not use_c:
31
+ from . import reference
32
+
33
+
34
+ class _Tensor:
35
+ # fast path around slow wrapping/unwrapping logic for simply queries used
36
+ # by the implementation...
37
+
38
+ @property
39
+ def dims(self):
40
+ return tuple(d for d in self._levels if isinstance(d, Dim))
41
+
42
+ def dim(self):
43
+ return self.ndim
44
+
45
+ if use_c:
46
+ __torch_function__ = classmethod(_C.__torch_function__)
47
+ expand = _C._instancemethod(_C.expand)
48
+ else:
49
+ __torch_function__ = reference.__torch_function__
50
+ expand = reference.expand
51
+
52
+ index = _C._instancemethod(_C.index)
53
+
54
+ def __repr__(self):
55
+ tensor, levels, ndim = self._tensor, self._levels, self.ndim
56
+ return f"{tensor}\nwith dims={tuple(l + ndim if isinstance(l, int) else l for l in levels)} sizes={tuple(tensor.size())}"
57
+
58
+
59
+ TensorLike = (_Tensor, torch.Tensor)
60
+
61
+
62
+ class Dim(_C.Dim, _Tensor):
63
+ # note that _C.Dim comes before tensor because we want the Dim API for things like size to take precendence.
64
+ # Tensor defines format, but we want to print Dims with special formatting
65
+ __format__ = object.__format__
66
+
67
+
68
+ class Tensor(_Tensor, _C.Tensor):
69
+ if not use_c:
70
+ from_batched = staticmethod(_C.Tensor_from_batched)
71
+ from_positional = staticmethod(_C.Tensor_from_positional)
72
+ sum = _C._instancemethod(_C.Tensor_sum)
73
+
74
+
75
+ def cat(tensors, dim, new_dim):
76
+ n = dims()
77
+ return stack(tensors, n, dim).index([n, dim], new_dim)
78
+
79
+
80
+ if use_c:
81
+ _wrap = _C._wrap
82
+
83
+ def _def(name, *args, **kwargs):
84
+ orig = getattr(torch.Tensor, name)
85
+ setattr(_Tensor, name, _C._instancemethod(_wrap(orig, *args, **kwargs)))
86
+
87
+ t__getitem__ = _C._instancemethod(_C.__getitem__)
88
+ stack = _C.stack
89
+ split = _C._instancemethod(_C.split)
90
+ else:
91
+ _wrap, _def = reference._wrap, reference._def
92
+ t__getitem__ = reference.t__getitem__
93
+ stack = reference.stack
94
+ split = reference.split
95
+
96
+ # note: there is no python reference
97
+ t__setitem__ = _C._instancemethod(_C.__setitem__)
98
+ # this is patched in the C API because otherwise torch.Tensor will
99
+ # no longer be considered a sequence and things will break
100
+ # torch.Tensor.__getitem__ = t__getitem__
101
+
102
+ _Tensor.__getitem__ = t__getitem__
103
+ # torch.Tensor.__setitem__ = t__setitem__
104
+ _Tensor.__setitem__ = t__setitem__
105
+
106
+ torch.Tensor.split = split
107
+ _Tensor.split = split
108
+ torch.Tensor.expand = _C._instancemethod(_C.expand)
109
+ torch.Tensor.index = _C._instancemethod(_C.index)
110
+ wrap_type(use_c, _Tensor, torch.Tensor, _Tensor.__torch_function__)
111
+ del _Tensor.ndim
112
+
113
+ if use_c:
114
+ _Tensor.order = _C._instancemethod(_C.order)
115
+ else:
116
+ _Tensor.order = reference.positional
117
+
118
+ _def("mean")
119
+ _def("sum")
120
+ _def("all")
121
+ _def("amax")
122
+ _def("amin")
123
+ _def("aminmax")
124
+ _def("any")
125
+ _def("count_nonzero")
126
+ _def("logsumexp")
127
+ _def("nanmean")
128
+ _def("nansum")
129
+ _def("prod")
130
+ _def("std", keepdim_offset=2)
131
+ _def("var", keepdim_offset=2)
132
+ _def("max", single_dim=True)
133
+ _def("min", single_dim=True)
134
+ _def("argmax", single_dim=True)
135
+ _def("argmin", single_dim=True)
136
+ _def("kthvalue", single_dim=True)
137
+ _def("median", single_dim=True)
138
+ _def("nanmedian", single_dim=True)
139
+ _def("mode", single_dim=True)
140
+ _def("sort", reduce=False)
141
+ _def("argsort", reduce=False)
142
+ _def("unbind", single_dim=True)
143
+ _def("chunk", dim_offset=1, reduce=False)
144
+ _def("cummax", single_dim=True, reduce=False)
145
+ _def("cummin", single_dim=True, reduce=False)
146
+ _def("cumprod", single_dim=True, reduce=False)
147
+ _def("cumprod_", single_dim=True, reduce=False)
148
+ _def("cumsum", single_dim=True, reduce=False)
149
+ _def("cumsum_", single_dim=True, reduce=False)
150
+ _def("logcumsumexp", single_dim=True, reduce=False)
151
+ _def("renorm", dim_offset=1, single_dim=True, reduce=False)
152
+ _def("softmax", single_dim=True, reduce=False)
153
+ softmax = _wrap(torch.nn.functional.softmax, single_dim=True, reduce=False)
154
+
155
+ # stuff to handle in the future, because they require special
156
+ # binding logic for dims
157
+ # cross
158
+ # diag_embed
159
+ # diagonal
160
+ # diagonal_scatter
161
+ # diff
162
+ # nanquantile
163
+ # quantile
164
+ # roll
165
+ # rot90
166
+ # topk (new dimes on output)
167
+ # should these all be subsumed by inplace indexing?
168
+ # index_add_
169
+ # index_add
170
+ # index_copy
171
+ # index_copy_
172
+ # index_fill
173
+ # index_fill_
174
+ # index_select
175
+ # scatter
176
+ # scatter_
177
+ # scatter_add
178
+ # scatter_add_
179
+ # scatter_reduce
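For orientation, the first-class dims exposed above (dims, indexing via __getitem__, the wrapped reductions, and order) are typically used along these lines; a minimal sketch in which the example tensors and shapes are assumptions:

import torch
from functorch.dim import dims

# Matrix multiply written with first-class dims: indexing binds dims to sizes,
# pointwise ops broadcast over them, and .sum reduces a named dim.
i, j, k = dims(3)
A = torch.randn(3, 4)
B = torch.randn(4, 5)
C = (A[i, k] * B[k, j]).sum(k)
result = C.order(i, j)  # back to an ordinary 3x5 positional tensor
assert torch.allclose(result, A @ B, atol=1e-5)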
lib/python3.11/site-packages/functorch/dim/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (7.99 kB).
 
lib/python3.11/site-packages/functorch/dim/__pycache__/batch_tensor.cpython-311.pyc ADDED
Binary file (1.29 kB).
 
lib/python3.11/site-packages/functorch/dim/__pycache__/delayed_mul_tensor.cpython-311.pyc ADDED
Binary file (5.61 kB).
 
lib/python3.11/site-packages/functorch/dim/__pycache__/dim.cpython-311.pyc ADDED
Binary file (6.89 kB).
 
lib/python3.11/site-packages/functorch/dim/__pycache__/magic_trace.cpython-311.pyc ADDED
Binary file (2.49 kB).
 
lib/python3.11/site-packages/functorch/dim/__pycache__/op_properties.cpython-311.pyc ADDED
Binary file (12.1 kB).
 
lib/python3.11/site-packages/functorch/dim/__pycache__/reference.cpython-311.pyc ADDED
Binary file (32.4 kB).
 
lib/python3.11/site-packages/functorch/dim/__pycache__/tree_map.cpython-311.pyc ADDED
Binary file (803 Bytes).
 
lib/python3.11/site-packages/functorch/dim/__pycache__/wrap_type.cpython-311.pyc ADDED
Binary file (2.55 kB).
 
lib/python3.11/site-packages/functorch/dim/batch_tensor.py ADDED
@@ -0,0 +1,25 @@
+ # Copyright (c) Facebook, Inc. and its affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the BSD-style license found in the
+ # LICENSE file in the root directory of this source tree.
+ from contextlib import contextmanager
+
+ from torch._C._functorch import _vmap_add_layers, _vmap_remove_layers
+
+ _enabled = False
+
+
+ @contextmanager
+ def _enable_layers(dims):
+     global _enabled
+     assert not _enabled
+     input = sorted((d._level, d.size) for d in dims if not isinstance(d, int))
+     n = len(input)
+     try:
+         _vmap_add_layers(input)
+         _enabled = True
+         yield
+     finally:
+         _enabled = False
+         _vmap_remove_layers(n)
lib/python3.11/site-packages/functorch/dim/delayed_mul_tensor.py ADDED
@@ -0,0 +1,77 @@
+ # Copyright (c) Facebook, Inc. and its affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the BSD-style license found in the
+ # LICENSE file in the root directory of this source tree.
+ import torch
+
+ from . import _Tensor, Tensor
+ from .reference import _dims, _enable_layers, llist, ltuple
+
+
+ class DelayedMulTensor(_Tensor):
+     def __init__(self, lhs, rhs):
+         self._lhs, self._rhs = lhs, rhs
+         self._data = None
+         self._levels_data = None
+         self._has_device = lhs._has_device or rhs._has_device
+         self._batchtensor_data = None
+         self._tensor_data = None
+
+     @property
+     def _levels(self):
+         if self._levels_data is None:
+             levels = llist(self._lhs._levels)
+             for l in self._rhs._levels:
+                 if l not in levels:
+                     levels.append(l)
+             self._levels_data = ltuple(levels)
+         return self._levels_data
+
+     @property
+     def _batchtensor(self):
+         if self._batchtensor_data is None:
+             with _enable_layers(self._levels):
+                 print("bt multiply fallback")
+                 self._batchtensor_data = self._lhs._batchtensor * self._rhs._batchtensor
+         return self._batchtensor_data
+
+     @property
+     def _tensor(self):
+         if self._tensor_data is None:
+             self._tensor_data = Tensor.from_batched(
+                 self._batchtensor, self._has_device
+             )._tensor
+         return self._tensor_data
+
+     @property
+     def ndim(self):
+         return self._batchtensor.ndim
+
+     @property
+     def dims(self):
+         return ltuple(super().dims)
+
+     def sum(self, dim):
+         dims = _dims(dim, 0, False, False)
+         n = ord("a")
+         all_levels = self._levels
+
+         def to_char(d):
+             return chr(n + all_levels.index(d))
+
+         plhs, levelslhs = self._lhs._tensor, self._lhs._levels
+         prhs, levelsrhs = self._rhs._tensor, self._rhs._levels
+         new_dims = tuple(d for d in self.dims if d not in dims)
+         new_levels = [l for l in self._levels if l not in dims]
+         fmt = "".join(
+             [
+                 *(to_char(d) for d in levelslhs),
+                 ",",
+                 *(to_char(d) for d in levelsrhs),
+                 "->",
+                 *(to_char(d) for d in new_levels),
+             ]
+         )
+         result_data = torch.einsum(fmt, (plhs, prhs))
+         return Tensor.from_positional(result_data, new_levels, True)
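The sum method above avoids materializing the elementwise product: it maps each level to a letter and dispatches a single einsum. A standalone sketch of the equivalence it relies on, with concrete shapes chosen only for illustration:

import torch

# With lhs levels (i, k) and rhs levels (k, j), summing the delayed product
# over k is the single contraction "ik,kj->ij" rather than an explicit
# broadcast-multiply followed by a reduction over the shared dimension.
lhs = torch.randn(3, 4)
rhs = torch.randn(4, 5)
fused = torch.einsum("ik,kj->ij", lhs, rhs)
materialized = (lhs.unsqueeze(-1) * rhs.unsqueeze(0)).sum(dim=1)
assert torch.allclose(fused, materialized, atol=1e-5)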
lib/python3.11/site-packages/functorch/dim/dim.py ADDED
@@ -0,0 +1,110 @@
+ # Copyright (c) Facebook, Inc. and its affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the BSD-style license found in the
+ # LICENSE file in the root directory of this source tree.
+ _vmap_levels = []
+
+
+ @dataclass
+ class LevelInfo:
+     level: int
+     alive: bool = True
+
+
+ class Dim:
+     def __init__(self, name: str, size: Union[None, int] = None):
+         self.name = name
+         self._size = None
+         self._vmap_level = None
+         if size is not None:
+             self.size = size
+
+     def __del__(self):
+         if self._vmap_level is not None:
+             _vmap_active_levels[self._vmap_stack].alive = False
+             while (
+                 not _vmap_levels[-1].alive and current_level() == _vmap_levels[-1].level
+             ):
+                 _vmap_decrement_nesting()
+                 _vmap_levels.pop()
+
+     @property
+     def size(self):
+         assert self.is_bound
+         return self._size
+
+     @size.setter
+     def size(self, size: int):
+         if self._size is None:
+             self._size = size
+             self._vmap_level = _vmap_increment_nesting(size, "same")
+             self._vmap_stack = len(_vmap_levels)
+             _vmap_levels.append(LevelInfo(self._vmap_level))
+
+         elif self._size != size:
+             raise DimensionBindError(
+                 f"Dim '{self}' previously bound to a dimension of size {self._size} cannot bind to a dimension of size {size}"
+             )
+
+     @property
+     def is_bound(self):
+         return self._size is not None
+
+     def __repr__(self):
+         return self.name
+
+
+ def extract_name(inst):
+     assert inst.opname == "STORE_FAST" or inst.opname == "STORE_NAME"
+     return inst.argval
+
+
+ _cache = {}
+
+
+ def dims(lists=0):
+     frame = inspect.currentframe()
+     assert frame is not None
+     calling_frame = frame.f_back
+     assert calling_frame is not None
+     code, lasti = calling_frame.f_code, calling_frame.f_lasti
+     key = (code, lasti)
+     if key not in _cache:
+         first = lasti // 2 + 1
+         instructions = list(dis.get_instructions(calling_frame.f_code))
+         unpack = instructions[first]
+
+         if unpack.opname == "STORE_FAST" or unpack.opname == "STORE_NAME":
+             # just a single dim, not a list
+             name = unpack.argval
+             ctor = Dim if lists == 0 else DimList
+             _cache[key] = lambda: ctor(name=name)
+         else:
+             assert unpack.opname == "UNPACK_SEQUENCE"
+             ndims = unpack.argval
+             names = tuple(
+                 extract_name(instructions[first + 1 + i]) for i in range(ndims)
+             )
+             first_list = len(names) - lists
+             _cache[key] = lambda: tuple(
+                 Dim(n) if i < first_list else DimList(name=n)
+                 for i, n in enumerate(names)
+             )
+     return _cache[key]()
+
+
+ def _dim_set(positional, arg):
+     def convert(a):
+         if isinstance(a, Dim):
+             return a
+         else:
+             assert isinstance(a, int)
+             return positional[a]
+
+     if arg is None:
+         return positional
+     elif not isinstance(arg, (Dim, int)):
+         return tuple(convert(a) for a in arg)
+     else:
+         return (convert(arg),)
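Note that, as rendered above, dim.py uses dis, inspect, dataclass, Union, and DimList without showing their imports; a header along the following lines is presumably required for the module to load. Treat it as an assumption rather than part of the recorded diff:

# Presumed module header for dim.py (not visible in the diff above).
import dis
import inspect
from dataclasses import dataclass
from typing import Union

from . import DimList  # assumption: DimList is exported by functorch/dim/__init__.py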
lib/python3.11/site-packages/functorch/dim/magic_trace.py ADDED
@@ -0,0 +1,42 @@
+ # Copyright (c) Facebook, Inc. and its affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the BSD-style license found in the
+ # LICENSE file in the root directory of this source tree.
+ import os
+ import signal
+ import subprocess
+ from contextlib import contextmanager
+
+
+ @contextmanager
+ def magic_trace(output="trace.fxt", magic_trace_cache="/tmp/magic-trace"):
+     pid = os.getpid()
+     if not os.path.exists(magic_trace_cache):
+         print(f"Downloading magic_trace to: {magic_trace_cache}")
+         subprocess.run(
+             [
+                 "wget",
+                 "-O",
+                 magic_trace_cache,
+                 "-q",
+                 "https://github.com/janestreet/magic-trace/releases/download/v1.0.2/magic-trace",
+             ]
+         )
+         subprocess.run(["chmod", "+x", magic_trace_cache])
+     args = [magic_trace_cache, "attach", "-pid", str(pid), "-o", output]
+     p = subprocess.Popen(args, stderr=subprocess.PIPE, encoding="utf-8")
+     while True:
+         x = p.stderr.readline()
+         print(x)
+         if "Attached" in x:
+             break
+     try:
+         yield
+     finally:
+         p.send_signal(signal.SIGINT)
+         r = p.wait()
+         print(p.stderr.read())
+         p.stderr.close()
+         if r != 0:
+             raise ValueError(f"magic_trace exited abnormally: {r}")
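A minimal usage sketch for the context manager above; illustrative only, and it assumes Linux on a CPU with Intel Processor Trace support, since that is what the downloaded magic-trace binary relies on:

import torch
from functorch.dim.magic_trace import magic_trace

# Profile a hot loop; the resulting .fxt trace can be opened in Perfetto.
with magic_trace(output="matmul_trace.fxt"):
    a = torch.randn(256, 256)
    b = torch.randn(256, 256)
    for _ in range(100):
        _ = a @ b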