da0609001d38541f2e1d84b2fab95a3e5cb5413337fc2247150c3f19aae1664e
- lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/INSTALLER +1 -0
- lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/LICENSE +29 -0
- lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/METADATA +168 -0
- lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/RECORD +104 -0
- lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/WHEEL +5 -0
- lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/top_level.txt +1 -0
- lib/python3.11/site-packages/fsspec/parquet.py +551 -0
- lib/python3.11/site-packages/fsspec/registry.py +299 -0
- lib/python3.11/site-packages/fsspec/spec.py +1963 -0
- lib/python3.11/site-packages/fsspec/tests/abstract/__init__.py +287 -0
- lib/python3.11/site-packages/fsspec/tests/abstract/__pycache__/__init__.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/fsspec/tests/abstract/__pycache__/common.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/fsspec/tests/abstract/__pycache__/copy.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/fsspec/tests/abstract/__pycache__/get.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/fsspec/tests/abstract/__pycache__/put.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/fsspec/tests/abstract/common.py +175 -0
- lib/python3.11/site-packages/fsspec/tests/abstract/copy.py +543 -0
- lib/python3.11/site-packages/fsspec/tests/abstract/get.py +587 -0
- lib/python3.11/site-packages/fsspec/tests/abstract/put.py +577 -0
- lib/python3.11/site-packages/fsspec/transaction.py +85 -0
- lib/python3.11/site-packages/fsspec/utils.py +742 -0
- lib/python3.11/site-packages/functorch/_C.cpython-311-darwin.so +0 -0
- lib/python3.11/site-packages/functorch/__init__.py +38 -0
- lib/python3.11/site-packages/functorch/__pycache__/__init__.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/_src/__init__.py +0 -0
- lib/python3.11/site-packages/functorch/_src/__pycache__/__init__.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/_src/aot_autograd/__init__.py +8 -0
- lib/python3.11/site-packages/functorch/_src/aot_autograd/__pycache__/__init__.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/_src/eager_transforms/__init__.py +7 -0
- lib/python3.11/site-packages/functorch/_src/eager_transforms/__pycache__/__init__.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/_src/make_functional/__init__.py +4 -0
- lib/python3.11/site-packages/functorch/_src/make_functional/__pycache__/__init__.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/_src/vmap/__init__.py +16 -0
- lib/python3.11/site-packages/functorch/_src/vmap/__pycache__/__init__.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/compile/__init__.py +31 -0
- lib/python3.11/site-packages/functorch/compile/__pycache__/__init__.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/dim/__init__.py +179 -0
- lib/python3.11/site-packages/functorch/dim/__pycache__/__init__.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/dim/__pycache__/batch_tensor.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/dim/__pycache__/delayed_mul_tensor.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/dim/__pycache__/dim.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/dim/__pycache__/magic_trace.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/dim/__pycache__/op_properties.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/dim/__pycache__/reference.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/dim/__pycache__/tree_map.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/dim/__pycache__/wrap_type.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/dim/batch_tensor.py +25 -0
- lib/python3.11/site-packages/functorch/dim/delayed_mul_tensor.py +77 -0
- lib/python3.11/site-packages/functorch/dim/dim.py +110 -0
- lib/python3.11/site-packages/functorch/dim/magic_trace.py +42 -0
lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/INSTALLER
ADDED
@@ -0,0 +1 @@
+pip
lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/LICENSE
ADDED
@@ -0,0 +1,29 @@
+BSD 3-Clause License
+
+Copyright (c) 2018, Martin Durant
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* Neither the name of the copyright holder nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/METADATA
ADDED
@@ -0,0 +1,168 @@
+Metadata-Version: 2.1
+Name: fsspec
+Version: 2023.12.2
+Summary: File-system specification
+Home-page: https://github.com/fsspec/filesystem_spec
+Maintainer: Martin Durant
+Maintainer-email: [email protected]
+License: BSD
+Project-URL: Changelog, https://filesystem-spec.readthedocs.io/en/latest/changelog.html
+Project-URL: Documentation, https://filesystem-spec.readthedocs.io/en/latest/
+Keywords: file
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: BSD License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Provides-Extra: abfs
+Requires-Dist: adlfs ; extra == 'abfs'
+Provides-Extra: adl
+Requires-Dist: adlfs ; extra == 'adl'
+Provides-Extra: arrow
+Requires-Dist: pyarrow >=1 ; extra == 'arrow'
+Provides-Extra: dask
+Requires-Dist: dask ; extra == 'dask'
+Requires-Dist: distributed ; extra == 'dask'
+Provides-Extra: devel
+Requires-Dist: pytest ; extra == 'devel'
+Requires-Dist: pytest-cov ; extra == 'devel'
+Provides-Extra: dropbox
+Requires-Dist: dropboxdrivefs ; extra == 'dropbox'
+Requires-Dist: requests ; extra == 'dropbox'
+Requires-Dist: dropbox ; extra == 'dropbox'
+Provides-Extra: entrypoints
+Provides-Extra: full
+Requires-Dist: adlfs ; extra == 'full'
+Requires-Dist: aiohttp !=4.0.0a0,!=4.0.0a1 ; extra == 'full'
+Requires-Dist: dask ; extra == 'full'
+Requires-Dist: distributed ; extra == 'full'
+Requires-Dist: dropbox ; extra == 'full'
+Requires-Dist: dropboxdrivefs ; extra == 'full'
+Requires-Dist: fusepy ; extra == 'full'
+Requires-Dist: gcsfs ; extra == 'full'
+Requires-Dist: libarchive-c ; extra == 'full'
+Requires-Dist: ocifs ; extra == 'full'
+Requires-Dist: panel ; extra == 'full'
+Requires-Dist: paramiko ; extra == 'full'
+Requires-Dist: pyarrow >=1 ; extra == 'full'
+Requires-Dist: pygit2 ; extra == 'full'
+Requires-Dist: requests ; extra == 'full'
+Requires-Dist: s3fs ; extra == 'full'
+Requires-Dist: smbprotocol ; extra == 'full'
+Requires-Dist: tqdm ; extra == 'full'
+Provides-Extra: fuse
+Requires-Dist: fusepy ; extra == 'fuse'
+Provides-Extra: gcs
+Requires-Dist: gcsfs ; extra == 'gcs'
+Provides-Extra: git
+Requires-Dist: pygit2 ; extra == 'git'
+Provides-Extra: github
+Requires-Dist: requests ; extra == 'github'
+Provides-Extra: gs
+Requires-Dist: gcsfs ; extra == 'gs'
+Provides-Extra: gui
+Requires-Dist: panel ; extra == 'gui'
+Provides-Extra: hdfs
+Requires-Dist: pyarrow >=1 ; extra == 'hdfs'
+Provides-Extra: http
+Requires-Dist: requests ; extra == 'http'
+Requires-Dist: aiohttp !=4.0.0a0,!=4.0.0a1 ; extra == 'http'
+Provides-Extra: libarchive
+Requires-Dist: libarchive-c ; extra == 'libarchive'
+Provides-Extra: oci
+Requires-Dist: ocifs ; extra == 'oci'
+Provides-Extra: s3
+Requires-Dist: s3fs ; extra == 's3'
+Provides-Extra: sftp
+Requires-Dist: paramiko ; extra == 'sftp'
+Provides-Extra: smb
+Requires-Dist: smbprotocol ; extra == 'smb'
+Provides-Extra: ssh
+Requires-Dist: paramiko ; extra == 'ssh'
+Provides-Extra: tqdm
+Requires-Dist: tqdm ; extra == 'tqdm'
+
+# filesystem_spec
+
+[![PyPI version](https://badge.fury.io/py/fsspec.svg)](https://pypi.python.org/pypi/fsspec/)
+[![Anaconda-Server Badge](https://anaconda.org/conda-forge/fsspec/badges/version.svg)](https://anaconda.org/conda-forge/fsspec)
+![Build](https://github.com/fsspec/filesystem_spec/workflows/CI/badge.svg)
+[![Docs](https://readthedocs.org/projects/filesystem-spec/badge/?version=latest)](https://filesystem-spec.readthedocs.io/en/latest/?badge=latest)
+[![PyPi downloads](https://img.shields.io/pypi/dm/fsspec?label=pypi%20downloads&style=flat)](https://pepy.tech/project/fsspec)
+
+A specification for pythonic filesystems.
+
+## Install
+
+```bash
+pip install fsspec
+```
+
+would install the base fsspec. Various optionally supported features might require specification of custom
+extra require, e.g. `pip install fsspec[ssh]` will install dependencies for `ssh` backends support.
+Use `pip install fsspec[full]` for installation of all known extra dependencies.
+
+Up-to-date package also provided through conda-forge distribution:
+
+```bash
+conda install -c conda-forge fsspec
+```
+
+
+## Purpose
+
+To produce a template or specification for a file-system interface, that specific implementations should follow,
+so that applications making use of them can rely on a common behaviour and not have to worry about the specific
+internal implementation decisions with any given backend. Many such implementations are included in this package,
+or in sister projects such as `s3fs` and `gcsfs`.
+
+In addition, if this is well-designed, then additional functionality, such as a key-value store or FUSE
+mounting of the file-system implementation may be available for all implementations "for free".
+
+## Documentation
+
+Please refer to [RTD](https://filesystem-spec.readthedocs.io/en/latest/?badge=latest)
+
+## Develop
+
+fsspec uses GitHub Actions for CI. Environment files can be found
+in the "ci/" directory. Note that the main environment is called "py38",
+but it is expected that the version of python installed be adjustable at
+CI runtime. For local use, pick a version suitable for you.
+
+### Testing
+
+Tests can be run in the dev environment, if activated, via ``pytest fsspec``.
+
+The full fsspec suite requires a system-level docker, docker-compose, and fuse
+installation. If only making changes to one backend implementation, it is
+not generally necessary to run all tests locally.
+
+It is expected that contributors ensure that any change to fsspec does not
+cause issues or regressions for either other fsspec-related packages such
+as gcsfs and s3fs, nor for downstream users of fsspec. The "downstream" CI
+run and corresponding environment file run a set of tests from the dask
+test suite, and very minimal tests against pandas and zarr from the
+test_downstream.py module in this repo.
+
+### Code Formatting
+
+fsspec uses [Black](https://black.readthedocs.io/en/stable) to ensure
+a consistent code format throughout the project.
+Run ``black fsspec`` from the root of the filesystem_spec repository to
+auto-format your code. Additionally, many editors have plugins that will apply
+``black`` as you edit files. ``black`` is included in the ``tox`` environments.
+
+Optionally, you may wish to setup [pre-commit hooks](https://pre-commit.com) to
+automatically run ``black`` when you make a git commit.
+Run ``pre-commit install --install-hooks`` from the root of the
+filesystem_spec repository to setup pre-commit hooks. ``black`` will now be run
+before you commit, reformatting any changed files. You can format without
+committing via ``pre-commit run`` or skip these checks with ``git commit
+--no-verify``.
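To make the "common behaviour" described in the README above concrete, here is a minimal usage sketch. It is not part of the packaged files; the commented S3 URL is a hypothetical placeholder that would additionally require the `s3` extra.

```python
# Illustrative sketch only: the same fsspec calls work against any registered
# backend; only the URL protocol changes. "memory" ships with fsspec itself.
import fsspec

with fsspec.open("memory://demo/example.txt", "wt") as f:
    f.write("hello fsspec")

mem = fsspec.filesystem("memory")
print(mem.ls("/demo"))                 # lists the file written above
print(mem.cat("/demo/example.txt"))    # b'hello fsspec'

# A remote backend plugs in through the identical interface once its extra is
# installed, e.g. `pip install fsspec[s3]` (bucket name is hypothetical):
# with fsspec.open("s3://example-bucket/data.csv", "rt", anon=True) as f:
#     print(f.readline())
```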
lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/RECORD
ADDED
@@ -0,0 +1,104 @@
1 |
+
fsspec-2023.12.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
2 |
+
fsspec-2023.12.2.dist-info/LICENSE,sha256=LcNUls5TpzB5FcAIqESq1T53K0mzTN0ARFBnaRQH7JQ,1513
|
3 |
+
fsspec-2023.12.2.dist-info/METADATA,sha256=toLeg14fW_MfA33P2NVIPEyWFL7k004pAolypgHrECQ,6829
|
4 |
+
fsspec-2023.12.2.dist-info/RECORD,,
|
5 |
+
fsspec-2023.12.2.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
6 |
+
fsspec-2023.12.2.dist-info/top_level.txt,sha256=blt2pDrQDwN3Gklcw13CSPLQRd6aaOgJ8AxqrW395MI,7
|
7 |
+
fsspec/__init__.py,sha256=2kT62GfFK-AjgS-LgwSsCo_VA2IePvsyv8Ash5oiaFA,1982
|
8 |
+
fsspec/__pycache__/__init__.cpython-311.pyc,,
|
9 |
+
fsspec/__pycache__/_version.cpython-311.pyc,,
|
10 |
+
fsspec/__pycache__/archive.cpython-311.pyc,,
|
11 |
+
fsspec/__pycache__/asyn.cpython-311.pyc,,
|
12 |
+
fsspec/__pycache__/caching.cpython-311.pyc,,
|
13 |
+
fsspec/__pycache__/callbacks.cpython-311.pyc,,
|
14 |
+
fsspec/__pycache__/compression.cpython-311.pyc,,
|
15 |
+
fsspec/__pycache__/config.cpython-311.pyc,,
|
16 |
+
fsspec/__pycache__/conftest.cpython-311.pyc,,
|
17 |
+
fsspec/__pycache__/core.cpython-311.pyc,,
|
18 |
+
fsspec/__pycache__/dircache.cpython-311.pyc,,
|
19 |
+
fsspec/__pycache__/exceptions.cpython-311.pyc,,
|
20 |
+
fsspec/__pycache__/fuse.cpython-311.pyc,,
|
21 |
+
fsspec/__pycache__/generic.cpython-311.pyc,,
|
22 |
+
fsspec/__pycache__/gui.cpython-311.pyc,,
|
23 |
+
fsspec/__pycache__/mapping.cpython-311.pyc,,
|
24 |
+
fsspec/__pycache__/parquet.cpython-311.pyc,,
|
25 |
+
fsspec/__pycache__/registry.cpython-311.pyc,,
|
26 |
+
fsspec/__pycache__/spec.cpython-311.pyc,,
|
27 |
+
fsspec/__pycache__/transaction.cpython-311.pyc,,
|
28 |
+
fsspec/__pycache__/utils.cpython-311.pyc,,
|
29 |
+
fsspec/_version.py,sha256=Kf9CIUDExVlqHjn9lLOn0QJcfeRWAe0PFvFHkRzI9iA,501
|
30 |
+
fsspec/archive.py,sha256=S__DzfZj-urAN3tp2W6jJ6YDiXG1fAl7FjvWUN73qIE,2386
|
31 |
+
fsspec/asyn.py,sha256=wx6vr5eBJYdW7a2cyv-LkfWu5dCDCcAjcDKjp3ylgR0,36154
|
32 |
+
fsspec/caching.py,sha256=N45pzJdD4w5FOX_sxGvHWirggPNB66JTGP1HH6fpSck,28781
|
33 |
+
fsspec/callbacks.py,sha256=qmD1v-WWxWmTmcUkEadq-_F_n3OGp9JYarjupUq_j3o,6358
|
34 |
+
fsspec/compression.py,sha256=Zrbbb_m2SCF427BMJRYbDKMuSZIIV2YqteoS7AdR8Sc,4867
|
35 |
+
fsspec/config.py,sha256=LF4Zmu1vhJW7Je9Q-cwkRc3xP7Rhyy7Xnwj26Z6sv2g,4279
|
36 |
+
fsspec/conftest.py,sha256=fVfx-NLrH_OZS1TIpYNoPzM7efEcMoL62reHOdYeFCA,1245
|
37 |
+
fsspec/core.py,sha256=0yCj1Z5MhbSDIQiqFs49VORl9QaGwV6hp9bXdkIoPIo,22363
|
38 |
+
fsspec/dircache.py,sha256=YzogWJrhEastHU7vWz-cJiJ7sdtLXFXhEpInGKd4EcM,2717
|
39 |
+
fsspec/exceptions.py,sha256=s5eA2wIwzj-aeV0i_KDXsBaIhJJRKzmMGUGwuBHTnS4,348
|
40 |
+
fsspec/fuse.py,sha256=66amOa6wdIbS0DMhhfAPUoOB37HPorfXD1izV0prmTY,10145
|
41 |
+
fsspec/generic.py,sha256=2EcEegwdTLyQ2qSgz3Y6cbAuiWz7bybsEWai_XYkGtw,13457
|
42 |
+
fsspec/gui.py,sha256=BEVFplRsQyakNeCWU-vyZBD-16x_flEe0XiDxXparEU,13913
|
43 |
+
fsspec/implementations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
44 |
+
fsspec/implementations/__pycache__/__init__.cpython-311.pyc,,
|
45 |
+
fsspec/implementations/__pycache__/arrow.cpython-311.pyc,,
|
46 |
+
fsspec/implementations/__pycache__/cache_mapper.cpython-311.pyc,,
|
47 |
+
fsspec/implementations/__pycache__/cache_metadata.cpython-311.pyc,,
|
48 |
+
fsspec/implementations/__pycache__/cached.cpython-311.pyc,,
|
49 |
+
fsspec/implementations/__pycache__/dask.cpython-311.pyc,,
|
50 |
+
fsspec/implementations/__pycache__/data.cpython-311.pyc,,
|
51 |
+
fsspec/implementations/__pycache__/dbfs.cpython-311.pyc,,
|
52 |
+
fsspec/implementations/__pycache__/dirfs.cpython-311.pyc,,
|
53 |
+
fsspec/implementations/__pycache__/ftp.cpython-311.pyc,,
|
54 |
+
fsspec/implementations/__pycache__/git.cpython-311.pyc,,
|
55 |
+
fsspec/implementations/__pycache__/github.cpython-311.pyc,,
|
56 |
+
fsspec/implementations/__pycache__/http.cpython-311.pyc,,
|
57 |
+
fsspec/implementations/__pycache__/jupyter.cpython-311.pyc,,
|
58 |
+
fsspec/implementations/__pycache__/libarchive.cpython-311.pyc,,
|
59 |
+
fsspec/implementations/__pycache__/local.cpython-311.pyc,,
|
60 |
+
fsspec/implementations/__pycache__/memory.cpython-311.pyc,,
|
61 |
+
fsspec/implementations/__pycache__/reference.cpython-311.pyc,,
|
62 |
+
fsspec/implementations/__pycache__/sftp.cpython-311.pyc,,
|
63 |
+
fsspec/implementations/__pycache__/smb.cpython-311.pyc,,
|
64 |
+
fsspec/implementations/__pycache__/tar.cpython-311.pyc,,
|
65 |
+
fsspec/implementations/__pycache__/webhdfs.cpython-311.pyc,,
|
66 |
+
fsspec/implementations/__pycache__/zip.cpython-311.pyc,,
|
67 |
+
fsspec/implementations/arrow.py,sha256=1d-c5KceQJxm8QXML8fFXHvQx0wstG-tNJNsrgMX_CI,8240
|
68 |
+
fsspec/implementations/cache_mapper.py,sha256=nE_sY3vw-jJbeBcAP6NGtacP3jHW_7EcG3yUSf0A-4Y,2502
|
69 |
+
fsspec/implementations/cache_metadata.py,sha256=ZvyA7Y3KK-5Ct4E5pELzD6mH_5T03XqaKVT96qYDADU,8576
|
70 |
+
fsspec/implementations/cached.py,sha256=jCQSAIiO7M8OOmwG4cCYn4LGvMVCbldC9j7GeonwoEc,30238
|
71 |
+
fsspec/implementations/dask.py,sha256=CXZbJzIVOhKV8ILcxuy3bTvcacCueAbyQxmvAkbPkrk,4466
|
72 |
+
fsspec/implementations/data.py,sha256=Oti0dKzyeadnVIedo3s8CADoh9bNM-96_6viTEYr4lo,1245
|
73 |
+
fsspec/implementations/dbfs.py,sha256=0ndCE2OQqrWv6Y8ETufxOQ9ymIIO2JA_Q82bnilqTaw,14660
|
74 |
+
fsspec/implementations/dirfs.py,sha256=8EEgKin5JgFBqzHaKig7ipiFAZJvbChUX_vpC_jagoY,11136
|
75 |
+
fsspec/implementations/ftp.py,sha256=FzcHeieyda-ai_D8w4YKCzvI4gshuFYlBACBuEIx2Nk,11419
|
76 |
+
fsspec/implementations/git.py,sha256=vKGI-Vd5q4H2RrvhebkPc9NwlfkZ980OUGhebeCw-M0,4034
|
77 |
+
fsspec/implementations/github.py,sha256=hCisC1vXzZ9kP1UnyGz2Ba8c9cS2JmSGFHtgHG_2Gqw,7190
|
78 |
+
fsspec/implementations/http.py,sha256=cK7HQdVgR8PVLWkB0q0xsXohOP16X-zQiT2uqB1Kq4E,29265
|
79 |
+
fsspec/implementations/jupyter.py,sha256=B2uj7OEm7yIk-vRSsO37_ND0t0EBvn4B-Su43ibN4Pg,3811
|
80 |
+
fsspec/implementations/libarchive.py,sha256=5_I2DiLXwQ1JC8x-K7jXu-tBwhO9dj7tFLnb0bTnVMQ,7102
|
81 |
+
fsspec/implementations/local.py,sha256=GV5OltZrz9aOM8KKSx3T7QE7-U9KX3BOz3Eql3jw_xY,13371
|
82 |
+
fsspec/implementations/memory.py,sha256=-a-NR66T-sGj9xTInUsu8KsEiqd156bF8Ui9BuXfmEA,9698
|
83 |
+
fsspec/implementations/reference.py,sha256=BHhvx8LIYyBk5OVBWw-PmZsAs_OCaLvF1p8656bwVJE,42438
|
84 |
+
fsspec/implementations/sftp.py,sha256=TNmXVac9c5H9Gmiee2EjZNKXnXdkwwaNL2cHDkp_gG4,5632
|
85 |
+
fsspec/implementations/smb.py,sha256=k3RtzW97lJtYuw_QpP1rJRFnUBmSsw9twFjUCex0a5U,10591
|
86 |
+
fsspec/implementations/tar.py,sha256=dam78Tp_CozybNqCY2JYgGBS3Uc9FuJUAT9oB0lolOs,4111
|
87 |
+
fsspec/implementations/webhdfs.py,sha256=C5T96C_p66pUf2cQda-7HIZ9fKYwfCkupf2LN_7n7Dw,16145
|
88 |
+
fsspec/implementations/zip.py,sha256=JDX-3HOI15qUl6VTBsNPuDp5RVN6s2n3Bywd4mMu0T0,4347
|
89 |
+
fsspec/mapping.py,sha256=WFEXRWxujQwfzzkRP5tpdIE0265okAtlP97qFZGvV1k,8165
|
90 |
+
fsspec/parquet.py,sha256=i4H3EU3K1Q6jp8sqjFji6a6gKnlOEZufaa7DRNE5X-4,19516
|
91 |
+
fsspec/registry.py,sha256=-dl7sh2tsfhMA2uxz5KQDsPFehQTgMJIbVjNq6QLoKU,11145
|
92 |
+
fsspec/spec.py,sha256=kfZpvKoh-fftKG6cOkOi2k0PJJwRqV4ZX_NElCBdcB8,66154
|
93 |
+
fsspec/tests/abstract/__init__.py,sha256=i1wcFixV6QhOwdoB24c8oXjzobISNqiKVz9kl2DvAY8,10028
|
94 |
+
fsspec/tests/abstract/__pycache__/__init__.cpython-311.pyc,,
|
95 |
+
fsspec/tests/abstract/__pycache__/common.cpython-311.pyc,,
|
96 |
+
fsspec/tests/abstract/__pycache__/copy.cpython-311.pyc,,
|
97 |
+
fsspec/tests/abstract/__pycache__/get.cpython-311.pyc,,
|
98 |
+
fsspec/tests/abstract/__pycache__/put.cpython-311.pyc,,
|
99 |
+
fsspec/tests/abstract/common.py,sha256=1GQwNo5AONzAnzZj0fWgn8NJPLXALehbsuGxS3FzWVU,4973
|
100 |
+
fsspec/tests/abstract/copy.py,sha256=nyCp1Q9apHzti2_UPDh3HzVhRmV7dciD-3dq-wM7JuU,19643
|
101 |
+
fsspec/tests/abstract/get.py,sha256=vNR4HztvTR7Cj56AMo7_tx7TeYz1Jgr_2Wb8Lv-UiBY,20755
|
102 |
+
fsspec/tests/abstract/put.py,sha256=hEf-yuMWBOT7B6eWcck3tMyJWzdVXtxkY-O6LUt1KAE,20877
|
103 |
+
fsspec/transaction.py,sha256=jeexB-H6Aw_gN6Z7hoKKe6v8zizITq39-gyTgpipIKE,2251
|
104 |
+
fsspec/utils.py,sha256=_VX_0VwDtoAFSjMYrxvJvnPNX9FMoHO5BlFHXJ0bHFI,23053
|
lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/WHEEL
ADDED
@@ -0,0 +1,5 @@
+Wheel-Version: 1.0
+Generator: bdist_wheel (0.42.0)
+Root-Is-Purelib: true
+Tag: py3-none-any
+
lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/top_level.txt
ADDED
@@ -0,0 +1 @@
+fsspec
lib/python3.11/site-packages/fsspec/parquet.py
ADDED
@@ -0,0 +1,551 @@
1 |
+
import io
|
2 |
+
import json
|
3 |
+
import warnings
|
4 |
+
|
5 |
+
from .core import url_to_fs
|
6 |
+
from .utils import merge_offset_ranges
|
7 |
+
|
8 |
+
# Parquet-Specific Utilities for fsspec
|
9 |
+
#
|
10 |
+
# Most of the functions defined in this module are NOT
|
11 |
+
# intended for public consumption. The only exception
|
12 |
+
# to this is `open_parquet_file`, which should be used
|
13 |
+
# in place of `fs.open()` to open parquet-formatted files
|
14 |
+
# on remote file systems.
|
15 |
+
|
16 |
+
|
17 |
+
def open_parquet_file(
|
18 |
+
path,
|
19 |
+
mode="rb",
|
20 |
+
fs=None,
|
21 |
+
metadata=None,
|
22 |
+
columns=None,
|
23 |
+
row_groups=None,
|
24 |
+
storage_options=None,
|
25 |
+
strict=False,
|
26 |
+
engine="auto",
|
27 |
+
max_gap=64_000,
|
28 |
+
max_block=256_000_000,
|
29 |
+
footer_sample_size=1_000_000,
|
30 |
+
**kwargs,
|
31 |
+
):
|
32 |
+
"""
|
33 |
+
Return a file-like object for a single Parquet file.
|
34 |
+
|
35 |
+
The specified parquet `engine` will be used to parse the
|
36 |
+
footer metadata, and determine the required byte ranges
|
37 |
+
from the file. The target path will then be opened with
|
38 |
+
the "parts" (`KnownPartsOfAFile`) caching strategy.
|
39 |
+
|
40 |
+
Note that this method is intended for usage with remote
|
41 |
+
file systems, and is unlikely to improve parquet-read
|
42 |
+
performance on local file systems.
|
43 |
+
|
44 |
+
Parameters
|
45 |
+
----------
|
46 |
+
path: str
|
47 |
+
Target file path.
|
48 |
+
mode: str, optional
|
49 |
+
Mode option to be passed through to `fs.open`. Default is "rb".
|
50 |
+
metadata: Any, optional
|
51 |
+
Parquet metadata object. Object type must be supported
|
52 |
+
by the backend parquet engine. For now, only the "fastparquet"
|
53 |
+
engine supports an explicit `ParquetFile` metadata object.
|
54 |
+
If a metadata object is supplied, the remote footer metadata
|
55 |
+
will not need to be transferred into local memory.
|
56 |
+
fs: AbstractFileSystem, optional
|
57 |
+
Filesystem object to use for opening the file. If nothing is
|
58 |
+
specified, an `AbstractFileSystem` object will be inferred.
|
59 |
+
engine : str, default "auto"
|
60 |
+
Parquet engine to use for metadata parsing. Allowed options
|
61 |
+
include "fastparquet", "pyarrow", and "auto". The specified
|
62 |
+
engine must be installed in the current environment. If
|
63 |
+
"auto" is specified, and both engines are installed,
|
64 |
+
"fastparquet" will take precedence over "pyarrow".
|
65 |
+
columns: list, optional
|
66 |
+
List of all column names that may be read from the file.
|
67 |
+
row_groups : list, optional
|
68 |
+
List of all row-groups that may be read from the file. This
|
69 |
+
may be a list of row-group indices (integers), or it may be
|
70 |
+
a list of `RowGroup` metadata objects (if the "fastparquet"
|
71 |
+
engine is used).
|
72 |
+
storage_options : dict, optional
|
73 |
+
Used to generate an `AbstractFileSystem` object if `fs` was
|
74 |
+
not specified.
|
75 |
+
strict : bool, optional
|
76 |
+
Whether the resulting `KnownPartsOfAFile` cache should
|
77 |
+
fetch reads that go beyond a known byte-range boundary.
|
78 |
+
If `False` (the default), any read that ends outside a
|
79 |
+
known part will be zero padded. Note that using
|
80 |
+
`strict=True` may be useful for debugging.
|
81 |
+
max_gap : int, optional
|
82 |
+
Neighboring byte ranges will only be merged when their
|
83 |
+
inter-range gap is <= `max_gap`. Default is 64KB.
|
84 |
+
max_block : int, optional
|
85 |
+
Neighboring byte ranges will only be merged when the size of
|
86 |
+
the aggregated range is <= `max_block`. Default is 256MB.
|
87 |
+
footer_sample_size : int, optional
|
88 |
+
Number of bytes to read from the end of the path to look
|
89 |
+
for the footer metadata. If the sampled bytes do not contain
|
90 |
+
the footer, a second read request will be required, and
|
91 |
+
performance will suffer. Default is 1MB.
|
92 |
+
**kwargs :
|
93 |
+
Optional key-word arguments to pass to `fs.open`
|
94 |
+
"""
|
95 |
+
|
96 |
+
# Make sure we have an `AbstractFileSystem` object
|
97 |
+
# to work with
|
98 |
+
if fs is None:
|
99 |
+
fs = url_to_fs(path, **(storage_options or {}))[0]
|
100 |
+
|
101 |
+
# For now, `columns == []` not supported. Just use
|
102 |
+
# default `open` command with `path` input
|
103 |
+
if columns is not None and len(columns) == 0:
|
104 |
+
return fs.open(path, mode=mode)
|
105 |
+
|
106 |
+
# Set the engine
|
107 |
+
engine = _set_engine(engine)
|
108 |
+
|
109 |
+
# Fetch the known byte ranges needed to read
|
110 |
+
# `columns` and/or `row_groups`
|
111 |
+
data = _get_parquet_byte_ranges(
|
112 |
+
[path],
|
113 |
+
fs,
|
114 |
+
metadata=metadata,
|
115 |
+
columns=columns,
|
116 |
+
row_groups=row_groups,
|
117 |
+
engine=engine,
|
118 |
+
max_gap=max_gap,
|
119 |
+
max_block=max_block,
|
120 |
+
footer_sample_size=footer_sample_size,
|
121 |
+
)
|
122 |
+
|
123 |
+
# Extract file name from `data`
|
124 |
+
fn = next(iter(data)) if data else path
|
125 |
+
|
126 |
+
# Call self.open with "parts" caching
|
127 |
+
options = kwargs.pop("cache_options", {}).copy()
|
128 |
+
return fs.open(
|
129 |
+
fn,
|
130 |
+
mode=mode,
|
131 |
+
cache_type="parts",
|
132 |
+
cache_options={
|
133 |
+
**options,
|
134 |
+
**{
|
135 |
+
"data": data.get(fn, {}),
|
136 |
+
"strict": strict,
|
137 |
+
},
|
138 |
+
},
|
139 |
+
**kwargs,
|
140 |
+
)
|
141 |
+
|
142 |
+
|
143 |
+
def _get_parquet_byte_ranges(
|
144 |
+
paths,
|
145 |
+
fs,
|
146 |
+
metadata=None,
|
147 |
+
columns=None,
|
148 |
+
row_groups=None,
|
149 |
+
max_gap=64_000,
|
150 |
+
max_block=256_000_000,
|
151 |
+
footer_sample_size=1_000_000,
|
152 |
+
engine="auto",
|
153 |
+
):
|
154 |
+
"""Get a dictionary of the known byte ranges needed
|
155 |
+
to read a specific column/row-group selection from a
|
156 |
+
Parquet dataset. Each value in the output dictionary
|
157 |
+
is intended for use as the `data` argument for the
|
158 |
+
`KnownPartsOfAFile` caching strategy of a single path.
|
159 |
+
"""
|
160 |
+
|
161 |
+
# Set engine if necessary
|
162 |
+
if isinstance(engine, str):
|
163 |
+
engine = _set_engine(engine)
|
164 |
+
|
165 |
+
# Pass to specialized function if metadata is defined
|
166 |
+
if metadata is not None:
|
167 |
+
|
168 |
+
# Use the provided parquet metadata object
|
169 |
+
# to avoid transferring/parsing footer metadata
|
170 |
+
return _get_parquet_byte_ranges_from_metadata(
|
171 |
+
metadata,
|
172 |
+
fs,
|
173 |
+
engine,
|
174 |
+
columns=columns,
|
175 |
+
row_groups=row_groups,
|
176 |
+
max_gap=max_gap,
|
177 |
+
max_block=max_block,
|
178 |
+
)
|
179 |
+
|
180 |
+
# Get file sizes asynchronously
|
181 |
+
file_sizes = fs.sizes(paths)
|
182 |
+
|
183 |
+
# Populate global paths, starts, & ends
|
184 |
+
result = {}
|
185 |
+
data_paths = []
|
186 |
+
data_starts = []
|
187 |
+
data_ends = []
|
188 |
+
add_header_magic = True
|
189 |
+
if columns is None and row_groups is None:
|
190 |
+
# We are NOT selecting specific columns or row-groups.
|
191 |
+
#
|
192 |
+
# We can avoid sampling the footers, and just transfer
|
193 |
+
# all file data with cat_ranges
|
194 |
+
for i, path in enumerate(paths):
|
195 |
+
result[path] = {}
|
196 |
+
for b in range(0, file_sizes[i], max_block):
|
197 |
+
data_paths.append(path)
|
198 |
+
data_starts.append(b)
|
199 |
+
data_ends.append(min(b + max_block, file_sizes[i]))
|
200 |
+
add_header_magic = False # "Magic" should already be included
|
201 |
+
else:
|
202 |
+
# We ARE selecting specific columns or row-groups.
|
203 |
+
#
|
204 |
+
# Gather file footers.
|
205 |
+
# We just take the last `footer_sample_size` bytes of each
|
206 |
+
# file (or the entire file if it is smaller than that)
|
207 |
+
footer_starts = []
|
208 |
+
footer_ends = []
|
209 |
+
for i, path in enumerate(paths):
|
210 |
+
footer_ends.append(file_sizes[i])
|
211 |
+
sample_size = max(0, file_sizes[i] - footer_sample_size)
|
212 |
+
footer_starts.append(sample_size)
|
213 |
+
footer_samples = fs.cat_ranges(paths, footer_starts, footer_ends)
|
214 |
+
|
215 |
+
# Check our footer samples and re-sample if necessary.
|
216 |
+
missing_footer_starts = footer_starts.copy()
|
217 |
+
large_footer = 0
|
218 |
+
for i, path in enumerate(paths):
|
219 |
+
footer_size = int.from_bytes(footer_samples[i][-8:-4], "little")
|
220 |
+
real_footer_start = file_sizes[i] - (footer_size + 8)
|
221 |
+
if real_footer_start < footer_starts[i]:
|
222 |
+
missing_footer_starts[i] = real_footer_start
|
223 |
+
large_footer = max(large_footer, (footer_size + 8))
|
224 |
+
if large_footer:
|
225 |
+
warnings.warn(
|
226 |
+
f"Not enough data was used to sample the parquet footer. "
|
227 |
+
f"Try setting footer_sample_size >= {large_footer}."
|
228 |
+
)
|
229 |
+
for i, block in enumerate(
|
230 |
+
fs.cat_ranges(
|
231 |
+
paths,
|
232 |
+
missing_footer_starts,
|
233 |
+
footer_starts,
|
234 |
+
)
|
235 |
+
):
|
236 |
+
footer_samples[i] = block + footer_samples[i]
|
237 |
+
footer_starts[i] = missing_footer_starts[i]
|
238 |
+
|
239 |
+
# Calculate required byte ranges for each path
|
240 |
+
for i, path in enumerate(paths):
|
241 |
+
|
242 |
+
# Deal with small-file case.
|
243 |
+
# Just include all remaining bytes of the file
|
244 |
+
# in a single range.
|
245 |
+
if file_sizes[i] < max_block:
|
246 |
+
if footer_starts[i] > 0:
|
247 |
+
# Only need to transfer the data if the
|
248 |
+
# footer sample isn't already the whole file
|
249 |
+
data_paths.append(path)
|
250 |
+
data_starts.append(0)
|
251 |
+
data_ends.append(footer_starts[i])
|
252 |
+
continue
|
253 |
+
|
254 |
+
# Use "engine" to collect data byte ranges
|
255 |
+
path_data_starts, path_data_ends = engine._parquet_byte_ranges(
|
256 |
+
columns,
|
257 |
+
row_groups=row_groups,
|
258 |
+
footer=footer_samples[i],
|
259 |
+
footer_start=footer_starts[i],
|
260 |
+
)
|
261 |
+
|
262 |
+
data_paths += [path] * len(path_data_starts)
|
263 |
+
data_starts += path_data_starts
|
264 |
+
data_ends += path_data_ends
|
265 |
+
|
266 |
+
# Merge adjacent offset ranges
|
267 |
+
data_paths, data_starts, data_ends = merge_offset_ranges(
|
268 |
+
data_paths,
|
269 |
+
data_starts,
|
270 |
+
data_ends,
|
271 |
+
max_gap=max_gap,
|
272 |
+
max_block=max_block,
|
273 |
+
sort=False, # Should already be sorted
|
274 |
+
)
|
275 |
+
|
276 |
+
# Start by populating `result` with footer samples
|
277 |
+
for i, path in enumerate(paths):
|
278 |
+
result[path] = {(footer_starts[i], footer_ends[i]): footer_samples[i]}
|
279 |
+
|
280 |
+
# Transfer the data byte-ranges into local memory
|
281 |
+
_transfer_ranges(fs, result, data_paths, data_starts, data_ends)
|
282 |
+
|
283 |
+
# Add b"PAR1" to header if necessary
|
284 |
+
if add_header_magic:
|
285 |
+
_add_header_magic(result)
|
286 |
+
|
287 |
+
return result
|
288 |
+
|
289 |
+
|
290 |
+
def _get_parquet_byte_ranges_from_metadata(
|
291 |
+
metadata,
|
292 |
+
fs,
|
293 |
+
engine,
|
294 |
+
columns=None,
|
295 |
+
row_groups=None,
|
296 |
+
max_gap=64_000,
|
297 |
+
max_block=256_000_000,
|
298 |
+
):
|
299 |
+
"""Simplified version of `_get_parquet_byte_ranges` for
|
300 |
+
the case that an engine-specific `metadata` object is
|
301 |
+
provided, and the remote footer metadata does not need to
|
302 |
+
be transferred before calculating the required byte ranges.
|
303 |
+
"""
|
304 |
+
|
305 |
+
# Use "engine" to collect data byte ranges
|
306 |
+
data_paths, data_starts, data_ends = engine._parquet_byte_ranges(
|
307 |
+
columns,
|
308 |
+
row_groups=row_groups,
|
309 |
+
metadata=metadata,
|
310 |
+
)
|
311 |
+
|
312 |
+
# Merge adjacent offset ranges
|
313 |
+
data_paths, data_starts, data_ends = merge_offset_ranges(
|
314 |
+
data_paths,
|
315 |
+
data_starts,
|
316 |
+
data_ends,
|
317 |
+
max_gap=max_gap,
|
318 |
+
max_block=max_block,
|
319 |
+
sort=False, # Should be sorted
|
320 |
+
)
|
321 |
+
|
322 |
+
# Transfer the data byte-ranges into local memory
|
323 |
+
result = {fn: {} for fn in list(set(data_paths))}
|
324 |
+
_transfer_ranges(fs, result, data_paths, data_starts, data_ends)
|
325 |
+
|
326 |
+
# Add b"PAR1" to header
|
327 |
+
_add_header_magic(result)
|
328 |
+
|
329 |
+
return result
|
330 |
+
|
331 |
+
|
332 |
+
def _transfer_ranges(fs, blocks, paths, starts, ends):
|
333 |
+
# Use cat_ranges to gather the data byte_ranges
|
334 |
+
ranges = (paths, starts, ends)
|
335 |
+
for path, start, stop, data in zip(*ranges, fs.cat_ranges(*ranges)):
|
336 |
+
blocks[path][(start, stop)] = data
|
337 |
+
|
338 |
+
|
339 |
+
def _add_header_magic(data):
|
340 |
+
# Add b"PAR1" to file headers
|
341 |
+
for i, path in enumerate(list(data.keys())):
|
342 |
+
add_magic = True
|
343 |
+
for k in data[path].keys():
|
344 |
+
if k[0] == 0 and k[1] >= 4:
|
345 |
+
add_magic = False
|
346 |
+
break
|
347 |
+
if add_magic:
|
348 |
+
data[path][(0, 4)] = b"PAR1"
|
349 |
+
|
350 |
+
|
351 |
+
def _set_engine(engine_str):
|
352 |
+
|
353 |
+
# Define a list of parquet engines to try
|
354 |
+
if engine_str == "auto":
|
355 |
+
try_engines = ("fastparquet", "pyarrow")
|
356 |
+
elif not isinstance(engine_str, str):
|
357 |
+
raise ValueError(
|
358 |
+
"Failed to set parquet engine! "
|
359 |
+
"Please pass 'fastparquet', 'pyarrow', or 'auto'"
|
360 |
+
)
|
361 |
+
elif engine_str not in ("fastparquet", "pyarrow"):
|
362 |
+
raise ValueError(f"{engine_str} engine not supported by `fsspec.parquet`")
|
363 |
+
else:
|
364 |
+
try_engines = [engine_str]
|
365 |
+
|
366 |
+
# Try importing the engines in `try_engines`,
|
367 |
+
# and choose the first one that succeeds
|
368 |
+
for engine in try_engines:
|
369 |
+
try:
|
370 |
+
if engine == "fastparquet":
|
371 |
+
return FastparquetEngine()
|
372 |
+
elif engine == "pyarrow":
|
373 |
+
return PyarrowEngine()
|
374 |
+
except ImportError:
|
375 |
+
pass
|
376 |
+
|
377 |
+
# Raise an error if a supported parquet engine
|
378 |
+
# was not found
|
379 |
+
raise ImportError(
    f"The following parquet engines are not installed "
    f"in your python environment: {try_engines}. "
    f"Please install 'fastparquet' or 'pyarrow' to "
    f"utilize the `fsspec.parquet` module."
)
|
385 |
+
|
386 |
+
|
387 |
+
class FastparquetEngine:
|
388 |
+
|
389 |
+
# The purpose of the FastparquetEngine class is
|
390 |
+
# to check if fastparquet can be imported (on initialization)
|
391 |
+
# and to define a `_parquet_byte_ranges` method. In the
|
392 |
+
# future, this class may also be used to define other
|
393 |
+
# methods/logic that are specific to fastparquet.
|
394 |
+
|
395 |
+
def __init__(self):
|
396 |
+
import fastparquet as fp
|
397 |
+
|
398 |
+
self.fp = fp
|
399 |
+
|
400 |
+
def _row_group_filename(self, row_group, pf):
|
401 |
+
return pf.row_group_filename(row_group)
|
402 |
+
|
403 |
+
def _parquet_byte_ranges(
|
404 |
+
self,
|
405 |
+
columns,
|
406 |
+
row_groups=None,
|
407 |
+
metadata=None,
|
408 |
+
footer=None,
|
409 |
+
footer_start=None,
|
410 |
+
):
|
411 |
+
|
412 |
+
# Initialize offset ranges and define ParquetFile metadata
|
413 |
+
pf = metadata
|
414 |
+
data_paths, data_starts, data_ends = [], [], []
|
415 |
+
if pf is None:
|
416 |
+
pf = self.fp.ParquetFile(io.BytesIO(footer))
|
417 |
+
|
418 |
+
# Convert columns to a set and add any index columns
|
419 |
+
# specified in the pandas metadata (just in case)
|
420 |
+
column_set = None if columns is None else set(columns)
|
421 |
+
if column_set is not None and hasattr(pf, "pandas_metadata"):
|
422 |
+
md_index = [
|
423 |
+
ind
|
424 |
+
for ind in pf.pandas_metadata.get("index_columns", [])
|
425 |
+
# Ignore RangeIndex information
|
426 |
+
if not isinstance(ind, dict)
|
427 |
+
]
|
428 |
+
column_set |= set(md_index)
|
429 |
+
|
430 |
+
# Check if row_groups is a list of integers
|
431 |
+
# or a list of row-group metadata
|
432 |
+
if row_groups and not isinstance(row_groups[0], int):
|
433 |
+
# Input row_groups contains row-group metadata
|
434 |
+
row_group_indices = None
|
435 |
+
else:
|
436 |
+
# Input row_groups contains row-group indices
|
437 |
+
row_group_indices = row_groups
|
438 |
+
row_groups = pf.row_groups
|
439 |
+
|
440 |
+
# Loop through column chunks to add required byte ranges
|
441 |
+
for r, row_group in enumerate(row_groups):
|
442 |
+
# Skip this row-group if we are targeting
|
443 |
+
# specific row-groups
|
444 |
+
if row_group_indices is None or r in row_group_indices:
|
445 |
+
|
446 |
+
# Find the target parquet-file path for `row_group`
|
447 |
+
fn = self._row_group_filename(row_group, pf)
|
448 |
+
|
449 |
+
for column in row_group.columns:
|
450 |
+
name = column.meta_data.path_in_schema[0]
|
451 |
+
# Skip this column if we are targeting a
|
452 |
+
# specific set of columns
|
453 |
+
if column_set is None or name in column_set:
|
454 |
+
file_offset0 = column.meta_data.dictionary_page_offset
|
455 |
+
if file_offset0 is None:
|
456 |
+
file_offset0 = column.meta_data.data_page_offset
|
457 |
+
num_bytes = column.meta_data.total_compressed_size
|
458 |
+
if footer_start is None or file_offset0 < footer_start:
|
459 |
+
data_paths.append(fn)
|
460 |
+
data_starts.append(file_offset0)
|
461 |
+
data_ends.append(
|
462 |
+
min(
|
463 |
+
file_offset0 + num_bytes,
|
464 |
+
footer_start or (file_offset0 + num_bytes),
|
465 |
+
)
|
466 |
+
)
|
467 |
+
|
468 |
+
if metadata:
|
469 |
+
# The metadata in this call may map to multiple
|
470 |
+
# file paths. Need to include `data_paths`
|
471 |
+
return data_paths, data_starts, data_ends
|
472 |
+
return data_starts, data_ends
|
473 |
+
|
474 |
+
|
475 |
+
class PyarrowEngine:
|
476 |
+
|
477 |
+
# The purpose of the PyarrowEngine class is
|
478 |
+
# to check if pyarrow can be imported (on initialization)
|
479 |
+
# and to define a `_parquet_byte_ranges` method. In the
|
480 |
+
# future, this class may also be used to define other
|
481 |
+
# methods/logic that are specific to pyarrow.
|
482 |
+
|
483 |
+
def __init__(self):
|
484 |
+
import pyarrow.parquet as pq
|
485 |
+
|
486 |
+
self.pq = pq
|
487 |
+
|
488 |
+
def _row_group_filename(self, row_group, metadata):
|
489 |
+
raise NotImplementedError
|
490 |
+
|
491 |
+
def _parquet_byte_ranges(
|
492 |
+
self,
|
493 |
+
columns,
|
494 |
+
row_groups=None,
|
495 |
+
metadata=None,
|
496 |
+
footer=None,
|
497 |
+
footer_start=None,
|
498 |
+
):
|
499 |
+
|
500 |
+
if metadata is not None:
|
501 |
+
raise ValueError("metadata input not supported for PyarrowEngine")
|
502 |
+
|
503 |
+
data_starts, data_ends = [], []
|
504 |
+
md = self.pq.ParquetFile(io.BytesIO(footer)).metadata
|
505 |
+
|
506 |
+
# Convert columns to a set and add any index columns
|
507 |
+
# specified in the pandas metadata (just in case)
|
508 |
+
column_set = None if columns is None else set(columns)
|
509 |
+
if column_set is not None:
|
510 |
+
schema = md.schema.to_arrow_schema()
|
511 |
+
has_pandas_metadata = (
|
512 |
+
schema.metadata is not None and b"pandas" in schema.metadata
|
513 |
+
)
|
514 |
+
if has_pandas_metadata:
|
515 |
+
md_index = [
|
516 |
+
ind
|
517 |
+
for ind in json.loads(
|
518 |
+
schema.metadata[b"pandas"].decode("utf8")
|
519 |
+
).get("index_columns", [])
|
520 |
+
# Ignore RangeIndex information
|
521 |
+
if not isinstance(ind, dict)
|
522 |
+
]
|
523 |
+
column_set |= set(md_index)
|
524 |
+
|
525 |
+
# Loop through column chunks to add required byte ranges
|
526 |
+
for r in range(md.num_row_groups):
|
527 |
+
# Skip this row-group if we are targeting
|
528 |
+
# specific row-groups
|
529 |
+
if row_groups is None or r in row_groups:
|
530 |
+
row_group = md.row_group(r)
|
531 |
+
for c in range(row_group.num_columns):
|
532 |
+
column = row_group.column(c)
|
533 |
+
name = column.path_in_schema
|
534 |
+
# Skip this column if we are targeting a
|
535 |
+
# specific set of columns
|
536 |
+
split_name = name.split(".")[0]
|
537 |
+
if (
|
538 |
+
column_set is None
|
539 |
+
or name in column_set
|
540 |
+
or split_name in column_set
|
541 |
+
):
|
542 |
+
file_offset0 = column.dictionary_page_offset
|
543 |
+
if file_offset0 is None:
|
544 |
+
file_offset0 = column.data_page_offset
|
545 |
+
num_bytes = column.total_compressed_size
|
546 |
+
if file_offset0 < footer_start:
|
547 |
+
data_starts.append(file_offset0)
|
548 |
+
data_ends.append(
|
549 |
+
min(file_offset0 + num_bytes, footer_start)
|
550 |
+
)
|
551 |
+
return data_starts, data_ends
|
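For context on how the `open_parquet_file` helper defined above is meant to be called, a minimal, hedged usage sketch follows. It is not part of the diff; the remote URL, column names, and storage options are hypothetical placeholders, and a parquet engine plus the relevant fsspec backend (s3fs here) must be installed.

```python
import pandas as pd

from fsspec.parquet import open_parquet_file

url = "s3://example-bucket/dataset/part.0.parquet"   # hypothetical placeholder

# Only the footer metadata and the byte ranges covering the requested columns
# are fetched; the file object uses the "parts" (KnownPartsOfAFile) cache.
with open_parquet_file(
    url,
    columns=["x", "y"],                  # hypothetical column names
    storage_options={"anon": True},      # passed through to the backend FS
) as f:
    df = pd.read_parquet(f, columns=["x", "y"])

print(df.head())
```

As the docstring above notes, this mainly pays off for remote filesystems; on local storage it is unlikely to beat a plain `fs.open()`.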
lib/python3.11/site-packages/fsspec/registry.py
ADDED
@@ -0,0 +1,299 @@
1 |
+
from __future__ import annotations
|
2 |
+
|
3 |
+
import importlib
|
4 |
+
import types
|
5 |
+
import warnings
|
6 |
+
|
7 |
+
__all__ = ["registry", "get_filesystem_class", "default"]
|
8 |
+
|
9 |
+
# internal, mutable
|
10 |
+
_registry: dict[str, type] = {}
|
11 |
+
|
12 |
+
# external, immutable
|
13 |
+
registry = types.MappingProxyType(_registry)
|
14 |
+
default = "file"
|
15 |
+
|
16 |
+
|
17 |
+
def register_implementation(name, cls, clobber=False, errtxt=None):
|
18 |
+
"""Add implementation class to the registry
|
19 |
+
|
20 |
+
Parameters
|
21 |
+
----------
|
22 |
+
name: str
|
23 |
+
Protocol name to associate with the class
|
24 |
+
cls: class or str
|
25 |
+
if a class: fsspec-compliant implementation class (normally inherits from
|
26 |
+
``fsspec.AbstractFileSystem``, gets added straight to the registry. If a
|
27 |
+
str, the full path to an implementation class like package.module.class,
|
28 |
+
which gets added to known_implementations,
|
29 |
+
so the import is deferred until the filesystem is actually used.
|
30 |
+
clobber: bool (optional)
|
31 |
+
Whether to overwrite a protocol with the same name; if False, will raise
|
32 |
+
instead.
|
33 |
+
errtxt: str (optional)
|
34 |
+
If given, then a failure to import the given class will result in this
|
35 |
+
text being given.
|
36 |
+
"""
|
37 |
+
if isinstance(cls, str):
|
38 |
+
if name in known_implementations and clobber is False:
|
39 |
+
if cls != known_implementations[name]["class"]:
|
40 |
+
raise ValueError(
|
41 |
+
f"Name ({name}) already in the known_implementations and clobber "
|
42 |
+
f"is False"
|
43 |
+
)
|
44 |
+
else:
|
45 |
+
known_implementations[name] = {
|
46 |
+
"class": cls,
|
47 |
+
"err": errtxt or f"{cls} import failed for protocol {name}",
|
48 |
+
}
|
49 |
+
|
50 |
+
else:
|
51 |
+
if name in registry and clobber is False:
|
52 |
+
if _registry[name] is not cls:
|
53 |
+
raise ValueError(
|
54 |
+
f"Name ({name}) already in the registry and clobber is False"
|
55 |
+
)
|
56 |
+
else:
|
57 |
+
_registry[name] = cls
|
58 |
+
|
59 |
+
|
60 |
+
# protocols mapped to the class which implements them. This dict can be
|
61 |
+
# updated with register_implementation
|
62 |
+
known_implementations = {
|
63 |
+
"data": {"class": "fsspec.implementations.data.DataFileSystem"},
|
64 |
+
"file": {"class": "fsspec.implementations.local.LocalFileSystem"},
|
65 |
+
"local": {"class": "fsspec.implementations.local.LocalFileSystem"},
|
66 |
+
"memory": {"class": "fsspec.implementations.memory.MemoryFileSystem"},
|
67 |
+
"dropbox": {
|
68 |
+
"class": "dropboxdrivefs.DropboxDriveFileSystem",
|
69 |
+
"err": (
|
70 |
+
'DropboxFileSystem requires "dropboxdrivefs",'
|
71 |
+
'"requests" and "dropbox" to be installed'
|
72 |
+
),
|
73 |
+
},
|
74 |
+
"http": {
|
75 |
+
"class": "fsspec.implementations.http.HTTPFileSystem",
|
76 |
+
"err": 'HTTPFileSystem requires "requests" and "aiohttp" to be installed',
|
77 |
+
},
|
78 |
+
"https": {
|
79 |
+
"class": "fsspec.implementations.http.HTTPFileSystem",
|
80 |
+
"err": 'HTTPFileSystem requires "requests" and "aiohttp" to be installed',
|
81 |
+
},
|
82 |
+
"zip": {"class": "fsspec.implementations.zip.ZipFileSystem"},
|
83 |
+
"tar": {"class": "fsspec.implementations.tar.TarFileSystem"},
|
84 |
+
"gcs": {
|
85 |
+
"class": "gcsfs.GCSFileSystem",
|
86 |
+
"err": "Please install gcsfs to access Google Storage",
|
87 |
+
},
|
88 |
+
"gs": {
|
89 |
+
"class": "gcsfs.GCSFileSystem",
|
90 |
+
"err": "Please install gcsfs to access Google Storage",
|
91 |
+
},
|
92 |
+
"gdrive": {
|
93 |
+
"class": "gdrivefs.GoogleDriveFileSystem",
|
94 |
+
"err": "Please install gdrivefs for access to Google Drive",
|
95 |
+
},
|
96 |
+
"sftp": {
|
97 |
+
"class": "fsspec.implementations.sftp.SFTPFileSystem",
|
98 |
+
"err": 'SFTPFileSystem requires "paramiko" to be installed',
|
99 |
+
},
|
100 |
+
"ssh": {
|
101 |
+
"class": "fsspec.implementations.sftp.SFTPFileSystem",
|
102 |
+
"err": 'SFTPFileSystem requires "paramiko" to be installed',
|
103 |
+
},
|
104 |
+
"ftp": {"class": "fsspec.implementations.ftp.FTPFileSystem"},
|
105 |
+
"hdfs": {
|
106 |
+
"class": "fsspec.implementations.arrow.HadoopFileSystem",
|
107 |
+
"err": "pyarrow and local java libraries required for HDFS",
|
108 |
+
},
|
109 |
+
"arrow_hdfs": {
|
110 |
+
"class": "fsspec.implementations.arrow.HadoopFileSystem",
|
111 |
+
"err": "pyarrow and local java libraries required for HDFS",
|
112 |
+
},
|
113 |
+
"webhdfs": {
|
114 |
+
"class": "fsspec.implementations.webhdfs.WebHDFS",
|
115 |
+
"err": 'webHDFS access requires "requests" to be installed',
|
116 |
+
},
|
117 |
+
"s3": {"class": "s3fs.S3FileSystem", "err": "Install s3fs to access S3"},
|
118 |
+
"s3a": {"class": "s3fs.S3FileSystem", "err": "Install s3fs to access S3"},
|
119 |
+
"wandb": {"class": "wandbfs.WandbFS", "err": "Install wandbfs to access wandb"},
|
120 |
+
"oci": {
|
121 |
+
"class": "ocifs.OCIFileSystem",
|
122 |
+
"err": "Install ocifs to access OCI Object Storage",
|
123 |
+
},
|
124 |
+
"ocilake": {
|
125 |
+
"class": "ocifs.OCIFileSystem",
|
126 |
+
"err": "Install ocifs to access OCI Data Lake",
|
127 |
+
},
|
128 |
+
"asynclocal": {
|
129 |
+
"class": "morefs.asyn_local.AsyncLocalFileSystem",
        "err": "Install 'morefs[asynclocalfs]' to use AsyncLocalFileSystem",
    },
    "adl": {
        "class": "adlfs.AzureDatalakeFileSystem",
        "err": "Install adlfs to access Azure Datalake Gen1",
    },
    "abfs": {
        "class": "adlfs.AzureBlobFileSystem",
        "err": "Install adlfs to access Azure Datalake Gen2 and Azure Blob Storage",
    },
    "az": {
        "class": "adlfs.AzureBlobFileSystem",
        "err": "Install adlfs to access Azure Datalake Gen2 and Azure Blob Storage",
    },
    "cached": {"class": "fsspec.implementations.cached.CachingFileSystem"},
    "blockcache": {"class": "fsspec.implementations.cached.CachingFileSystem"},
    "filecache": {"class": "fsspec.implementations.cached.WholeFileCacheFileSystem"},
    "simplecache": {"class": "fsspec.implementations.cached.SimpleCacheFileSystem"},
    "dask": {
        "class": "fsspec.implementations.dask.DaskWorkerFileSystem",
        "err": "Install dask distributed to access worker file system",
    },
    "dbfs": {
        "class": "fsspec.implementations.dbfs.DatabricksFileSystem",
        "err": "Install the requests package to use the DatabricksFileSystem",
    },
    "github": {
        "class": "fsspec.implementations.github.GithubFileSystem",
        "err": "Install the requests package to use the github FS",
    },
    "git": {
        "class": "fsspec.implementations.git.GitFileSystem",
        "err": "Install pygit2 to browse local git repos",
    },
    "smb": {
        "class": "fsspec.implementations.smb.SMBFileSystem",
        "err": 'SMB requires "smbprotocol" or "smbprotocol[kerberos]" installed',
    },
    "jupyter": {
        "class": "fsspec.implementations.jupyter.JupyterFileSystem",
        "err": "Jupyter FS requires requests to be installed",
    },
    "jlab": {
        "class": "fsspec.implementations.jupyter.JupyterFileSystem",
        "err": "Jupyter FS requires requests to be installed",
    },
    "libarchive": {
        "class": "fsspec.implementations.libarchive.LibArchiveFileSystem",
        "err": "LibArchive requires to be installed",
    },
    "reference": {"class": "fsspec.implementations.reference.ReferenceFileSystem"},
    "generic": {"class": "fsspec.generic.GenericFileSystem"},
    "oss": {
        "class": "ossfs.OSSFileSystem",
        "err": "Install ossfs to access Alibaba Object Storage System",
    },
    "webdav": {
        "class": "webdav4.fsspec.WebdavFileSystem",
        "err": "Install webdav4 to access WebDAV",
    },
    "dvc": {
        "class": "dvc.api.DVCFileSystem",
        "err": "Install dvc to access DVCFileSystem",
    },
    "hf": {
        "class": "huggingface_hub.HfFileSystem",
        "err": "Install huggingface_hub to access HfFileSystem",
    },
    "root": {
        "class": "fsspec_xrootd.XRootDFileSystem",
        "err": "Install fsspec-xrootd to access xrootd storage system."
        + " Note: 'root' is the protocol name for xrootd storage systems,"
        + " not referring to root directories",
    },
    "dir": {"class": "fsspec.implementations.dirfs.DirFileSystem"},
    "box": {
        "class": "boxfs.BoxFileSystem",
        "err": "Please install boxfs to access BoxFileSystem",
    },
    "lakefs": {
        "class": "lakefs_spec.LakeFSFileSystem",
        "err": "Please install lakefs-spec to access LakeFSFileSystem",
    },
}


def get_filesystem_class(protocol):
    """Fetch named protocol implementation from the registry

    The dict ``known_implementations`` maps protocol names to the locations
    of classes implementing the corresponding file-system. When used for the
    first time, appropriate imports will happen and the class will be placed in
    the registry. All subsequent calls will fetch directly from the registry.

    Some protocol implementations require additional dependencies, and so the
    import may fail. In this case, the string in the "err" field of the
    ``known_implementations`` will be given as the error message.
    """
    if not protocol:
        protocol = default

    if protocol not in registry:
        if protocol not in known_implementations:
            raise ValueError(f"Protocol not known: {protocol}")
        bit = known_implementations[protocol]
        try:
            register_implementation(protocol, _import_class(bit["class"]))
        except ImportError as e:
            raise ImportError(bit["err"]) from e
    cls = registry[protocol]
    if getattr(cls, "protocol", None) in ("abstract", None):
        cls.protocol = protocol

    return cls


s3_msg = """Your installed version of s3fs is very old and known to cause
severe performance issues, see also https://github.com/dask/dask/issues/10276

To fix, you should specify a lower version bound on s3fs, or
update the current installation.
"""


def _import_class(cls, minv=None):
    """Take a string FQP and return the imported class or identifier

    ``cls`` is of the form "package.module.klass" or "package.module:subobject.klass"
    """
    if ":" in cls:
        mod, name = cls.rsplit(":", 1)
        s3 = mod == "s3fs"
        mod = importlib.import_module(mod)
        if s3 and mod.__version__.split(".") < ["0", "5"]:
            warnings.warn(s3_msg)
        for part in name.split("."):
            mod = getattr(mod, part)
        return mod
    else:
        mod, name = cls.rsplit(".", 1)
        s3 = mod == "s3fs"
        mod = importlib.import_module(mod)
        if s3 and mod.__version__.split(".") < ["0", "5"]:
            warnings.warn(s3_msg)
        return getattr(mod, name)


def filesystem(protocol, **storage_options):
    """Instantiate filesystems for given protocol and arguments

    ``storage_options`` are specific to the protocol being chosen, and are
    passed directly to the class.
    """
    if protocol == "arrow_hdfs":
        warnings.warn(
            "The 'arrow_hdfs' protocol has been deprecated and will be "
            "removed in the future. Specify it as 'hdfs'.",
            DeprecationWarning,
        )

    cls = get_filesystem_class(protocol)
    return cls(**storage_options)


def available_protocols():
    """Return a list of the implemented protocols.

    Note that any given protocol may require extra packages to be importable.
    """
    return list(known_implementations)
lib/python3.11/site-packages/fsspec/spec.py
ADDED
@@ -0,0 +1,1963 @@
from __future__ import annotations

import io
import logging
import os
import threading
import warnings
import weakref
from errno import ESPIPE
from glob import has_magic
from hashlib import sha256
from typing import ClassVar

from .callbacks import _DEFAULT_CALLBACK
from .config import apply_config, conf
from .dircache import DirCache
from .transaction import Transaction
from .utils import (
    _unstrip_protocol,
    glob_translate,
    isfilelike,
    other_paths,
    read_block,
    stringify_path,
    tokenize,
)

logger = logging.getLogger("fsspec")


def make_instance(cls, args, kwargs):
    return cls(*args, **kwargs)


class _Cached(type):
    """
    Metaclass for caching file system instances.

    Notes
    -----
    Instances are cached according to

    * The values of the class attributes listed in `_extra_tokenize_attributes`
    * The arguments passed to ``__init__``.

    This creates an additional reference to the filesystem, which prevents the
    filesystem from being garbage collected when all *user* references go away.
    A call to the :meth:`AbstractFileSystem.clear_instance_cache` must *also*
    be made for a filesystem instance to be garbage collected.
    """

    def __init__(cls, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Note: we intentionally create a reference here, to avoid garbage
        # collecting instances when all other references are gone. To really
        # delete a FileSystem, the cache must be cleared.
        if conf.get("weakref_instance_cache"):  # pragma: no cover
            # debug option for analysing fork/spawn conditions
            cls._cache = weakref.WeakValueDictionary()
        else:
            cls._cache = {}
        cls._pid = os.getpid()

    def __call__(cls, *args, **kwargs):
        kwargs = apply_config(cls, kwargs)
        extra_tokens = tuple(
            getattr(cls, attr, None) for attr in cls._extra_tokenize_attributes
        )
        token = tokenize(
            cls, cls._pid, threading.get_ident(), *args, *extra_tokens, **kwargs
        )
        skip = kwargs.pop("skip_instance_cache", False)
        if os.getpid() != cls._pid:
            cls._cache.clear()
            cls._pid = os.getpid()
        if not skip and cls.cachable and token in cls._cache:
            cls._latest = token
            return cls._cache[token]
        else:
            obj = super().__call__(*args, **kwargs)
            # Setting _fs_token here causes some static linters to complain.
            obj._fs_token_ = token
            obj.storage_args = args
            obj.storage_options = kwargs
            if obj.async_impl and obj.mirror_sync_methods:
                from .asyn import mirror_sync_methods

                mirror_sync_methods(obj)

            if cls.cachable and not skip:
                cls._latest = token
                cls._cache[token] = obj
            return obj
|
97 |
+
"""
|
98 |
+
An abstract super-class for pythonic file-systems
|
99 |
+
|
100 |
+
Implementations are expected to be compatible with or, better, subclass
|
101 |
+
from here.
|
102 |
+
"""
|
103 |
+
|
104 |
+
cachable = True # this class can be cached, instances reused
|
105 |
+
_cached = False
|
106 |
+
blocksize = 2**22
|
107 |
+
sep = "/"
|
108 |
+
protocol: ClassVar[str | tuple[str, ...]] = "abstract"
|
109 |
+
_latest = None
|
110 |
+
async_impl = False
|
111 |
+
mirror_sync_methods = False
|
112 |
+
root_marker = "" # For some FSs, may require leading '/' or other character
|
113 |
+
transaction_type = Transaction
|
114 |
+
|
115 |
+
#: Extra *class attributes* that should be considered when hashing.
|
116 |
+
_extra_tokenize_attributes = ()
|
117 |
+
|
118 |
+
def __init__(self, *args, **storage_options):
|
119 |
+
"""Create and configure file-system instance
|
120 |
+
|
121 |
+
Instances may be cachable, so if similar enough arguments are seen
|
122 |
+
a new instance is not required. The token attribute exists to allow
|
123 |
+
implementations to cache instances if they wish.
|
124 |
+
|
125 |
+
A reasonable default should be provided if there are no arguments.
|
126 |
+
|
127 |
+
Subclasses should call this method.
|
128 |
+
|
129 |
+
Parameters
|
130 |
+
----------
|
131 |
+
use_listings_cache, listings_expiry_time, max_paths:
|
132 |
+
passed to ``DirCache``, if the implementation supports
|
133 |
+
directory listing caching. Pass use_listings_cache=False
|
134 |
+
to disable such caching.
|
135 |
+
skip_instance_cache: bool
|
136 |
+
If this is a cachable implementation, pass True here to force
|
137 |
+
creating a new instance even if a matching instance exists, and prevent
|
138 |
+
storing this instance.
|
139 |
+
asynchronous: bool
|
140 |
+
loop: asyncio-compatible IOLoop or None
|
141 |
+
"""
|
142 |
+
if self._cached:
|
143 |
+
# reusing instance, don't change
|
144 |
+
return
|
145 |
+
self._cached = True
|
146 |
+
self._intrans = False
|
147 |
+
self._transaction = None
|
148 |
+
self._invalidated_caches_in_transaction = []
|
149 |
+
self.dircache = DirCache(**storage_options)
|
150 |
+
|
151 |
+
if storage_options.pop("add_docs", None):
|
152 |
+
warnings.warn("add_docs is no longer supported.", FutureWarning)
|
153 |
+
|
154 |
+
if storage_options.pop("add_aliases", None):
|
155 |
+
warnings.warn("add_aliases has been removed.", FutureWarning)
|
156 |
+
# This is set in _Cached
|
157 |
+
self._fs_token_ = None
|
158 |
+
|
159 |
+
@property
|
160 |
+
def fsid(self):
|
161 |
+
"""Persistent filesystem id that can be used to compare filesystems
|
162 |
+
across sessions.
|
163 |
+
"""
|
164 |
+
raise NotImplementedError
|
165 |
+
|
166 |
+
@property
|
167 |
+
def _fs_token(self):
|
168 |
+
return self._fs_token_
|
169 |
+
|
170 |
+
def __dask_tokenize__(self):
|
171 |
+
return self._fs_token
|
172 |
+
|
173 |
+
def __hash__(self):
|
174 |
+
return int(self._fs_token, 16)
|
175 |
+
|
176 |
+
def __eq__(self, other):
|
177 |
+
return isinstance(other, type(self)) and self._fs_token == other._fs_token
|
178 |
+
|
179 |
+
def __reduce__(self):
|
180 |
+
return make_instance, (type(self), self.storage_args, self.storage_options)
|
181 |
+
|
182 |
+
@classmethod
|
183 |
+
def _strip_protocol(cls, path):
|
184 |
+
"""Turn path from fully-qualified to file-system-specific
|
185 |
+
|
186 |
+
May require FS-specific handling, e.g., for relative paths or links.
|
187 |
+
"""
|
188 |
+
if isinstance(path, list):
|
189 |
+
return [cls._strip_protocol(p) for p in path]
|
190 |
+
path = stringify_path(path)
|
191 |
+
protos = (cls.protocol,) if isinstance(cls.protocol, str) else cls.protocol
|
192 |
+
for protocol in protos:
|
193 |
+
if path.startswith(protocol + "://"):
|
194 |
+
path = path[len(protocol) + 3 :]
|
195 |
+
elif path.startswith(protocol + "::"):
|
196 |
+
path = path[len(protocol) + 2 :]
|
197 |
+
path = path.rstrip("/")
|
198 |
+
# use of root_marker to make minimum required path, e.g., "/"
|
199 |
+
return path or cls.root_marker
|
200 |
+
|
201 |
+
def unstrip_protocol(self, name: str) -> str:
|
202 |
+
"""Format FS-specific path to generic, including protocol"""
|
203 |
+
protos = (self.protocol,) if isinstance(self.protocol, str) else self.protocol
|
204 |
+
for protocol in protos:
|
205 |
+
if name.startswith(f"{protocol}://"):
|
206 |
+
return name
|
207 |
+
return f"{protos[0]}://{name}"
|
208 |
+
|
209 |
+
@staticmethod
|
210 |
+
def _get_kwargs_from_urls(path):
|
211 |
+
"""If kwargs can be encoded in the paths, extract them here
|
212 |
+
|
213 |
+
This should happen before instantiation of the class; incoming paths
|
214 |
+
then should be amended to strip the options in methods.
|
215 |
+
|
216 |
+
Examples may look like an sftp path "sftp://user@host:/my/path", where
|
217 |
+
the user and host should become kwargs and later get stripped.
|
218 |
+
"""
|
219 |
+
# by default, nothing happens
|
220 |
+
return {}
|
221 |
+
|
222 |
+
@classmethod
|
223 |
+
def current(cls):
|
224 |
+
"""Return the most recently instantiated FileSystem
|
225 |
+
|
226 |
+
If no instance has been created, then create one with defaults
|
227 |
+
"""
|
228 |
+
if cls._latest in cls._cache:
|
229 |
+
return cls._cache[cls._latest]
|
230 |
+
return cls()
|
231 |
+
|
232 |
+
@property
|
233 |
+
def transaction(self):
|
234 |
+
"""A context within which files are committed together upon exit
|
235 |
+
|
236 |
+
Requires the file class to implement `.commit()` and `.discard()`
|
237 |
+
for the normal and exception cases.
|
238 |
+
"""
|
239 |
+
if self._transaction is None:
|
240 |
+
self._transaction = self.transaction_type(self)
|
241 |
+
return self._transaction
|
242 |
+
|
243 |
+
def start_transaction(self):
|
244 |
+
"""Begin write transaction for deferring files, non-context version"""
|
245 |
+
self._intrans = True
|
246 |
+
self._transaction = self.transaction_type(self)
|
247 |
+
return self.transaction
|
248 |
+
|
249 |
+
def end_transaction(self):
|
250 |
+
"""Finish write transaction, non-context version"""
|
251 |
+
self.transaction.complete()
|
252 |
+
self._transaction = None
|
253 |
+
# The invalid cache must be cleared after the transaction is completed.
|
254 |
+
for path in self._invalidated_caches_in_transaction:
|
255 |
+
self.invalidate_cache(path)
|
256 |
+
self._invalidated_caches_in_transaction.clear()
|
257 |
+
|
258 |
+
def invalidate_cache(self, path=None):
|
259 |
+
"""
|
260 |
+
Discard any cached directory information
|
261 |
+
|
262 |
+
Parameters
|
263 |
+
----------
|
264 |
+
path: string or None
|
265 |
+
If None, clear all listings cached else listings at or under given
|
266 |
+
path.
|
267 |
+
"""
|
268 |
+
# Not necessary to implement invalidation mechanism, may have no cache.
|
269 |
+
# But if have, you should call this method of parent class from your
|
270 |
+
# subclass to ensure expiring caches after transacations correctly.
|
271 |
+
# See the implementation of FTPFileSystem in ftp.py
|
272 |
+
if self._intrans:
|
273 |
+
self._invalidated_caches_in_transaction.append(path)
|
274 |
+
|
275 |
+
def mkdir(self, path, create_parents=True, **kwargs):
|
276 |
+
"""
|
277 |
+
Create directory entry at path
|
278 |
+
|
279 |
+
For systems that don't have true directories, may create an for
|
280 |
+
this instance only and not touch the real filesystem
|
281 |
+
|
282 |
+
Parameters
|
283 |
+
----------
|
284 |
+
path: str
|
285 |
+
location
|
286 |
+
create_parents: bool
|
287 |
+
if True, this is equivalent to ``makedirs``
|
288 |
+
kwargs:
|
289 |
+
may be permissions, etc.
|
290 |
+
"""
|
291 |
+
pass # not necessary to implement, may not have directories
|
292 |
+
|
293 |
+
def makedirs(self, path, exist_ok=False):
|
294 |
+
"""Recursively make directories
|
295 |
+
|
296 |
+
Creates directory at path and any intervening required directories.
|
297 |
+
Raises exception if, for instance, the path already exists but is a
|
298 |
+
file.
|
299 |
+
|
300 |
+
Parameters
|
301 |
+
----------
|
302 |
+
path: str
|
303 |
+
leaf directory name
|
304 |
+
exist_ok: bool (False)
|
305 |
+
If False, will error if the target already exists
|
306 |
+
"""
|
307 |
+
pass # not necessary to implement, may not have directories
|
308 |
+
|
309 |
+
def rmdir(self, path):
|
310 |
+
"""Remove a directory, if empty"""
|
311 |
+
pass # not necessary to implement, may not have directories
|
312 |
+
|
313 |
+
def ls(self, path, detail=True, **kwargs):
|
314 |
+
"""List objects at path.
|
315 |
+
|
316 |
+
This should include subdirectories and files at that location. The
|
317 |
+
difference between a file and a directory must be clear when details
|
318 |
+
are requested.
|
319 |
+
|
320 |
+
The specific keys, or perhaps a FileInfo class, or similar, is TBD,
|
321 |
+
but must be consistent across implementations.
|
322 |
+
Must include:
|
323 |
+
|
324 |
+
- full path to the entry (without protocol)
|
325 |
+
- size of the entry, in bytes. If the value cannot be determined, will
|
326 |
+
be ``None``.
|
327 |
+
- type of entry, "file", "directory" or other
|
328 |
+
|
329 |
+
Additional information
|
330 |
+
may be present, appropriate to the file-system, e.g., generation,
|
331 |
+
checksum, etc.
|
332 |
+
|
333 |
+
May use refresh=True|False to allow use of self._ls_from_cache to
|
334 |
+
check for a saved listing and avoid calling the backend. This would be
|
335 |
+
common where listing may be expensive.
|
336 |
+
|
337 |
+
Parameters
|
338 |
+
----------
|
339 |
+
path: str
|
340 |
+
detail: bool
|
341 |
+
if True, gives a list of dictionaries, where each is the same as
|
342 |
+
the result of ``info(path)``. If False, gives a list of paths
|
343 |
+
(str).
|
344 |
+
kwargs: may have additional backend-specific options, such as version
|
345 |
+
information
|
346 |
+
|
347 |
+
Returns
|
348 |
+
-------
|
349 |
+
List of strings if detail is False, or list of directory information
|
350 |
+
dicts if detail is True.
|
351 |
+
"""
|
352 |
+
raise NotImplementedError
|
353 |
+
|
354 |
+
def _ls_from_cache(self, path):
|
355 |
+
"""Check cache for listing
|
356 |
+
|
357 |
+
Returns listing, if found (may be empty list for a directly that exists
|
358 |
+
but contains nothing), None if not in cache.
|
359 |
+
"""
|
360 |
+
parent = self._parent(path)
|
361 |
+
if path.rstrip("/") in self.dircache:
|
362 |
+
return self.dircache[path.rstrip("/")]
|
363 |
+
try:
|
364 |
+
files = [
|
365 |
+
f
|
366 |
+
for f in self.dircache[parent]
|
367 |
+
if f["name"] == path
|
368 |
+
or (f["name"] == path.rstrip("/") and f["type"] == "directory")
|
369 |
+
]
|
370 |
+
if len(files) == 0:
|
371 |
+
# parent dir was listed but did not contain this file
|
372 |
+
raise FileNotFoundError(path)
|
373 |
+
return files
|
374 |
+
except KeyError:
|
375 |
+
pass
|
376 |
+
|
377 |
+
    def walk(self, path, maxdepth=None, topdown=True, on_error="omit", **kwargs):
        """Return all files below path

        List all files, recursing into subdirectories; output is iterator-style,
        like ``os.walk()``. For a simple list of files, ``find()`` is available.

        When topdown is True, the caller can modify the dirnames list in-place (perhaps
        using del or slice assignment), and walk() will
        only recurse into the subdirectories whose names remain in dirnames;
        this can be used to prune the search, impose a specific order of visiting,
        or even to inform walk() about directories the caller creates or renames before
        it resumes walk() again.
        Modifying dirnames when topdown is False has no effect. (see os.walk)

        Note that the "files" outputted will include anything that is not
        a directory, such as links.

        Parameters
        ----------
        path: str
            Root to recurse into
        maxdepth: int
            Maximum recursion depth. None means limitless, but not recommended
            on link-based file-systems.
        topdown: bool (True)
            Whether to walk the directory tree from the top downwards or from
            the bottom upwards.
        on_error: "omit", "raise", a callable
            if omit (default), path with exception will simply be empty;
            If raise, an underlying exception will be raised;
            if callable, it will be called with a single OSError instance as argument
        kwargs: passed to ``ls``
        """
        if maxdepth is not None and maxdepth < 1:
            raise ValueError("maxdepth must be at least 1")

        path = self._strip_protocol(path)
        full_dirs = {}
        dirs = {}
        files = {}

        detail = kwargs.pop("detail", False)
        try:
            listing = self.ls(path, detail=True, **kwargs)
        except (FileNotFoundError, OSError) as e:
            if on_error == "raise":
                raise
            elif callable(on_error):
                on_error(e)
            if detail:
                return path, {}, {}
            return path, [], []

        for info in listing:
            # each info name must be at least [path]/part , but here
            # we check also for names like [path]/part/
            pathname = info["name"].rstrip("/")
            name = pathname.rsplit("/", 1)[-1]
            if info["type"] == "directory" and pathname != path:
                # do not include "self" path
                full_dirs[name] = pathname
                dirs[name] = info
            elif pathname == path:
                # file-like with same name as given path
                files[""] = info
            else:
                files[name] = info

        if not detail:
            dirs = list(dirs)
            files = list(files)

        if topdown:
            # Yield before recursion if walking top down
            yield path, dirs, files

        if maxdepth is not None:
            maxdepth -= 1
            if maxdepth < 1:
                if not topdown:
                    yield path, dirs, files
                return

        for d in dirs:
            yield from self.walk(
                full_dirs[d],
                maxdepth=maxdepth,
                detail=detail,
                topdown=topdown,
                **kwargs,
            )

        if not topdown:
            # Yield after recursion if walking bottom up
            yield path, dirs, files

    def find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs):
        """List all files below path.

        Like posix ``find`` command without conditions

        Parameters
        ----------
        path : str
        maxdepth: int or None
            If not None, the maximum number of levels to descend
        withdirs: bool
            Whether to include directory paths in the output. This is True
            when used by glob, but users usually only want files.
        kwargs are passed to ``ls``.
        """
        # TODO: allow equivalent of -name parameter
        path = self._strip_protocol(path)
        out = {}

        # Add the root directory if withdirs is requested
        # This is needed for posix glob compliance
        if withdirs and path != "" and self.isdir(path):
            out[path] = self.info(path)

        for _, dirs, files in self.walk(path, maxdepth, detail=True, **kwargs):
            if withdirs:
                files.update(dirs)
            out.update({info["name"]: info for name, info in files.items()})
        if not out and self.isfile(path):
            # walk works on directories, but find should also return [path]
            # when path happens to be a file
            out[path] = {}
        names = sorted(out)
        if not detail:
            return names
        else:
            return {name: out[name] for name in names}

    def du(self, path, total=True, maxdepth=None, withdirs=False, **kwargs):
        """Space used by files and optionally directories within a path

        Directory size does not include the size of its contents.

        Parameters
        ----------
        path: str
        total: bool
            Whether to sum all the file sizes
        maxdepth: int or None
            Maximum number of directory levels to descend, None for unlimited.
        withdirs: bool
            Whether to include directory paths in the output.
        kwargs: passed to ``find``

        Returns
        -------
        Dict of {path: size} if total=False, or int otherwise, where numbers
        refer to bytes used.
        """
        sizes = {}
        if withdirs and self.isdir(path):
            # Include top-level directory in output
            info = self.info(path)
            sizes[info["name"]] = info["size"]
        for f in self.find(path, maxdepth=maxdepth, withdirs=withdirs, **kwargs):
            info = self.info(f)
            sizes[info["name"]] = info["size"]
        if total:
            return sum(sizes.values())
        else:
            return sizes

    def glob(self, path, maxdepth=None, **kwargs):
        """
        Find files by glob-matching.

        If the path ends with '/', only folders are returned.

        We support ``"**"``,
        ``"?"`` and ``"[..]"``. We do not support ^ for pattern negation.

        The `maxdepth` option is applied on the first `**` found in the path.

        kwargs are passed to ``ls``.
        """
        if maxdepth is not None and maxdepth < 1:
            raise ValueError("maxdepth must be at least 1")

        import re

        seps = (os.path.sep, os.path.altsep) if os.path.altsep else (os.path.sep,)
        ends_with_sep = path.endswith(seps)  # _strip_protocol strips trailing slash
        path = self._strip_protocol(path)
        append_slash_to_dirname = ends_with_sep or path.endswith(
            tuple(sep + "**" for sep in seps)
        )
        idx_star = path.find("*") if path.find("*") >= 0 else len(path)
        idx_qmark = path.find("?") if path.find("?") >= 0 else len(path)
        idx_brace = path.find("[") if path.find("[") >= 0 else len(path)

        min_idx = min(idx_star, idx_qmark, idx_brace)

        detail = kwargs.pop("detail", False)

        if not has_magic(path):
            if self.exists(path, **kwargs):
                if not detail:
                    return [path]
                else:
                    return {path: self.info(path, **kwargs)}
            else:
                if not detail:
                    return []  # glob of non-existent returns empty
                else:
                    return {}
        elif "/" in path[:min_idx]:
            min_idx = path[:min_idx].rindex("/")
            root = path[: min_idx + 1]
            depth = path[min_idx + 1 :].count("/") + 1
        else:
            root = ""
            depth = path[min_idx + 1 :].count("/") + 1

        if "**" in path:
            if maxdepth is not None:
                idx_double_stars = path.find("**")
                depth_double_stars = path[idx_double_stars:].count("/") + 1
                depth = depth - depth_double_stars + maxdepth
            else:
                depth = None

        allpaths = self.find(root, maxdepth=depth, withdirs=True, detail=True, **kwargs)

        pattern = glob_translate(path + ("/" if ends_with_sep else ""))
        pattern = re.compile(pattern)

        out = {
            p: info
            for p, info in sorted(allpaths.items())
            if pattern.match(
                (
                    p + "/"
                    if append_slash_to_dirname and info["type"] == "directory"
                    else p
                )
            )
        }

        if detail:
            return out
        else:
            return list(out)
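    # Illustrative sketch (assuming the built-in "memory" implementation) of how
    # find, glob and walk relate:
    #
    #     fs = fsspec.filesystem("memory")
    #     fs.pipe({"/data/a.csv": b"x", "/data/sub/b.csv": b"y"})
    #     fs.find("/data")                 # every file below /data, recursively
    #     fs.glob("/data/*.csv")           # shallow match: only /data/a.csv
    #     for root, dirs, files in fs.walk("/data"):
    #         ...                          # os.walk-style traversal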
    def exists(self, path, **kwargs):
        """Is there a file at the given path"""
        try:
            self.info(path, **kwargs)
            return True
        except:  # noqa: E722
            # any exception allowed bar FileNotFoundError?
            return False

    def lexists(self, path, **kwargs):
        """If there is a file at the given path (including
        broken links)"""
        return self.exists(path)

    def info(self, path, **kwargs):
        """Give details of entry at path

        Returns a single dictionary, with exactly the same information as ``ls``
        would with ``detail=True``.

        The default implementation calls ls and could be overridden by a
        shortcut. kwargs are passed on to ``ls()``.

        Some file systems might not be able to measure the file's size, in
        which case, the returned dict will include ``'size': None``.

        Returns
        -------
        dict with keys: name (full path in the FS), size (in bytes), type (file,
        directory, or something else) and other FS-specific keys.
        """
        path = self._strip_protocol(path)
        out = self.ls(self._parent(path), detail=True, **kwargs)
        out = [o for o in out if o["name"].rstrip("/") == path]
        if out:
            return out[0]
        out = self.ls(path, detail=True, **kwargs)
        path = path.rstrip("/")
        out1 = [o for o in out if o["name"].rstrip("/") == path]
        if len(out1) == 1:
            if "size" not in out1[0]:
                out1[0]["size"] = None
            return out1[0]
        elif len(out1) > 1 or out:
            return {"name": path, "size": 0, "type": "directory"}
        else:
            raise FileNotFoundError(path)

    def checksum(self, path):
        """Unique value for current version of file

        If the checksum is the same from one moment to another, the contents
        are guaranteed to be the same. If the checksum changes, the contents
        *might* have changed.

        This should normally be overridden; default will probably capture
        creation/modification timestamp (which would be good) or maybe
        access timestamp (which would be bad)
        """
        return int(tokenize(self.info(path)), 16)

    def size(self, path):
        """Size in bytes of file"""
        return self.info(path).get("size", None)

    def sizes(self, paths):
        """Size in bytes of each file in a list of paths"""
        return [self.size(p) for p in paths]

    def isdir(self, path):
        """Is this entry directory-like?"""
        try:
            return self.info(path)["type"] == "directory"
        except OSError:
            return False

    def isfile(self, path):
        """Is this entry file-like?"""
        try:
            return self.info(path)["type"] == "file"
        except:  # noqa: E722
            return False

    def read_text(self, path, encoding=None, errors=None, newline=None, **kwargs):
        """Get the contents of the file as a string.

        Parameters
        ----------
        path: str
            URL of file on this filesystems
        encoding, errors, newline: same as `open`.
        """
        with self.open(
            path,
            mode="r",
            encoding=encoding,
            errors=errors,
            newline=newline,
            **kwargs,
        ) as f:
            return f.read()

    def write_text(
        self, path, value, encoding=None, errors=None, newline=None, **kwargs
    ):
        """Write the text to the given file.

        An existing file will be overwritten.

        Parameters
        ----------
        path: str
            URL of file on this filesystems
        value: str
            Text to write.
        encoding, errors, newline: same as `open`.
        """
        with self.open(
            path,
            mode="w",
            encoding=encoding,
            errors=errors,
            newline=newline,
            **kwargs,
        ) as f:
            return f.write(value)

    def cat_file(self, path, start=None, end=None, **kwargs):
        """Get the content of a file

        Parameters
        ----------
        path: URL of file on this filesystems
        start, end: int
            Bytes limits of the read. If negative, backwards from end,
            like usual python slices. Either can be None for start or
            end of file, respectively
        kwargs: passed to ``open()``.
        """
        # explicitly set buffering off?
        with self.open(path, "rb", **kwargs) as f:
            if start is not None:
                if start >= 0:
                    f.seek(start)
                else:
                    f.seek(max(0, f.size + start))
            if end is not None:
                if end < 0:
                    end = f.size + end
                return f.read(end - f.tell())
            return f.read()

    def pipe_file(self, path, value, **kwargs):
        """Set the bytes of given file"""
        with self.open(path, "wb", **kwargs) as f:
            f.write(value)

    def pipe(self, path, value=None, **kwargs):
        """Put value into path

        (counterpart to ``cat``)

        Parameters
        ----------
        path: string or dict(str, bytes)
            If a string, a single remote location to put ``value`` bytes; if a dict,
            a mapping of {path: bytesvalue}.
        value: bytes, optional
            If using a single path, these are the bytes to put there. Ignored if
            ``path`` is a dict
        """
        if isinstance(path, str):
            self.pipe_file(self._strip_protocol(path), value, **kwargs)
        elif isinstance(path, dict):
            for k, v in path.items():
                self.pipe_file(self._strip_protocol(k), v, **kwargs)
        else:
            raise ValueError("path must be str or dict")

    def cat_ranges(
        self, paths, starts, ends, max_gap=None, on_error="return", **kwargs
    ):
        """Get the contents of byte ranges from one or more files

        Parameters
        ----------
        paths: list
            A list of filepaths on this filesystems
        starts, ends: int or list
            Bytes limits of the read. If using a single int, the same value will be
            used to read all the specified files.
        """
        if max_gap is not None:
            raise NotImplementedError
        if not isinstance(paths, list):
            raise TypeError
        if not isinstance(starts, list):
            starts = [starts] * len(paths)
        if not isinstance(ends, list):
            ends = [ends] * len(paths)
        if len(starts) != len(paths) or len(ends) != len(paths):
            raise ValueError
        out = []
        for p, s, e in zip(paths, starts, ends):
            try:
                out.append(self.cat_file(p, s, e))
            except Exception as e:
                if on_error == "return":
                    out.append(e)
                else:
                    raise
        return out

    def cat(self, path, recursive=False, on_error="raise", **kwargs):
        """Fetch (potentially multiple) paths' contents

        Parameters
        ----------
        recursive: bool
            If True, assume the path(s) are directories, and get all the
            contained files
        on_error : "raise", "omit", "return"
            If raise, an underlying exception will be raised (converted to KeyError
            if the type is in self.missing_exceptions); if omit, keys with exception
            will simply not be included in the output; if "return", all keys are
            included in the output, but the value will be bytes or an exception
            instance.
        kwargs: passed to cat_file

        Returns
        -------
        dict of {path: contents} if there are multiple paths
        or the path has been otherwise expanded
        """
        paths = self.expand_path(path, recursive=recursive)
        if (
            len(paths) > 1
            or isinstance(path, list)
            or paths[0] != self._strip_protocol(path)
        ):
            out = {}
            for path in paths:
                try:
                    out[path] = self.cat_file(path, **kwargs)
                except Exception as e:
                    if on_error == "raise":
                        raise
                    if on_error == "return":
                        out[path] = e
            return out
        else:
            return self.cat_file(paths[0], **kwargs)
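    # Illustrative sketch (assuming the "memory" implementation): ``pipe`` and
    # ``cat`` are byte-oriented counterparts, and ``cat_file`` accepts byte ranges.
    #
    #     fs = fsspec.filesystem("memory")
    #     fs.pipe("/tmp/hello.bin", b"hello world")
    #     fs.cat_file("/tmp/hello.bin", 0, 5)    # b"hello"
    #     fs.cat("/tmp/hello.bin")               # b"hello world"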
    def get_file(
        self, rpath, lpath, callback=_DEFAULT_CALLBACK, outfile=None, **kwargs
    ):
        """Copy single remote file to local"""
        from .implementations.local import LocalFileSystem

        if isfilelike(lpath):
            outfile = lpath
        elif self.isdir(rpath):
            os.makedirs(lpath, exist_ok=True)
            return None

        fs = LocalFileSystem(auto_mkdir=True)
        fs.makedirs(fs._parent(lpath), exist_ok=True)

        with self.open(rpath, "rb", **kwargs) as f1:
            if outfile is None:
                outfile = open(lpath, "wb")

            try:
                callback.set_size(getattr(f1, "size", None))
                data = True
                while data:
                    data = f1.read(self.blocksize)
                    segment_len = outfile.write(data)
                    if segment_len is None:
                        segment_len = len(data)
                    callback.relative_update(segment_len)
            finally:
                if not isfilelike(lpath):
                    outfile.close()

    def get(
        self,
        rpath,
        lpath,
        recursive=False,
        callback=_DEFAULT_CALLBACK,
        maxdepth=None,
        **kwargs,
    ):
        """Copy file(s) to local.

        Copies a specific file or tree of files (if recursive=True). If lpath
        ends with a "/", it will be assumed to be a directory, and target files
        will go within. Can submit a list of paths, which may be glob-patterns
        and will be expanded.

        Calls get_file for each source.
        """
        if isinstance(lpath, list) and isinstance(rpath, list):
            # No need to expand paths when both source and destination
            # are provided as lists
            rpaths = rpath
            lpaths = lpath
        else:
            from .implementations.local import (
                LocalFileSystem,
                make_path_posix,
                trailing_sep,
            )

            source_is_str = isinstance(rpath, str)
            rpaths = self.expand_path(rpath, recursive=recursive, maxdepth=maxdepth)
            if source_is_str and (not recursive or maxdepth is not None):
                # Non-recursive glob does not copy directories
                rpaths = [p for p in rpaths if not (trailing_sep(p) or self.isdir(p))]
                if not rpaths:
                    return

            if isinstance(lpath, str):
                lpath = make_path_posix(lpath)

            source_is_file = len(rpaths) == 1
            dest_is_dir = isinstance(lpath, str) and (
                trailing_sep(lpath) or LocalFileSystem().isdir(lpath)
            )

            exists = source_is_str and (
                (has_magic(rpath) and source_is_file)
                or (not has_magic(rpath) and dest_is_dir and not trailing_sep(rpath))
            )
            lpaths = other_paths(
                rpaths,
                lpath,
                exists=exists,
                flatten=not source_is_str,
            )

        callback.set_size(len(lpaths))
        for lpath, rpath in callback.wrap(zip(lpaths, rpaths)):
            callback.branch(rpath, lpath, kwargs)
            self.get_file(rpath, lpath, **kwargs)

    def put_file(self, lpath, rpath, callback=_DEFAULT_CALLBACK, **kwargs):
        """Copy single file to remote"""
        if os.path.isdir(lpath):
            self.makedirs(rpath, exist_ok=True)
            return None

        with open(lpath, "rb") as f1:
            size = f1.seek(0, 2)
            callback.set_size(size)
            f1.seek(0)

            self.mkdirs(self._parent(os.fspath(rpath)), exist_ok=True)
            with self.open(rpath, "wb", **kwargs) as f2:
                while f1.tell() < size:
                    data = f1.read(self.blocksize)
                    segment_len = f2.write(data)
                    if segment_len is None:
                        segment_len = len(data)
                    callback.relative_update(segment_len)

    def put(
        self,
        lpath,
        rpath,
        recursive=False,
        callback=_DEFAULT_CALLBACK,
        maxdepth=None,
        **kwargs,
    ):
        """Copy file(s) from local.

        Copies a specific file or tree of files (if recursive=True). If rpath
        ends with a "/", it will be assumed to be a directory, and target files
        will go within.

        Calls put_file for each source.
        """
        if isinstance(lpath, list) and isinstance(rpath, list):
            # No need to expand paths when both source and destination
            # are provided as lists
            rpaths = rpath
            lpaths = lpath
        else:
            from .implementations.local import (
                LocalFileSystem,
                make_path_posix,
                trailing_sep,
            )

            source_is_str = isinstance(lpath, str)
            if source_is_str:
                lpath = make_path_posix(lpath)
            fs = LocalFileSystem()
            lpaths = fs.expand_path(lpath, recursive=recursive, maxdepth=maxdepth)
            if source_is_str and (not recursive or maxdepth is not None):
                # Non-recursive glob does not copy directories
                lpaths = [p for p in lpaths if not (trailing_sep(p) or fs.isdir(p))]
                if not lpaths:
                    return

            source_is_file = len(lpaths) == 1
            dest_is_dir = isinstance(rpath, str) and (
                trailing_sep(rpath) or self.isdir(rpath)
            )

            rpath = (
                self._strip_protocol(rpath)
                if isinstance(rpath, str)
                else [self._strip_protocol(p) for p in rpath]
            )
            exists = source_is_str and (
                (has_magic(lpath) and source_is_file)
                or (not has_magic(lpath) and dest_is_dir and not trailing_sep(lpath))
            )
            rpaths = other_paths(
                lpaths,
                rpath,
                exists=exists,
                flatten=not source_is_str,
            )

        callback.set_size(len(rpaths))
        for lpath, rpath in callback.wrap(zip(lpaths, rpaths)):
            callback.branch(lpath, rpath, kwargs)
            self.put_file(lpath, rpath, **kwargs)
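    # Illustrative sketch (assuming the "memory" implementation and a POSIX-style
    # local path): a trailing "/" marks the destination as a directory.
    #
    #     fs = fsspec.filesystem("memory")
    #     fs.pipe("/remote/report.txt", b"contents")
    #     fs.get("/remote/report.txt", "/tmp/downloads/")          # download
    #     fs.put("/tmp/downloads/report.txt", "/remote/copy.txt")  # upload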
def head(self, path, size=1024):
|
1060 |
+
"""Get the first ``size`` bytes from file"""
|
1061 |
+
with self.open(path, "rb") as f:
|
1062 |
+
return f.read(size)
|
1063 |
+
|
1064 |
+
def tail(self, path, size=1024):
|
1065 |
+
"""Get the last ``size`` bytes from file"""
|
1066 |
+
with self.open(path, "rb") as f:
|
1067 |
+
f.seek(max(-size, -f.size), 2)
|
1068 |
+
return f.read()
|
1069 |
+
|
1070 |
+
def cp_file(self, path1, path2, **kwargs):
|
1071 |
+
raise NotImplementedError
|
1072 |
+
|
1073 |
+
def copy(
|
1074 |
+
self, path1, path2, recursive=False, maxdepth=None, on_error=None, **kwargs
|
1075 |
+
):
|
1076 |
+
"""Copy within two locations in the filesystem
|
1077 |
+
|
1078 |
+
on_error : "raise", "ignore"
|
1079 |
+
If raise, any not-found exceptions will be raised; if ignore any
|
1080 |
+
not-found exceptions will cause the path to be skipped; defaults to
|
1081 |
+
raise unless recursive is true, where the default is ignore
|
1082 |
+
"""
|
1083 |
+
if on_error is None and recursive:
|
1084 |
+
on_error = "ignore"
|
1085 |
+
elif on_error is None:
|
1086 |
+
on_error = "raise"
|
1087 |
+
|
1088 |
+
if isinstance(path1, list) and isinstance(path2, list):
|
1089 |
+
# No need to expand paths when both source and destination
|
1090 |
+
# are provided as lists
|
1091 |
+
paths1 = path1
|
1092 |
+
paths2 = path2
|
1093 |
+
else:
|
1094 |
+
from .implementations.local import trailing_sep
|
1095 |
+
|
1096 |
+
source_is_str = isinstance(path1, str)
|
1097 |
+
paths1 = self.expand_path(path1, recursive=recursive, maxdepth=maxdepth)
|
1098 |
+
if source_is_str and (not recursive or maxdepth is not None):
|
1099 |
+
# Non-recursive glob does not copy directories
|
1100 |
+
paths1 = [p for p in paths1 if not (trailing_sep(p) or self.isdir(p))]
|
1101 |
+
if not paths1:
|
1102 |
+
return
|
1103 |
+
|
1104 |
+
source_is_file = len(paths1) == 1
|
1105 |
+
dest_is_dir = isinstance(path2, str) and (
|
1106 |
+
trailing_sep(path2) or self.isdir(path2)
|
1107 |
+
)
|
1108 |
+
|
1109 |
+
exists = source_is_str and (
|
1110 |
+
(has_magic(path1) and source_is_file)
|
1111 |
+
or (not has_magic(path1) and dest_is_dir and not trailing_sep(path1))
|
1112 |
+
)
|
1113 |
+
paths2 = other_paths(
|
1114 |
+
paths1,
|
1115 |
+
path2,
|
1116 |
+
exists=exists,
|
1117 |
+
flatten=not source_is_str,
|
1118 |
+
)
|
1119 |
+
|
1120 |
+
for p1, p2 in zip(paths1, paths2):
|
1121 |
+
try:
|
1122 |
+
self.cp_file(p1, p2, **kwargs)
|
1123 |
+
except FileNotFoundError:
|
1124 |
+
if on_error == "raise":
|
1125 |
+
raise
|
1126 |
+
|
1127 |
+
def expand_path(self, path, recursive=False, maxdepth=None, **kwargs):
|
1128 |
+
"""Turn one or more globs or directories into a list of all matching paths
|
1129 |
+
to files or directories.
|
1130 |
+
|
1131 |
+
kwargs are passed to ``glob`` or ``find``, which may in turn call ``ls``
|
1132 |
+
"""
|
1133 |
+
|
1134 |
+
if maxdepth is not None and maxdepth < 1:
|
1135 |
+
raise ValueError("maxdepth must be at least 1")
|
1136 |
+
|
1137 |
+
if isinstance(path, str):
|
1138 |
+
out = self.expand_path([path], recursive, maxdepth)
|
1139 |
+
else:
|
1140 |
+
out = set()
|
1141 |
+
path = [self._strip_protocol(p) for p in path]
|
1142 |
+
for p in path:
|
1143 |
+
if has_magic(p):
|
1144 |
+
bit = set(self.glob(p, maxdepth=maxdepth, **kwargs))
|
1145 |
+
out |= bit
|
1146 |
+
if recursive:
|
1147 |
+
# glob call above expanded one depth so if maxdepth is defined
|
1148 |
+
# then decrement it in expand_path call below. If it is zero
|
1149 |
+
# after decrementing then avoid expand_path call.
|
1150 |
+
if maxdepth is not None and maxdepth <= 1:
|
1151 |
+
continue
|
1152 |
+
out |= set(
|
1153 |
+
self.expand_path(
|
1154 |
+
list(bit),
|
1155 |
+
recursive=recursive,
|
1156 |
+
maxdepth=maxdepth - 1 if maxdepth is not None else None,
|
1157 |
+
**kwargs,
|
1158 |
+
)
|
1159 |
+
)
|
1160 |
+
continue
|
1161 |
+
elif recursive:
|
1162 |
+
rec = set(
|
1163 |
+
self.find(
|
1164 |
+
p, maxdepth=maxdepth, withdirs=True, detail=False, **kwargs
|
1165 |
+
)
|
1166 |
+
)
|
1167 |
+
out |= rec
|
1168 |
+
if p not in out and (recursive is False or self.exists(p)):
|
1169 |
+
# should only check once, for the root
|
1170 |
+
out.add(p)
|
1171 |
+
if not out:
|
1172 |
+
raise FileNotFoundError(path)
|
1173 |
+
return sorted(out)
|
1174 |
+
|
1175 |
+
def mv(self, path1, path2, recursive=False, maxdepth=None, **kwargs):
|
1176 |
+
"""Move file(s) from one location to another"""
|
1177 |
+
if path1 == path2:
|
1178 |
+
logger.debug("%s mv: The paths are the same, so no files were moved.", self)
|
1179 |
+
else:
|
1180 |
+
self.copy(path1, path2, recursive=recursive, maxdepth=maxdepth)
|
1181 |
+
self.rm(path1, recursive=recursive)
|
1182 |
+
|
1183 |
+
def rm_file(self, path):
|
1184 |
+
"""Delete a file"""
|
1185 |
+
self._rm(path)
|
1186 |
+
|
1187 |
+
def _rm(self, path):
|
1188 |
+
"""Delete one file"""
|
1189 |
+
# this is the old name for the method, prefer rm_file
|
1190 |
+
raise NotImplementedError
|
1191 |
+
|
1192 |
+
def rm(self, path, recursive=False, maxdepth=None):
|
1193 |
+
"""Delete files.
|
1194 |
+
|
1195 |
+
Parameters
|
1196 |
+
----------
|
1197 |
+
path: str or list of str
|
1198 |
+
File(s) to delete.
|
1199 |
+
recursive: bool
|
1200 |
+
If file(s) are directories, recursively delete contents and then
|
1201 |
+
also remove the directory
|
1202 |
+
maxdepth: int or None
|
1203 |
+
Depth to pass to walk for finding files to delete, if recursive.
|
1204 |
+
If None, there will be no limit and infinite recursion may be
|
1205 |
+
possible.
|
1206 |
+
"""
|
1207 |
+
path = self.expand_path(path, recursive=recursive, maxdepth=maxdepth)
|
1208 |
+
for p in reversed(path):
|
1209 |
+
self.rm_file(p)
|
1210 |
+
|
1211 |
+
@classmethod
|
1212 |
+
def _parent(cls, path):
|
1213 |
+
path = cls._strip_protocol(path)
|
1214 |
+
if "/" in path:
|
1215 |
+
parent = path.rsplit("/", 1)[0].lstrip(cls.root_marker)
|
1216 |
+
return cls.root_marker + parent
|
1217 |
+
else:
|
1218 |
+
return cls.root_marker
|
1219 |
+
|
1220 |
+
def _open(
|
1221 |
+
self,
|
1222 |
+
path,
|
1223 |
+
mode="rb",
|
1224 |
+
block_size=None,
|
1225 |
+
autocommit=True,
|
1226 |
+
cache_options=None,
|
1227 |
+
**kwargs,
|
1228 |
+
):
|
1229 |
+
"""Return raw bytes-mode file-like from the file-system"""
|
1230 |
+
return AbstractBufferedFile(
|
1231 |
+
self,
|
1232 |
+
path,
|
1233 |
+
mode,
|
1234 |
+
block_size,
|
1235 |
+
autocommit,
|
1236 |
+
cache_options=cache_options,
|
1237 |
+
**kwargs,
|
1238 |
+
)
|
1239 |
+
|
1240 |
+
def open(
|
1241 |
+
self,
|
1242 |
+
path,
|
1243 |
+
mode="rb",
|
1244 |
+
block_size=None,
|
1245 |
+
cache_options=None,
|
1246 |
+
compression=None,
|
1247 |
+
**kwargs,
|
1248 |
+
):
|
1249 |
+
"""
|
1250 |
+
Return a file-like object from the filesystem
|
1251 |
+
|
1252 |
+
The resultant instance must function correctly in a context ``with``
|
1253 |
+
block.
|
1254 |
+
|
1255 |
+
Parameters
|
1256 |
+
----------
|
1257 |
+
path: str
|
1258 |
+
Target file
|
1259 |
+
mode: str like 'rb', 'w'
|
1260 |
+
See builtin ``open()``
|
1261 |
+
block_size: int
|
1262 |
+
Some indication of buffering - this is a value in bytes
|
1263 |
+
cache_options : dict, optional
|
1264 |
+
Extra arguments to pass through to the cache.
|
1265 |
+
compression: string or None
|
1266 |
+
If given, open file using compression codec. Can either be a compression
|
1267 |
+
name (a key in ``fsspec.compression.compr``) or "infer" to guess the
|
1268 |
+
compression from the filename suffix.
|
1269 |
+
encoding, errors, newline: passed on to TextIOWrapper for text mode
|
1270 |
+
"""
|
1271 |
+
import io
|
1272 |
+
|
1273 |
+
path = self._strip_protocol(path)
|
1274 |
+
if "b" not in mode:
|
1275 |
+
mode = mode.replace("t", "") + "b"
|
1276 |
+
|
1277 |
+
text_kwargs = {
|
1278 |
+
k: kwargs.pop(k)
|
1279 |
+
for k in ["encoding", "errors", "newline"]
|
1280 |
+
if k in kwargs
|
1281 |
+
}
|
1282 |
+
return io.TextIOWrapper(
|
1283 |
+
self.open(
|
1284 |
+
path,
|
1285 |
+
mode,
|
1286 |
+
block_size=block_size,
|
1287 |
+
cache_options=cache_options,
|
1288 |
+
compression=compression,
|
1289 |
+
**kwargs,
|
1290 |
+
),
|
1291 |
+
**text_kwargs,
|
1292 |
+
)
|
1293 |
+
else:
|
1294 |
+
ac = kwargs.pop("autocommit", not self._intrans)
|
1295 |
+
f = self._open(
|
1296 |
+
path,
|
1297 |
+
mode=mode,
|
1298 |
+
block_size=block_size,
|
1299 |
+
autocommit=ac,
|
1300 |
+
cache_options=cache_options,
|
1301 |
+
**kwargs,
|
1302 |
+
)
|
1303 |
+
if compression is not None:
|
1304 |
+
from fsspec.compression import compr
|
1305 |
+
from fsspec.core import get_compression
|
1306 |
+
|
1307 |
+
compression = get_compression(path, compression)
|
1308 |
+
compress = compr[compression]
|
1309 |
+
f = compress(f, mode=mode[0])
|
1310 |
+
|
1311 |
+
if not ac and "r" not in mode:
|
1312 |
+
self.transaction.files.append(f)
|
1313 |
+
return f
|
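# Usage sketch (illustrative, not part of spec.py): text mode and compression are
# layered on top of the raw binary file returned by _open(), so a gzipped CSV can
# be read transparently from any concrete filesystem instance `fs`:
#
#     with fs.open("data/file.csv.gz", mode="rt", compression="infer",
#                  encoding="utf-8") as f:
#         header = f.readline()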
1314 |
+
|
1315 |
+
def touch(self, path, truncate=True, **kwargs):
|
1316 |
+
"""Create empty file, or update timestamp
|
1317 |
+
|
1318 |
+
Parameters
|
1319 |
+
----------
|
1320 |
+
path: str
|
1321 |
+
file location
|
1322 |
+
truncate: bool
|
1323 |
+
If True, always set file size to 0; if False, update timestamp and
|
1324 |
+
leave file unchanged, if backend allows this
|
1325 |
+
"""
|
1326 |
+
if truncate or not self.exists(path):
|
1327 |
+
with self.open(path, "wb", **kwargs):
|
1328 |
+
pass
|
1329 |
+
else:
|
1330 |
+
raise NotImplementedError # update timestamp, if possible
|
1331 |
+
|
1332 |
+
def ukey(self, path):
|
1333 |
+
"""Hash of file properties, to tell if it has changed"""
|
1334 |
+
return sha256(str(self.info(path)).encode()).hexdigest()
|
1335 |
+
|
1336 |
+
def read_block(self, fn, offset, length, delimiter=None):
|
1337 |
+
"""Read a block of bytes from
|
1338 |
+
|
1339 |
+
Starting at ``offset`` of the file, read ``length`` bytes. If
|
1340 |
+
``delimiter`` is set then we ensure that the read starts and stops at
|
1341 |
+
delimiter boundaries that follow the locations ``offset`` and ``offset
|
1342 |
+
+ length``. If ``offset`` is zero then we start at zero. The
|
1343 |
+
bytestring returned WILL include the end delimiter string.
|
1344 |
+
|
1345 |
+
If offset+length is beyond the eof, reads to eof.
|
1346 |
+
|
1347 |
+
Parameters
|
1348 |
+
----------
|
1349 |
+
fn: string
|
1350 |
+
Path to filename
|
1351 |
+
offset: int
|
1352 |
+
Byte offset to start read
|
1353 |
+
length: int
|
1354 |
+
Number of bytes to read. If None, read to end.
|
1355 |
+
delimiter: bytes (optional)
|
1356 |
+
Ensure reading starts and stops at delimiter bytestring
|
1357 |
+
|
1358 |
+
Examples
|
1359 |
+
--------
|
1360 |
+
>>> fs.read_block('data/file.csv', 0, 13) # doctest: +SKIP
|
1361 |
+
b'Alice, 100\\nBo'
|
1362 |
+
>>> fs.read_block('data/file.csv', 0, 13, delimiter=b'\\n') # doctest: +SKIP
|
1363 |
+
b'Alice, 100\\nBob, 200\\n'
|
1364 |
+
|
1365 |
+
Use ``length=None`` to read to the end of the file.
|
1366 |
+
>>> fs.read_block('data/file.csv', 0, None, delimiter=b'\\n') # doctest: +SKIP
|
1367 |
+
b'Alice, 100\\nBob, 200\\nCharlie, 300'
|
1368 |
+
|
1369 |
+
See Also
|
1370 |
+
--------
|
1371 |
+
:func:`fsspec.utils.read_block`
|
1372 |
+
"""
|
1373 |
+
with self.open(fn, "rb") as f:
|
1374 |
+
size = f.size
|
1375 |
+
if length is None:
|
1376 |
+
length = size
|
1377 |
+
if size is not None and offset + length > size:
|
1378 |
+
length = size - offset
|
1379 |
+
return read_block(f, offset, length, delimiter)
|
1380 |
+
|
1381 |
+
def to_json(self):
|
1382 |
+
"""
|
1383 |
+
JSON representation of this filesystem instance
|
1384 |
+
|
1385 |
+
Returns
|
1386 |
+
-------
|
1387 |
+
str: JSON structure with keys cls (the python location of this class),
|
1388 |
+
protocol (text name of this class's protocol, first one in case of
|
1389 |
+
multiple), args (positional args, usually empty), and all other
|
1390 |
+
kwargs as their own keys.
|
1391 |
+
"""
|
1392 |
+
import json
|
1393 |
+
|
1394 |
+
cls = type(self)
|
1395 |
+
cls = ".".join((cls.__module__, cls.__name__))
|
1396 |
+
proto = (
|
1397 |
+
self.protocol[0]
|
1398 |
+
if isinstance(self.protocol, (tuple, list))
|
1399 |
+
else self.protocol
|
1400 |
+
)
|
1401 |
+
return json.dumps(
|
1402 |
+
dict(
|
1403 |
+
**{"cls": cls, "protocol": proto, "args": self.storage_args},
|
1404 |
+
**self.storage_options,
|
1405 |
+
)
|
1406 |
+
)
|
1407 |
+
|
1408 |
+
@staticmethod
|
1409 |
+
def from_json(blob):
|
1410 |
+
"""
|
1411 |
+
Recreate a filesystem instance from JSON representation
|
1412 |
+
|
1413 |
+
See ``.to_json()`` for the expected structure of the input
|
1414 |
+
|
1415 |
+
Parameters
|
1416 |
+
----------
|
1417 |
+
blob: str
|
1418 |
+
|
1419 |
+
Returns
|
1420 |
+
-------
|
1421 |
+
file system instance, not necessarily of this particular class.
|
1422 |
+
"""
|
1423 |
+
import json
|
1424 |
+
|
1425 |
+
from .registry import _import_class, get_filesystem_class
|
1426 |
+
|
1427 |
+
dic = json.loads(blob)
|
1428 |
+
protocol = dic.pop("protocol")
|
1429 |
+
try:
|
1430 |
+
cls = _import_class(dic.pop("cls"))
|
1431 |
+
except (ImportError, ValueError, RuntimeError, KeyError):
|
1432 |
+
cls = get_filesystem_class(protocol)
|
1433 |
+
return cls(*dic.pop("args", ()), **dic)
|
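# Round-trip sketch (illustrative): to_json() records cls, protocol, args and the
# storage options, and from_json() rebuilds an equivalent instance from that blob;
# with instance caching enabled this normally hands back the same cached object.
#
#     import fsspec
#     fs = fsspec.filesystem("memory")
#     blob = fs.to_json()
#     same_fs = type(fs).from_json(blob)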
1434 |
+
|
1435 |
+
def _get_pyarrow_filesystem(self):
|
1436 |
+
"""
|
1437 |
+
Make a version of the FS instance which will be acceptable to pyarrow
|
1438 |
+
"""
|
1439 |
+
# all instances already also derive from pyarrow
|
1440 |
+
return self
|
1441 |
+
|
1442 |
+
def get_mapper(self, root="", check=False, create=False, missing_exceptions=None):
|
1443 |
+
"""Create key/value store based on this file-system
|
1444 |
+
|
1445 |
+
Makes a MutableMapping interface to the FS at the given root path.
|
1446 |
+
See ``fsspec.mapping.FSMap`` for further details.
|
1447 |
+
"""
|
1448 |
+
from .mapping import FSMap
|
1449 |
+
|
1450 |
+
return FSMap(
|
1451 |
+
root,
|
1452 |
+
self,
|
1453 |
+
check=check,
|
1454 |
+
create=create,
|
1455 |
+
missing_exceptions=missing_exceptions,
|
1456 |
+
)
|
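# Usage sketch (illustrative): the FSMap returned by get_mapper() behaves as a
# MutableMapping keyed by path, which is the interface expected by key/value
# consumers such as zarr.
#
#     m = fs.get_mapper("bucket/prefix")
#     m["a/b"] = b"payload"
#     assert m["a/b"] == b"payload"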
1457 |
+
|
1458 |
+
@classmethod
|
1459 |
+
def clear_instance_cache(cls):
|
1460 |
+
"""
|
1461 |
+
Clear the cache of filesystem instances.
|
1462 |
+
|
1463 |
+
Notes
|
1464 |
+
-----
|
1465 |
+
Unless overridden by setting the ``cachable`` class attribute to False,
|
1466 |
+
the filesystem class stores a reference to newly created instances. This
|
1467 |
+
prevents Python's normal rules around garbage collection from working,
|
1468 |
+
since the instance's refcount will not drop to zero until
|
1469 |
+
``clear_instance_cache`` is called.
|
1470 |
+
"""
|
1471 |
+
cls._cache.clear()
|
1472 |
+
|
1473 |
+
def created(self, path):
|
1474 |
+
"""Return the created timestamp of a file as a datetime.datetime"""
|
1475 |
+
raise NotImplementedError
|
1476 |
+
|
1477 |
+
def modified(self, path):
|
1478 |
+
"""Return the modified timestamp of a file as a datetime.datetime"""
|
1479 |
+
raise NotImplementedError
|
1480 |
+
|
1481 |
+
# ------------------------------------------------------------------------
|
1482 |
+
# Aliases
|
1483 |
+
|
1484 |
+
def read_bytes(self, path, start=None, end=None, **kwargs):
|
1485 |
+
"""Alias of `AbstractFileSystem.cat_file`."""
|
1486 |
+
return self.cat_file(path, start=start, end=end, **kwargs)
|
1487 |
+
|
1488 |
+
def write_bytes(self, path, value, **kwargs):
|
1489 |
+
"""Alias of `AbstractFileSystem.pipe_file`."""
|
1490 |
+
self.pipe_file(path, value, **kwargs)
|
1491 |
+
|
1492 |
+
def makedir(self, path, create_parents=True, **kwargs):
|
1493 |
+
"""Alias of `AbstractFileSystem.mkdir`."""
|
1494 |
+
return self.mkdir(path, create_parents=create_parents, **kwargs)
|
1495 |
+
|
1496 |
+
def mkdirs(self, path, exist_ok=False):
|
1497 |
+
"""Alias of `AbstractFileSystem.makedirs`."""
|
1498 |
+
return self.makedirs(path, exist_ok=exist_ok)
|
1499 |
+
|
1500 |
+
def listdir(self, path, detail=True, **kwargs):
|
1501 |
+
"""Alias of `AbstractFileSystem.ls`."""
|
1502 |
+
return self.ls(path, detail=detail, **kwargs)
|
1503 |
+
|
1504 |
+
def cp(self, path1, path2, **kwargs):
|
1505 |
+
"""Alias of `AbstractFileSystem.copy`."""
|
1506 |
+
return self.copy(path1, path2, **kwargs)
|
1507 |
+
|
1508 |
+
def move(self, path1, path2, **kwargs):
|
1509 |
+
"""Alias of `AbstractFileSystem.mv`."""
|
1510 |
+
return self.mv(path1, path2, **kwargs)
|
1511 |
+
|
1512 |
+
def stat(self, path, **kwargs):
|
1513 |
+
"""Alias of `AbstractFileSystem.info`."""
|
1514 |
+
return self.info(path, **kwargs)
|
1515 |
+
|
1516 |
+
def disk_usage(self, path, total=True, maxdepth=None, **kwargs):
|
1517 |
+
"""Alias of `AbstractFileSystem.du`."""
|
1518 |
+
return self.du(path, total=total, maxdepth=maxdepth, **kwargs)
|
1519 |
+
|
1520 |
+
def rename(self, path1, path2, **kwargs):
|
1521 |
+
"""Alias of `AbstractFileSystem.mv`."""
|
1522 |
+
return self.mv(path1, path2, **kwargs)
|
1523 |
+
|
1524 |
+
def delete(self, path, recursive=False, maxdepth=None):
|
1525 |
+
"""Alias of `AbstractFileSystem.rm`."""
|
1526 |
+
return self.rm(path, recursive=recursive, maxdepth=maxdepth)
|
1527 |
+
|
1528 |
+
def upload(self, lpath, rpath, recursive=False, **kwargs):
|
1529 |
+
"""Alias of `AbstractFileSystem.put`."""
|
1530 |
+
return self.put(lpath, rpath, recursive=recursive, **kwargs)
|
1531 |
+
|
1532 |
+
def download(self, rpath, lpath, recursive=False, **kwargs):
|
1533 |
+
"""Alias of `AbstractFileSystem.get`."""
|
1534 |
+
return self.get(rpath, lpath, recursive=recursive, **kwargs)
|
1535 |
+
|
1536 |
+
def sign(self, path, expiration=100, **kwargs):
|
1537 |
+
"""Create a signed URL representing the given path
|
1538 |
+
|
1539 |
+
Some implementations allow temporary URLs to be generated, as a
|
1540 |
+
way of delegating credentials.
|
1541 |
+
|
1542 |
+
Parameters
|
1543 |
+
----------
|
1544 |
+
path : str
|
1545 |
+
The path on the filesystem
|
1546 |
+
expiration : int
|
1547 |
+
Number of seconds to enable the URL for (if supported)
|
1548 |
+
|
1549 |
+
Returns
|
1550 |
+
-------
|
1551 |
+
URL : str
|
1552 |
+
The signed URL
|
1553 |
+
|
1554 |
+
Raises
|
1555 |
+
------
|
1556 |
+
NotImplementedError : if method is not implemented for a filesystem
|
1557 |
+
"""
|
1558 |
+
raise NotImplementedError("Sign is not implemented for this filesystem")
|
1559 |
+
|
1560 |
+
def _isfilestore(self):
|
1561 |
+
# Originally inherited from pyarrow DaskFileSystem. Keeping this
|
1562 |
+
# here for backwards compatibility as long as pyarrow uses its
|
1563 |
+
# legacy fsspec-compatible filesystems and thus accepts fsspec
|
1564 |
+
# filesystems as well
|
1565 |
+
return False
|
1566 |
+
|
1567 |
+
|
1568 |
+
class AbstractBufferedFile(io.IOBase):
|
1569 |
+
"""Convenient class to derive from to provide buffering
|
1570 |
+
|
1571 |
+
In the case that the backend does not provide a pythonic file-like object
|
1572 |
+
already, this class contains much of the logic to build one. The only
|
1573 |
+
methods that need to be overridden are ``_upload_chunk``,
|
1574 |
+
``_initiate_upload`` and ``_fetch_range``.
|
1575 |
+
"""
|
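# Minimal subclass sketch (illustrative, not part of fsspec): for a read-only
# backend only _fetch_range needs real logic; the _download_range helper used
# here is hypothetical.
#
#     class ReadOnlyBufferedFile(AbstractBufferedFile):
#         def _fetch_range(self, start, end):
#             # return the bytes [start, end) from the remote store
#             return self.fs._download_range(self.path, start, end)
#
#         def _initiate_upload(self):
#             raise NotImplementedError("read-only backend")
#
#         def _upload_chunk(self, final=False):
#             raise NotImplementedError("read-only backend")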
1576 |
+
|
1577 |
+
DEFAULT_BLOCK_SIZE = 5 * 2**20
|
1578 |
+
_details = None
|
1579 |
+
|
1580 |
+
def __init__(
|
1581 |
+
self,
|
1582 |
+
fs,
|
1583 |
+
path,
|
1584 |
+
mode="rb",
|
1585 |
+
block_size="default",
|
1586 |
+
autocommit=True,
|
1587 |
+
cache_type="readahead",
|
1588 |
+
cache_options=None,
|
1589 |
+
size=None,
|
1590 |
+
**kwargs,
|
1591 |
+
):
|
1592 |
+
"""
|
1593 |
+
Template for files with buffered reading and writing
|
1594 |
+
|
1595 |
+
Parameters
|
1596 |
+
----------
|
1597 |
+
fs: instance of FileSystem
|
1598 |
+
path: str
|
1599 |
+
location in file-system
|
1600 |
+
mode: str
|
1601 |
+
Normal file modes. Currently only 'wb', 'ab' or 'rb'. Some file
|
1602 |
+
systems may be read-only, and some may not support append.
|
1603 |
+
block_size: int
|
1604 |
+
Buffer size for reading or writing, 'default' for class default
|
1605 |
+
autocommit: bool
|
1606 |
+
Whether to write to final destination; may only impact what
|
1607 |
+
happens when file is being closed.
|
1608 |
+
cache_type: {"readahead", "none", "mmap", "bytes"}, default "readahead"
|
1609 |
+
Caching policy in read mode. See the definitions in ``core``.
|
1610 |
+
cache_options : dict
|
1611 |
+
Additional options passed to the constructor for the cache specified
|
1612 |
+
by `cache_type`.
|
1613 |
+
size: int
|
1614 |
+
If given and in read mode, suppresses having to look up the file size
|
1615 |
+
kwargs:
|
1616 |
+
Gets stored as self.kwargs
|
1617 |
+
"""
|
1618 |
+
from .core import caches
|
1619 |
+
|
1620 |
+
self.path = path
|
1621 |
+
self.fs = fs
|
1622 |
+
self.mode = mode
|
1623 |
+
self.blocksize = (
|
1624 |
+
self.DEFAULT_BLOCK_SIZE if block_size in ["default", None] else block_size
|
1625 |
+
)
|
1626 |
+
self.loc = 0
|
1627 |
+
self.autocommit = autocommit
|
1628 |
+
self.end = None
|
1629 |
+
self.start = None
|
1630 |
+
self.closed = False
|
1631 |
+
|
1632 |
+
if cache_options is None:
|
1633 |
+
cache_options = {}
|
1634 |
+
|
1635 |
+
if "trim" in kwargs:
|
1636 |
+
warnings.warn(
|
1637 |
+
"Passing 'trim' to control the cache behavior has been deprecated. "
|
1638 |
+
"Specify it within the 'cache_options' argument instead.",
|
1639 |
+
FutureWarning,
|
1640 |
+
)
|
1641 |
+
cache_options["trim"] = kwargs.pop("trim")
|
1642 |
+
|
1643 |
+
self.kwargs = kwargs
|
1644 |
+
|
1645 |
+
if mode not in {"ab", "rb", "wb"}:
|
1646 |
+
raise NotImplementedError("File mode not supported")
|
1647 |
+
if mode == "rb":
|
1648 |
+
if size is not None:
|
1649 |
+
self.size = size
|
1650 |
+
else:
|
1651 |
+
self.size = self.details["size"]
|
1652 |
+
self.cache = caches[cache_type](
|
1653 |
+
self.blocksize, self._fetch_range, self.size, **cache_options
|
1654 |
+
)
|
1655 |
+
else:
|
1656 |
+
self.buffer = io.BytesIO()
|
1657 |
+
self.offset = None
|
1658 |
+
self.forced = False
|
1659 |
+
self.location = None
|
1660 |
+
|
1661 |
+
@property
|
1662 |
+
def details(self):
|
1663 |
+
if self._details is None:
|
1664 |
+
self._details = self.fs.info(self.path)
|
1665 |
+
return self._details
|
1666 |
+
|
1667 |
+
@details.setter
|
1668 |
+
def details(self, value):
|
1669 |
+
self._details = value
|
1670 |
+
self.size = value["size"]
|
1671 |
+
|
1672 |
+
@property
|
1673 |
+
def full_name(self):
|
1674 |
+
return _unstrip_protocol(self.path, self.fs)
|
1675 |
+
|
1676 |
+
@property
|
1677 |
+
def closed(self):
|
1678 |
+
# get around this attr being read-only in IOBase
|
1679 |
+
# use getattr here, since this can be called during del
|
1680 |
+
return getattr(self, "_closed", True)
|
1681 |
+
|
1682 |
+
@closed.setter
|
1683 |
+
def closed(self, c):
|
1684 |
+
self._closed = c
|
1685 |
+
|
1686 |
+
def __hash__(self):
|
1687 |
+
if "w" in self.mode:
|
1688 |
+
return id(self)
|
1689 |
+
else:
|
1690 |
+
return int(tokenize(self.details), 16)
|
1691 |
+
|
1692 |
+
def __eq__(self, other):
|
1693 |
+
"""Files are equal if they have the same checksum, only in read mode"""
|
1694 |
+
return self.mode == "rb" and other.mode == "rb" and hash(self) == hash(other)
|
1695 |
+
|
1696 |
+
def commit(self):
|
1697 |
+
"""Move from temp to final destination"""
|
1698 |
+
|
1699 |
+
def discard(self):
|
1700 |
+
"""Throw away temporary file"""
|
1701 |
+
|
1702 |
+
def info(self):
|
1703 |
+
"""File information about this path"""
|
1704 |
+
if "r" in self.mode:
|
1705 |
+
return self.details
|
1706 |
+
else:
|
1707 |
+
raise ValueError("Info not available while writing")
|
1708 |
+
|
1709 |
+
def tell(self):
|
1710 |
+
"""Current file location"""
|
1711 |
+
return self.loc
|
1712 |
+
|
1713 |
+
def seek(self, loc, whence=0):
|
1714 |
+
"""Set current file location
|
1715 |
+
|
1716 |
+
Parameters
|
1717 |
+
----------
|
1718 |
+
loc: int
|
1719 |
+
byte location
|
1720 |
+
whence: {0, 1, 2}
|
1721 |
+
from start of file, current location or end of file, resp.
|
1722 |
+
"""
|
1723 |
+
loc = int(loc)
|
1724 |
+
if not self.mode == "rb":
|
1725 |
+
raise OSError(ESPIPE, "Seek only available in read mode")
|
1726 |
+
if whence == 0:
|
1727 |
+
nloc = loc
|
1728 |
+
elif whence == 1:
|
1729 |
+
nloc = self.loc + loc
|
1730 |
+
elif whence == 2:
|
1731 |
+
nloc = self.size + loc
|
1732 |
+
else:
|
1733 |
+
raise ValueError(f"invalid whence ({whence}, should be 0, 1 or 2)")
|
1734 |
+
if nloc < 0:
|
1735 |
+
raise ValueError("Seek before start of file")
|
1736 |
+
self.loc = nloc
|
1737 |
+
return self.loc
|
1738 |
+
|
1739 |
+
def write(self, data):
|
1740 |
+
"""
|
1741 |
+
Write data to buffer.
|
1742 |
+
|
1743 |
+
Buffer only sent on flush() or if buffer is greater than
|
1744 |
+
or equal to blocksize.
|
1745 |
+
|
1746 |
+
Parameters
|
1747 |
+
----------
|
1748 |
+
data: bytes
|
1749 |
+
Set of bytes to be written.
|
1750 |
+
"""
|
1751 |
+
if self.mode not in {"wb", "ab"}:
|
1752 |
+
raise ValueError("File not in write mode")
|
1753 |
+
if self.closed:
|
1754 |
+
raise ValueError("I/O operation on closed file.")
|
1755 |
+
if self.forced:
|
1756 |
+
raise ValueError("This file has been force-flushed, can only close")
|
1757 |
+
out = self.buffer.write(data)
|
1758 |
+
self.loc += out
|
1759 |
+
if self.buffer.tell() >= self.blocksize:
|
1760 |
+
self.flush()
|
1761 |
+
return out
|
1762 |
+
|
1763 |
+
def flush(self, force=False):
|
1764 |
+
"""
|
1765 |
+
Write buffered data to backend store.
|
1766 |
+
|
1767 |
+
Writes the current buffer, if it is larger than the block-size, or if
|
1768 |
+
the file is being closed.
|
1769 |
+
|
1770 |
+
Parameters
|
1771 |
+
----------
|
1772 |
+
force: bool
|
1773 |
+
When closing, write the last block even if it is smaller than
|
1774 |
+
blocks are allowed to be. Disallows further writing to this file.
|
1775 |
+
"""
|
1776 |
+
|
1777 |
+
if self.closed:
|
1778 |
+
raise ValueError("Flush on closed file")
|
1779 |
+
if force and self.forced:
|
1780 |
+
raise ValueError("Force flush cannot be called more than once")
|
1781 |
+
if force:
|
1782 |
+
self.forced = True
|
1783 |
+
|
1784 |
+
if self.mode not in {"wb", "ab"}:
|
1785 |
+
# no-op to flush on read-mode
|
1786 |
+
return
|
1787 |
+
|
1788 |
+
if not force and self.buffer.tell() < self.blocksize:
|
1789 |
+
# Defer write on small block
|
1790 |
+
return
|
1791 |
+
|
1792 |
+
if self.offset is None:
|
1793 |
+
# Initialize a multipart upload
|
1794 |
+
self.offset = 0
|
1795 |
+
try:
|
1796 |
+
self._initiate_upload()
|
1797 |
+
except: # noqa: E722
|
1798 |
+
self.closed = True
|
1799 |
+
raise
|
1800 |
+
|
1801 |
+
if self._upload_chunk(final=force) is not False:
|
1802 |
+
self.offset += self.buffer.seek(0, 2)
|
1803 |
+
self.buffer = io.BytesIO()
|
1804 |
+
|
1805 |
+
def _upload_chunk(self, final=False):
|
1806 |
+
"""Write one part of a multi-block file upload
|
1807 |
+
|
1808 |
+
Parameters
|
1809 |
+
==========
|
1810 |
+
final: bool
|
1811 |
+
This is the last block, so should complete file, if
|
1812 |
+
self.autocommit is True.
|
1813 |
+
"""
|
1814 |
+
# may not yet have been initialized, may need to call _initialize_upload
|
1815 |
+
|
1816 |
+
def _initiate_upload(self):
|
1817 |
+
"""Create remote file/upload"""
|
1818 |
+
pass
|
1819 |
+
|
1820 |
+
def _fetch_range(self, start, end):
|
1821 |
+
"""Get the specified set of bytes from remote"""
|
1822 |
+
raise NotImplementedError
|
1823 |
+
|
1824 |
+
def read(self, length=-1):
|
1825 |
+
"""
|
1826 |
+
Return data from cache, or fetch pieces as necessary
|
1827 |
+
|
1828 |
+
Parameters
|
1829 |
+
----------
|
1830 |
+
length: int (-1)
|
1831 |
+
Number of bytes to read; if <0, all remaining bytes.
|
1832 |
+
"""
|
1833 |
+
length = -1 if length is None else int(length)
|
1834 |
+
if self.mode != "rb":
|
1835 |
+
raise ValueError("File not in read mode")
|
1836 |
+
if length < 0:
|
1837 |
+
length = self.size - self.loc
|
1838 |
+
if self.closed:
|
1839 |
+
raise ValueError("I/O operation on closed file.")
|
1840 |
+
logger.debug("%s read: %i - %i", self, self.loc, self.loc + length)
|
1841 |
+
if length == 0:
|
1842 |
+
# don't even bother calling fetch
|
1843 |
+
return b""
|
1844 |
+
out = self.cache._fetch(self.loc, self.loc + length)
|
1845 |
+
self.loc += len(out)
|
1846 |
+
return out
|
1847 |
+
|
1848 |
+
def readinto(self, b):
|
1849 |
+
"""mirrors builtin file's readinto method
|
1850 |
+
|
1851 |
+
https://docs.python.org/3/library/io.html#io.RawIOBase.readinto
|
1852 |
+
"""
|
1853 |
+
out = memoryview(b).cast("B")
|
1854 |
+
data = self.read(out.nbytes)
|
1855 |
+
out[: len(data)] = data
|
1856 |
+
return len(data)
|
1857 |
+
|
1858 |
+
def readuntil(self, char=b"\n", blocks=None):
|
1859 |
+
"""Return data between current position and first occurrence of char
|
1860 |
+
|
1861 |
+
char is included in the output, except if the end of the file is
|
1862 |
+
encountered first.
|
1863 |
+
|
1864 |
+
Parameters
|
1865 |
+
----------
|
1866 |
+
char: bytes
|
1867 |
+
Thing to find
|
1868 |
+
blocks: None or int
|
1869 |
+
How much to read in each go. Defaults to file blocksize - which may
|
1870 |
+
mean a new read on every call.
|
1871 |
+
"""
|
1872 |
+
out = []
|
1873 |
+
while True:
|
1874 |
+
start = self.tell()
|
1875 |
+
part = self.read(blocks or self.blocksize)
|
1876 |
+
if len(part) == 0:
|
1877 |
+
break
|
1878 |
+
found = part.find(char)
|
1879 |
+
if found > -1:
|
1880 |
+
out.append(part[: found + len(char)])
|
1881 |
+
self.seek(start + found + len(char))
|
1882 |
+
break
|
1883 |
+
out.append(part)
|
1884 |
+
return b"".join(out)
|
1885 |
+
|
1886 |
+
def readline(self):
|
1887 |
+
"""Read until first occurrence of newline character
|
1888 |
+
|
1889 |
+
Note that, because of character encoding, this is not necessarily a
|
1890 |
+
true line ending.
|
1891 |
+
"""
|
1892 |
+
return self.readuntil(b"\n")
|
1893 |
+
|
1894 |
+
def __next__(self):
|
1895 |
+
out = self.readline()
|
1896 |
+
if out:
|
1897 |
+
return out
|
1898 |
+
raise StopIteration
|
1899 |
+
|
1900 |
+
def __iter__(self):
|
1901 |
+
return self
|
1902 |
+
|
1903 |
+
def readlines(self):
|
1904 |
+
"""Return all data, split by the newline character"""
|
1905 |
+
data = self.read()
|
1906 |
+
lines = data.split(b"\n")
|
1907 |
+
out = [l + b"\n" for l in lines[:-1]]
|
1908 |
+
if data.endswith(b"\n"):
|
1909 |
+
return out
|
1910 |
+
else:
|
1911 |
+
return out + [lines[-1]]
|
1912 |
+
# return list(self) ???
|
1913 |
+
|
1914 |
+
def readinto1(self, b):
|
1915 |
+
return self.readinto(b)
|
1916 |
+
|
1917 |
+
def close(self):
|
1918 |
+
"""Close file
|
1919 |
+
|
1920 |
+
Finalizes writes, discards cache
|
1921 |
+
"""
|
1922 |
+
if getattr(self, "_unclosable", False):
|
1923 |
+
return
|
1924 |
+
if self.closed:
|
1925 |
+
return
|
1926 |
+
if self.mode == "rb":
|
1927 |
+
self.cache = None
|
1928 |
+
else:
|
1929 |
+
if not self.forced:
|
1930 |
+
self.flush(force=True)
|
1931 |
+
|
1932 |
+
if self.fs is not None:
|
1933 |
+
self.fs.invalidate_cache(self.path)
|
1934 |
+
self.fs.invalidate_cache(self.fs._parent(self.path))
|
1935 |
+
|
1936 |
+
self.closed = True
|
1937 |
+
|
1938 |
+
def readable(self):
|
1939 |
+
"""Whether opened for reading"""
|
1940 |
+
return self.mode == "rb" and not self.closed
|
1941 |
+
|
1942 |
+
def seekable(self):
|
1943 |
+
"""Whether is seekable (only in read mode)"""
|
1944 |
+
return self.readable()
|
1945 |
+
|
1946 |
+
def writable(self):
|
1947 |
+
"""Whether opened for writing"""
|
1948 |
+
return self.mode in {"wb", "ab"} and not self.closed
|
1949 |
+
|
1950 |
+
def __del__(self):
|
1951 |
+
if not self.closed:
|
1952 |
+
self.close()
|
1953 |
+
|
1954 |
+
def __str__(self):
|
1955 |
+
return f"<File-like object {type(self.fs).__name__}, {self.path}>"
|
1956 |
+
|
1957 |
+
__repr__ = __str__
|
1958 |
+
|
1959 |
+
def __enter__(self):
|
1960 |
+
return self
|
1961 |
+
|
1962 |
+
def __exit__(self, *args):
|
1963 |
+
self.close()
|
lib/python3.11/site-packages/fsspec/tests/abstract/__init__.py
ADDED
@@ -0,0 +1,287 @@
import os
from hashlib import md5

import pytest

from fsspec.implementations.local import LocalFileSystem
from fsspec.tests.abstract.copy import AbstractCopyTests  # noqa
from fsspec.tests.abstract.get import AbstractGetTests  # noqa
from fsspec.tests.abstract.put import AbstractPutTests  # noqa


class BaseAbstractFixtures:
    """
    Abstract base class containing fixtures that are used by but never need to
    be overridden in derived filesystem-specific classes to run the abstract
    tests on such filesystems.
    """

    @pytest.fixture
    def fs_bulk_operations_scenario_0(self, fs, fs_join, fs_path):
        """
        Scenario on remote filesystem that is used for many cp/get/put tests.

        Cleans up at the end of each test in which it is used.
        """
        source = self._bulk_operations_scenario_0(fs, fs_join, fs_path)
        yield source
        fs.rm(source, recursive=True)

    @pytest.fixture
    def fs_glob_edge_cases_files(self, fs, fs_join, fs_path):
        """
        Scenario on remote filesystem that is used for glob edge cases cp/get/put tests.

        Cleans up at the end of each test in which it is used.
        """
        source = self._glob_edge_cases_files(fs, fs_join, fs_path)
        yield source
        fs.rm(source, recursive=True)

    @pytest.fixture
    def fs_dir_and_file_with_same_name_prefix(self, fs, fs_join, fs_path):
        """
        Scenario on remote filesystem that is used to check cp/get/put on directory
        and file with the same name prefixes.

        Cleans up at the end of each test in which it is used.
        """
        source = self._dir_and_file_with_same_name_prefix(fs, fs_join, fs_path)
        yield source
        fs.rm(source, recursive=True)

    @pytest.fixture
    def fs_10_files_with_hashed_names(self, fs, fs_join, fs_path):
        """
        Scenario on remote filesystem that is used to check cp/get/put files order
        when source and destination are lists.

        Cleans up at the end of each test in which it is used.
        """
        source = self._10_files_with_hashed_names(fs, fs_join, fs_path)
        yield source
        fs.rm(source, recursive=True)

    @pytest.fixture
    def fs_target(self, fs, fs_join, fs_path):
        """
        Return name of remote directory that does not yet exist to copy into.

        Cleans up at the end of each test in which it is used.
        """
        target = fs_join(fs_path, "target")
        yield target
        if fs.exists(target):
            fs.rm(target, recursive=True)

    @pytest.fixture
    def local_bulk_operations_scenario_0(self, local_fs, local_join, local_path):
        """
        Scenario on local filesystem that is used for many cp/get/put tests.

        Cleans up at the end of each test in which it is used.
        """
        source = self._bulk_operations_scenario_0(local_fs, local_join, local_path)
        yield source
        local_fs.rm(source, recursive=True)

    @pytest.fixture
    def local_glob_edge_cases_files(self, local_fs, local_join, local_path):
        """
        Scenario on local filesystem that is used for glob edge cases cp/get/put tests.

        Cleans up at the end of each test in which it is used.
        """
        source = self._glob_edge_cases_files(local_fs, local_join, local_path)
        yield source
        local_fs.rm(source, recursive=True)

    @pytest.fixture
    def local_dir_and_file_with_same_name_prefix(
        self, local_fs, local_join, local_path
    ):
        """
        Scenario on local filesystem that is used to check cp/get/put on directory
        and file with the same name prefixes.

        Cleans up at the end of each test in which it is used.
        """
        source = self._dir_and_file_with_same_name_prefix(
            local_fs, local_join, local_path
        )
        yield source
        local_fs.rm(source, recursive=True)

    @pytest.fixture
    def local_10_files_with_hashed_names(self, local_fs, local_join, local_path):
        """
        Scenario on local filesystem that is used to check cp/get/put files order
        when source and destination are lists.

        Cleans up at the end of each test in which it is used.
        """
        source = self._10_files_with_hashed_names(local_fs, local_join, local_path)
        yield source
        local_fs.rm(source, recursive=True)

    @pytest.fixture
    def local_target(self, local_fs, local_join, local_path):
        """
        Return name of local directory that does not yet exist to copy into.

        Cleans up at the end of each test in which it is used.
        """
        target = local_join(local_path, "target")
        yield target
        if local_fs.exists(target):
            local_fs.rm(target, recursive=True)

    def _glob_edge_cases_files(self, some_fs, some_join, some_path):
        """
        Scenario that is used for glob edge cases cp/get/put tests.
        Creates the following directory and file structure:

        📁 source
        ├── 📄 file1
        ├── 📄 file2
        ├── 📁 subdir0
        │   ├── 📄 subfile1
        │   ├── 📄 subfile2
        │   └── 📁 nesteddir
        │       └── 📄 nestedfile
        └── 📁 subdir1
            ├── 📄 subfile1
            ├── 📄 subfile2
            └── 📁 nesteddir
                └── 📄 nestedfile
        """
        source = some_join(some_path, "source")
        some_fs.touch(some_join(source, "file1"))
        some_fs.touch(some_join(source, "file2"))

        for subdir_idx in range(2):
            subdir = some_join(source, f"subdir{subdir_idx}")
            nesteddir = some_join(subdir, "nesteddir")
            some_fs.makedirs(nesteddir)
            some_fs.touch(some_join(subdir, "subfile1"))
            some_fs.touch(some_join(subdir, "subfile2"))
            some_fs.touch(some_join(nesteddir, "nestedfile"))

        return source

    def _bulk_operations_scenario_0(self, some_fs, some_join, some_path):
        """
        Scenario that is used for many cp/get/put tests. Creates the following
        directory and file structure:

        📁 source
        ├── 📄 file1
        ├── 📄 file2
        └── 📁 subdir
            ├── 📄 subfile1
            ├── 📄 subfile2
            └── 📁 nesteddir
                └── 📄 nestedfile
        """
        source = some_join(some_path, "source")
        subdir = some_join(source, "subdir")
        nesteddir = some_join(subdir, "nesteddir")
        some_fs.makedirs(nesteddir)
        some_fs.touch(some_join(source, "file1"))
        some_fs.touch(some_join(source, "file2"))
        some_fs.touch(some_join(subdir, "subfile1"))
        some_fs.touch(some_join(subdir, "subfile2"))
        some_fs.touch(some_join(nesteddir, "nestedfile"))
        return source

    def _dir_and_file_with_same_name_prefix(self, some_fs, some_join, some_path):
        """
        Scenario that is used to check cp/get/put on directory and file with
        the same name prefixes. Creates the following directory and file structure:

        📁 source
        ├── 📄 subdir.txt
        └── 📁 subdir
            └── 📄 subfile.txt
        """
        source = some_join(some_path, "source")
        subdir = some_join(source, "subdir")
        file = some_join(source, "subdir.txt")
        subfile = some_join(subdir, "subfile.txt")
        some_fs.makedirs(subdir)
        some_fs.touch(file)
        some_fs.touch(subfile)
        return source

    def _10_files_with_hashed_names(self, some_fs, some_join, some_path):
        """
        Scenario that is used to check cp/get/put files order when source and
        destination are lists. Creates the following directory and file structure:

        📁 source
        └── 📄 {hashed([0-9])}.txt
        """
        source = some_join(some_path, "source")
        for i in range(10):
            hashed_i = md5(str(i).encode("utf-8")).hexdigest()
            path = some_join(source, f"{hashed_i}.txt")
            some_fs.pipe(path=path, value=f"{i}".encode("utf-8"))
        return source


class AbstractFixtures(BaseAbstractFixtures):
    """
    Abstract base class containing fixtures that may be overridden in derived
    filesystem-specific classes to run the abstract tests on such filesystems.

    For any particular filesystem some of these fixtures must be overridden,
    such as ``fs`` and ``fs_path``, and others may be overridden if the
    default functions here are not appropriate, such as ``fs_join``.
    """

    @pytest.fixture
    def fs(self):
        raise NotImplementedError("This function must be overridden in derived classes")

    @pytest.fixture
    def fs_join(self):
        """
        Return a function that joins its arguments together into a path.

        Most fsspec implementations join paths in a platform-dependent way,
        but some will override this to always use a forward slash.
        """
        return os.path.join

    @pytest.fixture
    def fs_path(self):
        raise NotImplementedError("This function must be overridden in derived classes")

    @pytest.fixture(scope="class")
    def local_fs(self):
        # Maybe need an option for auto_mkdir=False?  This is only relevant
        # for certain implementations.
        return LocalFileSystem(auto_mkdir=True)

    @pytest.fixture
    def local_join(self):
        """
        Return a function that joins its arguments together into a path, on
        the local filesystem.
        """
        return os.path.join

    @pytest.fixture
    def local_path(self, tmpdir):
        return tmpdir

    @pytest.fixture
    def supports_empty_directories(self):
        """
        Return whether this implementation supports empty directories.
        """
        return True

    @pytest.fixture
    def fs_sanitize_path(self):
        return lambda x: x
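A filesystem-specific test suite is expected to combine these fixtures with the abstract test classes imported above. A minimal sketch, assuming the built-in in-memory filesystem as the backend under test (the class names and the fs_path value are illustrative, not part of this file):

    import pytest

    import fsspec
    import fsspec.tests.abstract as abstract


    class MemoryFixtures(abstract.AbstractFixtures):
        @pytest.fixture
        def fs(self):
            # backend under test; "memory" is just a convenient built-in choice
            return fsspec.filesystem("memory")

        @pytest.fixture
        def fs_path(self):
            return "/abstract-test-root"  # illustrative scratch location


    class TestMemoryCopy(MemoryFixtures, abstract.AbstractCopyTests):
        pass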
lib/python3.11/site-packages/fsspec/tests/abstract/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (15 kB).
lib/python3.11/site-packages/fsspec/tests/abstract/__pycache__/common.cpython-311.pyc
ADDED
Binary file (2.33 kB).
lib/python3.11/site-packages/fsspec/tests/abstract/__pycache__/copy.cpython-311.pyc
ADDED
Binary file (26.5 kB).
lib/python3.11/site-packages/fsspec/tests/abstract/__pycache__/get.cpython-311.pyc
ADDED
Binary file (26.3 kB).
lib/python3.11/site-packages/fsspec/tests/abstract/__pycache__/put.cpython-311.pyc
ADDED
Binary file (27.7 kB).
lib/python3.11/site-packages/fsspec/tests/abstract/common.py
ADDED
@@ -0,0 +1,175 @@
GLOB_EDGE_CASES_TESTS = {
    "argnames": ("path", "recursive", "maxdepth", "expected"),
    "argvalues": [
        ("fil?1", False, None, ["file1"]),
        ("fil?1", True, None, ["file1"]),
        ("file[1-2]", False, None, ["file1", "file2"]),
        ("file[1-2]", True, None, ["file1", "file2"]),
        ("*", False, None, ["file1", "file2"]),
        (
            "*",
            True,
            None,
            [
                "file1",
                "file2",
                "subdir0/subfile1",
                "subdir0/subfile2",
                "subdir0/nesteddir/nestedfile",
                "subdir1/subfile1",
                "subdir1/subfile2",
                "subdir1/nesteddir/nestedfile",
            ],
        ),
        ("*", True, 1, ["file1", "file2"]),
        (
            "*",
            True,
            2,
            [
                "file1",
                "file2",
                "subdir0/subfile1",
                "subdir0/subfile2",
                "subdir1/subfile1",
                "subdir1/subfile2",
            ],
        ),
        ("*1", False, None, ["file1"]),
        (
            "*1",
            True,
            None,
            [
                "file1",
                "subdir1/subfile1",
                "subdir1/subfile2",
                "subdir1/nesteddir/nestedfile",
            ],
        ),
        ("*1", True, 2, ["file1", "subdir1/subfile1", "subdir1/subfile2"]),
        (
            "**",
            False,
            None,
            [
                "file1",
                "file2",
                "subdir0/subfile1",
                "subdir0/subfile2",
                "subdir0/nesteddir/nestedfile",
                "subdir1/subfile1",
                "subdir1/subfile2",
                "subdir1/nesteddir/nestedfile",
            ],
        ),
        (
            "**",
            True,
            None,
            [
                "file1",
                "file2",
                "subdir0/subfile1",
                "subdir0/subfile2",
                "subdir0/nesteddir/nestedfile",
                "subdir1/subfile1",
                "subdir1/subfile2",
                "subdir1/nesteddir/nestedfile",
            ],
        ),
        ("**", True, 1, ["file1", "file2"]),
        (
            "**",
            True,
            2,
            [
                "file1",
                "file2",
                "subdir0/subfile1",
                "subdir0/subfile2",
                "subdir0/nesteddir/nestedfile",
                "subdir1/subfile1",
                "subdir1/subfile2",
                "subdir1/nesteddir/nestedfile",
            ],
        ),
        (
            "**",
            False,
            2,
            [
                "file1",
                "file2",
                "subdir0/subfile1",
                "subdir0/subfile2",
                "subdir1/subfile1",
                "subdir1/subfile2",
            ],
        ),
        ("**/*1", False, None, ["file1", "subdir0/subfile1", "subdir1/subfile1"]),
        (
            "**/*1",
            True,
            None,
            [
                "file1",
                "subdir0/subfile1",
                "subdir1/subfile1",
                "subdir1/subfile2",
                "subdir1/nesteddir/nestedfile",
            ],
        ),
        ("**/*1", True, 1, ["file1"]),
        (
            "**/*1",
            True,
            2,
            ["file1", "subdir0/subfile1", "subdir1/subfile1", "subdir1/subfile2"],
        ),
        ("**/*1", False, 2, ["file1", "subdir0/subfile1", "subdir1/subfile1"]),
        ("**/subdir0", False, None, []),
        ("**/subdir0", True, None, ["subfile1", "subfile2", "nesteddir/nestedfile"]),
        ("**/subdir0/nested*", False, 2, []),
        ("**/subdir0/nested*", True, 2, ["nestedfile"]),
        ("subdir[1-2]", False, None, []),
        ("subdir[1-2]", True, None, ["subfile1", "subfile2", "nesteddir/nestedfile"]),
        ("subdir[1-2]", True, 2, ["subfile1", "subfile2"]),
        ("subdir[0-1]", False, None, []),
        (
            "subdir[0-1]",
            True,
            None,
            [
                "subdir0/subfile1",
                "subdir0/subfile2",
                "subdir0/nesteddir/nestedfile",
                "subdir1/subfile1",
                "subdir1/subfile2",
                "subdir1/nesteddir/nestedfile",
            ],
        ),
        (
            "subdir[0-1]/*fil[e]*",
            False,
            None,
            [
                "subdir0/subfile1",
                "subdir0/subfile2",
                "subdir1/subfile1",
                "subdir1/subfile2",
            ],
        ),
        (
            "subdir[0-1]/*fil[e]*",
            True,
            None,
            [
                "subdir0/subfile1",
                "subdir0/subfile2",
                "subdir1/subfile1",
                "subdir1/subfile2",
            ],
        ),
    ],
}
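The table above is consumed by the copy/get/put test classes through ``pytest.mark.parametrize``; a minimal sketch of that hookup, mirroring the decorator used in copy.py further down (the test body here is illustrative only):

    import pytest

    from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS


    @pytest.mark.parametrize(
        GLOB_EDGE_CASES_TESTS["argnames"],
        GLOB_EDGE_CASES_TESTS["argvalues"],
    )
    def test_glob_edge_case(path, recursive, maxdepth, expected):
        # each case pairs a glob pattern and copy options with the expected
        # relative paths that should appear at the destination
        ...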
lib/python3.11/site-packages/fsspec/tests/abstract/copy.py
ADDED
@@ -0,0 +1,543 @@
1 |
+
from hashlib import md5
|
2 |
+
from itertools import product
|
3 |
+
|
4 |
+
import pytest
|
5 |
+
|
6 |
+
from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS
|
7 |
+
|
8 |
+
|
9 |
+
class AbstractCopyTests:
|
10 |
+
def test_copy_file_to_existing_directory(
|
11 |
+
self,
|
12 |
+
fs,
|
13 |
+
fs_join,
|
14 |
+
fs_bulk_operations_scenario_0,
|
15 |
+
fs_target,
|
16 |
+
supports_empty_directories,
|
17 |
+
):
|
18 |
+
# Copy scenario 1a
|
19 |
+
source = fs_bulk_operations_scenario_0
|
20 |
+
|
21 |
+
target = fs_target
|
22 |
+
fs.mkdir(target)
|
23 |
+
if not supports_empty_directories:
|
24 |
+
# Force target directory to exist by adding a dummy file
|
25 |
+
fs.touch(fs_join(target, "dummy"))
|
26 |
+
assert fs.isdir(target)
|
27 |
+
|
28 |
+
target_file2 = fs_join(target, "file2")
|
29 |
+
target_subfile1 = fs_join(target, "subfile1")
|
30 |
+
|
31 |
+
# Copy from source directory
|
32 |
+
fs.cp(fs_join(source, "file2"), target)
|
33 |
+
assert fs.isfile(target_file2)
|
34 |
+
|
35 |
+
# Copy from sub directory
|
36 |
+
fs.cp(fs_join(source, "subdir", "subfile1"), target)
|
37 |
+
assert fs.isfile(target_subfile1)
|
38 |
+
|
39 |
+
# Remove copied files
|
40 |
+
fs.rm([target_file2, target_subfile1])
|
41 |
+
assert not fs.exists(target_file2)
|
42 |
+
assert not fs.exists(target_subfile1)
|
43 |
+
|
44 |
+
# Repeat with trailing slash on target
|
45 |
+
fs.cp(fs_join(source, "file2"), target + "/")
|
46 |
+
assert fs.isdir(target)
|
47 |
+
assert fs.isfile(target_file2)
|
48 |
+
|
49 |
+
fs.cp(fs_join(source, "subdir", "subfile1"), target + "/")
|
50 |
+
assert fs.isfile(target_subfile1)
|
51 |
+
|
52 |
+
def test_copy_file_to_new_directory(
|
53 |
+
self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
|
54 |
+
):
|
55 |
+
# Copy scenario 1b
|
56 |
+
source = fs_bulk_operations_scenario_0
|
57 |
+
|
58 |
+
target = fs_target
|
59 |
+
fs.mkdir(target)
|
60 |
+
|
61 |
+
fs.cp(
|
62 |
+
fs_join(source, "subdir", "subfile1"), fs_join(target, "newdir/")
|
63 |
+
) # Note trailing slash
|
64 |
+
assert fs.isdir(target)
|
65 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
66 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
67 |
+
|
68 |
+
def test_copy_file_to_file_in_existing_directory(
|
69 |
+
self,
|
70 |
+
fs,
|
71 |
+
fs_join,
|
72 |
+
fs_bulk_operations_scenario_0,
|
73 |
+
fs_target,
|
74 |
+
supports_empty_directories,
|
75 |
+
):
|
76 |
+
# Copy scenario 1c
|
77 |
+
source = fs_bulk_operations_scenario_0
|
78 |
+
|
79 |
+
target = fs_target
|
80 |
+
fs.mkdir(target)
|
81 |
+
if not supports_empty_directories:
|
82 |
+
# Force target directory to exist by adding a dummy file
|
83 |
+
fs.touch(fs_join(target, "dummy"))
|
84 |
+
assert fs.isdir(target)
|
85 |
+
|
86 |
+
fs.cp(fs_join(source, "subdir", "subfile1"), fs_join(target, "newfile"))
|
87 |
+
assert fs.isfile(fs_join(target, "newfile"))
|
88 |
+
|
89 |
+
def test_copy_file_to_file_in_new_directory(
|
90 |
+
self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
|
91 |
+
):
|
92 |
+
# Copy scenario 1d
|
93 |
+
source = fs_bulk_operations_scenario_0
|
94 |
+
|
95 |
+
target = fs_target
|
96 |
+
fs.mkdir(target)
|
97 |
+
|
98 |
+
fs.cp(
|
99 |
+
fs_join(source, "subdir", "subfile1"), fs_join(target, "newdir", "newfile")
|
100 |
+
)
|
101 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
102 |
+
assert fs.isfile(fs_join(target, "newdir", "newfile"))
|
103 |
+
|
104 |
+
def test_copy_directory_to_existing_directory(
|
105 |
+
self,
|
106 |
+
fs,
|
107 |
+
fs_join,
|
108 |
+
fs_bulk_operations_scenario_0,
|
109 |
+
fs_target,
|
110 |
+
supports_empty_directories,
|
111 |
+
):
|
112 |
+
# Copy scenario 1e
|
113 |
+
source = fs_bulk_operations_scenario_0
|
114 |
+
|
115 |
+
target = fs_target
|
116 |
+
fs.mkdir(target)
|
117 |
+
if not supports_empty_directories:
|
118 |
+
# Force target directory to exist by adding a dummy file
|
119 |
+
dummy = fs_join(target, "dummy")
|
120 |
+
fs.touch(dummy)
|
121 |
+
assert fs.isdir(target)
|
122 |
+
|
123 |
+
for source_slash, target_slash in zip([False, True], [False, True]):
|
124 |
+
s = fs_join(source, "subdir")
|
125 |
+
if source_slash:
|
126 |
+
s += "/"
|
127 |
+
t = target + "/" if target_slash else target
|
128 |
+
|
129 |
+
# Without recursive does nothing
|
130 |
+
fs.cp(s, t)
|
131 |
+
assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
|
132 |
+
|
133 |
+
# With recursive
|
134 |
+
fs.cp(s, t, recursive=True)
|
135 |
+
if source_slash:
|
136 |
+
assert fs.isfile(fs_join(target, "subfile1"))
|
137 |
+
assert fs.isfile(fs_join(target, "subfile2"))
|
138 |
+
assert fs.isdir(fs_join(target, "nesteddir"))
|
139 |
+
assert fs.isfile(fs_join(target, "nesteddir", "nestedfile"))
|
140 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
141 |
+
|
142 |
+
fs.rm(
|
143 |
+
[
|
144 |
+
fs_join(target, "subfile1"),
|
145 |
+
fs_join(target, "subfile2"),
|
146 |
+
fs_join(target, "nesteddir"),
|
147 |
+
],
|
148 |
+
recursive=True,
|
149 |
+
)
|
150 |
+
else:
|
151 |
+
assert fs.isdir(fs_join(target, "subdir"))
|
152 |
+
assert fs.isfile(fs_join(target, "subdir", "subfile1"))
|
153 |
+
assert fs.isfile(fs_join(target, "subdir", "subfile2"))
|
154 |
+
assert fs.isdir(fs_join(target, "subdir", "nesteddir"))
|
155 |
+
assert fs.isfile(fs_join(target, "subdir", "nesteddir", "nestedfile"))
|
156 |
+
|
157 |
+
fs.rm(fs_join(target, "subdir"), recursive=True)
|
158 |
+
assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
|
159 |
+
|
160 |
+
# Limit recursive by maxdepth
|
161 |
+
fs.cp(s, t, recursive=True, maxdepth=1)
|
162 |
+
if source_slash:
|
163 |
+
assert fs.isfile(fs_join(target, "subfile1"))
|
164 |
+
assert fs.isfile(fs_join(target, "subfile2"))
|
165 |
+
assert not fs.exists(fs_join(target, "nesteddir"))
|
166 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
167 |
+
|
168 |
+
fs.rm(
|
169 |
+
[
|
170 |
+
fs_join(target, "subfile1"),
|
171 |
+
fs_join(target, "subfile2"),
|
172 |
+
],
|
173 |
+
recursive=True,
|
174 |
+
)
|
175 |
+
else:
|
176 |
+
assert fs.isdir(fs_join(target, "subdir"))
|
177 |
+
assert fs.isfile(fs_join(target, "subdir", "subfile1"))
|
178 |
+
assert fs.isfile(fs_join(target, "subdir", "subfile2"))
|
179 |
+
assert not fs.exists(fs_join(target, "subdir", "nesteddir"))
|
180 |
+
|
181 |
+
fs.rm(fs_join(target, "subdir"), recursive=True)
|
182 |
+
assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
|
183 |
+
|
184 |
+
def test_copy_directory_to_new_directory(
|
185 |
+
self,
|
186 |
+
fs,
|
187 |
+
fs_join,
|
188 |
+
fs_bulk_operations_scenario_0,
|
189 |
+
fs_target,
|
190 |
+
supports_empty_directories,
|
191 |
+
):
|
192 |
+
# Copy scenario 1f
|
193 |
+
source = fs_bulk_operations_scenario_0
|
194 |
+
|
195 |
+
target = fs_target
|
196 |
+
fs.mkdir(target)
|
197 |
+
|
198 |
+
for source_slash, target_slash in zip([False, True], [False, True]):
|
199 |
+
s = fs_join(source, "subdir")
|
200 |
+
if source_slash:
|
201 |
+
s += "/"
|
202 |
+
t = fs_join(target, "newdir")
|
203 |
+
if target_slash:
|
204 |
+
t += "/"
|
205 |
+
|
206 |
+
# Without recursive does nothing
|
207 |
+
fs.cp(s, t)
|
208 |
+
if supports_empty_directories:
|
209 |
+
assert fs.ls(target) == []
|
210 |
+
else:
|
211 |
+
with pytest.raises(FileNotFoundError):
|
212 |
+
fs.ls(target)
|
213 |
+
|
214 |
+
# With recursive
|
215 |
+
fs.cp(s, t, recursive=True)
|
216 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
217 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
218 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile2"))
|
219 |
+
assert fs.isdir(fs_join(target, "newdir", "nesteddir"))
|
220 |
+
assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile"))
|
221 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
222 |
+
|
223 |
+
fs.rm(fs_join(target, "newdir"), recursive=True)
|
224 |
+
assert not fs.exists(fs_join(target, "newdir"))
|
225 |
+
|
226 |
+
# Limit recursive by maxdepth
|
227 |
+
fs.cp(s, t, recursive=True, maxdepth=1)
|
228 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
229 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
230 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile2"))
|
231 |
+
assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
|
232 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
233 |
+
|
234 |
+
fs.rm(fs_join(target, "newdir"), recursive=True)
|
235 |
+
assert not fs.exists(fs_join(target, "newdir"))
|
236 |
+
|
237 |
+
def test_copy_glob_to_existing_directory(
|
238 |
+
self,
|
239 |
+
fs,
|
240 |
+
fs_join,
|
241 |
+
fs_bulk_operations_scenario_0,
|
242 |
+
fs_target,
|
243 |
+
supports_empty_directories,
|
244 |
+
):
|
245 |
+
# Copy scenario 1g
|
246 |
+
source = fs_bulk_operations_scenario_0
|
247 |
+
|
248 |
+
target = fs_target
|
249 |
+
fs.mkdir(target)
|
250 |
+
if not supports_empty_directories:
|
251 |
+
# Force target directory to exist by adding a dummy file
|
252 |
+
dummy = fs_join(target, "dummy")
|
253 |
+
fs.touch(dummy)
|
254 |
+
assert fs.isdir(target)
|
255 |
+
|
256 |
+
for target_slash in [False, True]:
|
257 |
+
t = target + "/" if target_slash else target
|
258 |
+
|
259 |
+
# Without recursive
|
260 |
+
fs.cp(fs_join(source, "subdir", "*"), t)
|
261 |
+
assert fs.isfile(fs_join(target, "subfile1"))
|
262 |
+
assert fs.isfile(fs_join(target, "subfile2"))
|
263 |
+
assert not fs.isdir(fs_join(target, "nesteddir"))
|
264 |
+
assert not fs.exists(fs_join(target, "nesteddir", "nestedfile"))
|
265 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
266 |
+
|
267 |
+
fs.rm(
|
268 |
+
[
|
269 |
+
fs_join(target, "subfile1"),
|
270 |
+
fs_join(target, "subfile2"),
|
271 |
+
],
|
272 |
+
recursive=True,
|
273 |
+
)
|
274 |
+
assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
|
275 |
+
|
276 |
+
# With recursive
|
277 |
+
for glob, recursive in zip(["*", "**"], [True, False]):
|
278 |
+
fs.cp(fs_join(source, "subdir", glob), t, recursive=recursive)
|
279 |
+
assert fs.isfile(fs_join(target, "subfile1"))
|
280 |
+
assert fs.isfile(fs_join(target, "subfile2"))
|
281 |
+
assert fs.isdir(fs_join(target, "nesteddir"))
|
282 |
+
assert fs.isfile(fs_join(target, "nesteddir", "nestedfile"))
|
283 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
284 |
+
|
285 |
+
fs.rm(
|
286 |
+
[
|
287 |
+
fs_join(target, "subfile1"),
|
288 |
+
fs_join(target, "subfile2"),
|
289 |
+
fs_join(target, "nesteddir"),
|
290 |
+
],
|
291 |
+
recursive=True,
|
292 |
+
)
|
293 |
+
assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
|
294 |
+
|
295 |
+
# Limit recursive by maxdepth
|
296 |
+
fs.cp(
|
297 |
+
fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1
|
298 |
+
)
|
299 |
+
assert fs.isfile(fs_join(target, "subfile1"))
|
300 |
+
assert fs.isfile(fs_join(target, "subfile2"))
|
301 |
+
assert not fs.exists(fs_join(target, "nesteddir"))
|
302 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
303 |
+
|
304 |
+
fs.rm(
|
305 |
+
[
|
306 |
+
fs_join(target, "subfile1"),
|
307 |
+
fs_join(target, "subfile2"),
|
308 |
+
],
|
309 |
+
recursive=True,
|
310 |
+
)
|
311 |
+
assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
|
312 |
+
|
313 |
+
def test_copy_glob_to_new_directory(
|
314 |
+
self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
|
315 |
+
):
|
316 |
+
# Copy scenario 1h
|
317 |
+
source = fs_bulk_operations_scenario_0
|
318 |
+
|
319 |
+
target = fs_target
|
320 |
+
fs.mkdir(target)
|
321 |
+
|
322 |
+
for target_slash in [False, True]:
|
323 |
+
t = fs_join(target, "newdir")
|
324 |
+
if target_slash:
|
325 |
+
t += "/"
|
326 |
+
|
327 |
+
# Without recursive
|
328 |
+
fs.cp(fs_join(source, "subdir", "*"), t)
|
329 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
330 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
331 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile2"))
|
332 |
+
assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
|
333 |
+
assert not fs.exists(fs_join(target, "newdir", "nesteddir", "nestedfile"))
|
334 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
335 |
+
assert not fs.exists(fs_join(target, "newdir", "subdir"))
|
336 |
+
|
337 |
+
fs.rm(fs_join(target, "newdir"), recursive=True)
|
338 |
+
assert not fs.exists(fs_join(target, "newdir"))
|
339 |
+
|
340 |
+
# With recursive
|
341 |
+
for glob, recursive in zip(["*", "**"], [True, False]):
|
342 |
+
fs.cp(fs_join(source, "subdir", glob), t, recursive=recursive)
|
343 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
344 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
345 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile2"))
|
346 |
+
assert fs.isdir(fs_join(target, "newdir", "nesteddir"))
|
347 |
+
assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile"))
|
348 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
349 |
+
assert not fs.exists(fs_join(target, "newdir", "subdir"))
|
350 |
+
|
351 |
+
fs.rm(fs_join(target, "newdir"), recursive=True)
|
352 |
+
assert not fs.exists(fs_join(target, "newdir"))
|
353 |
+
|
354 |
+
# Limit recursive by maxdepth
|
355 |
+
fs.cp(
|
356 |
+
fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1
|
357 |
+
)
|
358 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
359 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
360 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile2"))
|
361 |
+
assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
|
362 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
363 |
+
assert not fs.exists(fs_join(target, "newdir", "subdir"))
|
364 |
+
|
365 |
+
fs.rm(fs_join(target, "newdir"), recursive=True)
|
366 |
+
assert not fs.exists(fs_join(target, "newdir"))
|
367 |
+
|
368 |
+
@pytest.mark.parametrize(
|
369 |
+
GLOB_EDGE_CASES_TESTS["argnames"],
|
370 |
+
GLOB_EDGE_CASES_TESTS["argvalues"],
|
371 |
+
)
|
372 |
+
def test_copy_glob_edge_cases(
|
373 |
+
self,
|
374 |
+
path,
|
375 |
+
recursive,
|
376 |
+
maxdepth,
|
377 |
+
expected,
|
378 |
+
fs,
|
379 |
+
fs_join,
|
380 |
+
fs_glob_edge_cases_files,
|
381 |
+
fs_target,
|
382 |
+
fs_sanitize_path,
|
383 |
+
):
|
384 |
+
# Copy scenario 1g
|
385 |
+
source = fs_glob_edge_cases_files
|
386 |
+
|
387 |
+
target = fs_target
|
388 |
+
|
389 |
+
for new_dir, target_slash in product([True, False], [True, False]):
|
390 |
+
fs.mkdir(target)
|
391 |
+
|
392 |
+
t = fs_join(target, "newdir") if new_dir else target
|
393 |
+
t = t + "/" if target_slash else t
|
394 |
+
|
395 |
+
fs.copy(fs_join(source, path), t, recursive=recursive, maxdepth=maxdepth)
|
396 |
+
|
397 |
+
output = fs.find(target)
|
398 |
+
if new_dir:
|
399 |
+
prefixed_expected = [
|
400 |
+
fs_sanitize_path(fs_join(target, "newdir", p)) for p in expected
|
401 |
+
]
|
402 |
+
else:
|
403 |
+
prefixed_expected = [
|
404 |
+
fs_sanitize_path(fs_join(target, p)) for p in expected
|
405 |
+
]
|
406 |
+
assert sorted(output) == sorted(prefixed_expected)
|
407 |
+
|
408 |
+
try:
|
409 |
+
fs.rm(target, recursive=True)
|
410 |
+
except FileNotFoundError:
|
411 |
+
pass
|
412 |
+
|
413 |
+
def test_copy_list_of_files_to_existing_directory(
|
414 |
+
self,
|
415 |
+
fs,
|
416 |
+
fs_join,
|
417 |
+
fs_bulk_operations_scenario_0,
|
418 |
+
fs_target,
|
419 |
+
supports_empty_directories,
|
420 |
+
):
|
421 |
+
# Copy scenario 2a
|
422 |
+
source = fs_bulk_operations_scenario_0
|
423 |
+
|
424 |
+
target = fs_target
|
425 |
+
fs.mkdir(target)
|
426 |
+
if not supports_empty_directories:
|
427 |
+
# Force target directory to exist by adding a dummy file
|
428 |
+
dummy = fs_join(target, "dummy")
|
429 |
+
fs.touch(dummy)
|
430 |
+
assert fs.isdir(target)
|
431 |
+
|
432 |
+
source_files = [
|
433 |
+
fs_join(source, "file1"),
|
434 |
+
fs_join(source, "file2"),
|
435 |
+
fs_join(source, "subdir", "subfile1"),
|
436 |
+
]
|
437 |
+
|
438 |
+
for target_slash in [False, True]:
|
439 |
+
t = target + "/" if target_slash else target
|
440 |
+
|
441 |
+
fs.cp(source_files, t)
|
442 |
+
assert fs.isfile(fs_join(target, "file1"))
|
443 |
+
assert fs.isfile(fs_join(target, "file2"))
|
444 |
+
assert fs.isfile(fs_join(target, "subfile1"))
|
445 |
+
|
446 |
+
fs.rm(
|
447 |
+
[
|
448 |
+
fs_join(target, "file1"),
|
449 |
+
fs_join(target, "file2"),
|
450 |
+
fs_join(target, "subfile1"),
|
451 |
+
],
|
452 |
+
recursive=True,
|
453 |
+
)
|
454 |
+
assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
|
455 |
+
|
456 |
+
def test_copy_list_of_files_to_new_directory(
|
457 |
+
self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
|
458 |
+
):
|
459 |
+
# Copy scenario 2b
|
460 |
+
source = fs_bulk_operations_scenario_0
|
461 |
+
|
462 |
+
target = fs_target
|
463 |
+
fs.mkdir(target)
|
464 |
+
|
465 |
+
source_files = [
|
466 |
+
fs_join(source, "file1"),
|
467 |
+
fs_join(source, "file2"),
|
468 |
+
fs_join(source, "subdir", "subfile1"),
|
469 |
+
]
|
470 |
+
|
471 |
+
fs.cp(source_files, fs_join(target, "newdir") + "/") # Note trailing slash
|
472 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
473 |
+
assert fs.isfile(fs_join(target, "newdir", "file1"))
|
474 |
+
assert fs.isfile(fs_join(target, "newdir", "file2"))
|
475 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
476 |
+
|
477 |
+
def test_copy_two_files_new_directory(
|
478 |
+
self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
|
479 |
+
):
|
480 |
+
# This is a duplicate of test_copy_list_of_files_to_new_directory and
|
481 |
+
# can eventually be removed.
|
482 |
+
source = fs_bulk_operations_scenario_0
|
483 |
+
|
484 |
+
target = fs_target
|
485 |
+
assert not fs.exists(target)
|
486 |
+
fs.cp([fs_join(source, "file1"), fs_join(source, "file2")], target)
|
487 |
+
|
488 |
+
assert fs.isdir(target)
|
489 |
+
assert fs.isfile(fs_join(target, "file1"))
|
490 |
+
assert fs.isfile(fs_join(target, "file2"))
|
491 |
+
|
492 |
+
def test_copy_directory_without_files_with_same_name_prefix(
|
493 |
+
self,
|
494 |
+
fs,
|
495 |
+
fs_join,
|
496 |
+
fs_target,
|
497 |
+
fs_dir_and_file_with_same_name_prefix,
|
498 |
+
supports_empty_directories,
|
499 |
+
):
|
500 |
+
# Create the test dirs
|
501 |
+
source = fs_dir_and_file_with_same_name_prefix
|
502 |
+
target = fs_target
|
503 |
+
|
504 |
+
# Test without glob
|
505 |
+
fs.cp(fs_join(source, "subdir"), target, recursive=True)
|
506 |
+
|
507 |
+
assert fs.isfile(fs_join(target, "subfile.txt"))
|
508 |
+
assert not fs.isfile(fs_join(target, "subdir.txt"))
|
509 |
+
|
510 |
+
fs.rm([fs_join(target, "subfile.txt")])
|
511 |
+
if supports_empty_directories:
|
512 |
+
assert fs.ls(target) == []
|
513 |
+
else:
|
514 |
+
assert not fs.exists(target)
|
515 |
+
|
516 |
+
# Test with glob
|
517 |
+
fs.cp(fs_join(source, "subdir*"), target, recursive=True)
|
518 |
+
|
519 |
+
assert fs.isdir(fs_join(target, "subdir"))
|
520 |
+
assert fs.isfile(fs_join(target, "subdir", "subfile.txt"))
|
521 |
+
assert fs.isfile(fs_join(target, "subdir.txt"))
|
522 |
+
|
523 |
+
def test_copy_with_source_and_destination_as_list(
|
524 |
+
self, fs, fs_target, fs_join, fs_10_files_with_hashed_names
|
525 |
+
):
|
526 |
+
# Create the test dir
|
527 |
+
source = fs_10_files_with_hashed_names
|
528 |
+
target = fs_target
|
529 |
+
|
530 |
+
# Create list of files for source and destination
|
531 |
+
source_files = []
|
532 |
+
destination_files = []
|
533 |
+
for i in range(10):
|
534 |
+
hashed_i = md5(str(i).encode("utf-8")).hexdigest()
|
535 |
+
source_files.append(fs_join(source, f"{hashed_i}.txt"))
|
536 |
+
destination_files.append(fs_join(target, f"{hashed_i}.txt"))
|
537 |
+
|
538 |
+
# Copy and assert order was kept
|
539 |
+
fs.copy(path1=source_files, path2=destination_files)
|
540 |
+
|
541 |
+
for i in range(10):
|
542 |
+
file_content = fs.cat(destination_files[i]).decode("utf-8")
|
543 |
+
assert file_content == str(i)
|
lib/python3.11/site-packages/fsspec/tests/abstract/get.py
ADDED
@@ -0,0 +1,587 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from hashlib import md5
|
2 |
+
from itertools import product
|
3 |
+
|
4 |
+
import pytest
|
5 |
+
|
6 |
+
from fsspec.implementations.local import make_path_posix
|
7 |
+
from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS
|
8 |
+
|
9 |
+
|
10 |
+
class AbstractGetTests:
|
11 |
+
def test_get_file_to_existing_directory(
|
12 |
+
self,
|
13 |
+
fs,
|
14 |
+
fs_join,
|
15 |
+
fs_bulk_operations_scenario_0,
|
16 |
+
local_fs,
|
17 |
+
local_join,
|
18 |
+
local_target,
|
19 |
+
):
|
20 |
+
# Copy scenario 1a
|
21 |
+
source = fs_bulk_operations_scenario_0
|
22 |
+
|
23 |
+
target = local_target
|
24 |
+
local_fs.mkdir(target)
|
25 |
+
assert local_fs.isdir(target)
|
26 |
+
|
27 |
+
target_file2 = local_join(target, "file2")
|
28 |
+
target_subfile1 = local_join(target, "subfile1")
|
29 |
+
|
30 |
+
# Copy from source directory
|
31 |
+
fs.get(fs_join(source, "file2"), target)
|
32 |
+
assert local_fs.isfile(target_file2)
|
33 |
+
|
34 |
+
# Copy from sub directory
|
35 |
+
fs.get(fs_join(source, "subdir", "subfile1"), target)
|
36 |
+
assert local_fs.isfile(target_subfile1)
|
37 |
+
|
38 |
+
# Remove copied files
|
39 |
+
local_fs.rm([target_file2, target_subfile1])
|
40 |
+
assert not local_fs.exists(target_file2)
|
41 |
+
assert not local_fs.exists(target_subfile1)
|
42 |
+
|
43 |
+
# Repeat with trailing slash on target
|
44 |
+
fs.get(fs_join(source, "file2"), target + "/")
|
45 |
+
assert local_fs.isdir(target)
|
46 |
+
assert local_fs.isfile(target_file2)
|
47 |
+
|
48 |
+
fs.get(fs_join(source, "subdir", "subfile1"), target + "/")
|
49 |
+
assert local_fs.isfile(target_subfile1)
|
50 |
+
|
51 |
+
def test_get_file_to_new_directory(
|
52 |
+
self,
|
53 |
+
fs,
|
54 |
+
fs_join,
|
55 |
+
fs_bulk_operations_scenario_0,
|
56 |
+
local_fs,
|
57 |
+
local_join,
|
58 |
+
local_target,
|
59 |
+
):
|
60 |
+
# Copy scenario 1b
|
61 |
+
source = fs_bulk_operations_scenario_0
|
62 |
+
|
63 |
+
target = local_target
|
64 |
+
local_fs.mkdir(target)
|
65 |
+
|
66 |
+
fs.get(
|
67 |
+
fs_join(source, "subdir", "subfile1"), local_join(target, "newdir/")
|
68 |
+
) # Note trailing slash
|
69 |
+
|
70 |
+
assert local_fs.isdir(target)
|
71 |
+
assert local_fs.isdir(local_join(target, "newdir"))
|
72 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
|
73 |
+
|
74 |
+
def test_get_file_to_file_in_existing_directory(
|
75 |
+
self,
|
76 |
+
fs,
|
77 |
+
fs_join,
|
78 |
+
fs_bulk_operations_scenario_0,
|
79 |
+
local_fs,
|
80 |
+
local_join,
|
81 |
+
local_target,
|
82 |
+
):
|
83 |
+
# Copy scenario 1c
|
84 |
+
source = fs_bulk_operations_scenario_0
|
85 |
+
|
86 |
+
target = local_target
|
87 |
+
local_fs.mkdir(target)
|
88 |
+
|
89 |
+
fs.get(fs_join(source, "subdir", "subfile1"), local_join(target, "newfile"))
|
90 |
+
assert local_fs.isfile(local_join(target, "newfile"))
|
91 |
+
|
92 |
+
def test_get_file_to_file_in_new_directory(
|
93 |
+
self,
|
94 |
+
fs,
|
95 |
+
fs_join,
|
96 |
+
fs_bulk_operations_scenario_0,
|
97 |
+
local_fs,
|
98 |
+
local_join,
|
99 |
+
local_target,
|
100 |
+
):
|
101 |
+
# Copy scenario 1d
|
102 |
+
source = fs_bulk_operations_scenario_0
|
103 |
+
|
104 |
+
target = local_target
|
105 |
+
local_fs.mkdir(target)
|
106 |
+
|
107 |
+
fs.get(
|
108 |
+
fs_join(source, "subdir", "subfile1"),
|
109 |
+
local_join(target, "newdir", "newfile"),
|
110 |
+
)
|
111 |
+
assert local_fs.isdir(local_join(target, "newdir"))
|
112 |
+
assert local_fs.isfile(local_join(target, "newdir", "newfile"))
|
113 |
+
|
114 |
+
def test_get_directory_to_existing_directory(
|
115 |
+
self,
|
116 |
+
fs,
|
117 |
+
fs_join,
|
118 |
+
fs_bulk_operations_scenario_0,
|
119 |
+
local_fs,
|
120 |
+
local_join,
|
121 |
+
local_target,
|
122 |
+
):
|
123 |
+
# Copy scenario 1e
|
124 |
+
source = fs_bulk_operations_scenario_0
|
125 |
+
|
126 |
+
target = local_target
|
127 |
+
local_fs.mkdir(target)
|
128 |
+
assert local_fs.isdir(target)
|
129 |
+
|
130 |
+
for source_slash, target_slash in zip([False, True], [False, True]):
|
131 |
+
s = fs_join(source, "subdir")
|
132 |
+
if source_slash:
|
133 |
+
s += "/"
|
134 |
+
t = target + "/" if target_slash else target
|
135 |
+
|
136 |
+
# Without recursive does nothing
|
137 |
+
fs.get(s, t)
|
138 |
+
assert local_fs.ls(target) == []
|
139 |
+
|
140 |
+
# With recursive
|
141 |
+
fs.get(s, t, recursive=True)
|
142 |
+
if source_slash:
|
143 |
+
assert local_fs.isfile(local_join(target, "subfile1"))
|
144 |
+
assert local_fs.isfile(local_join(target, "subfile2"))
|
145 |
+
assert local_fs.isdir(local_join(target, "nesteddir"))
|
146 |
+
assert local_fs.isfile(local_join(target, "nesteddir", "nestedfile"))
|
147 |
+
assert not local_fs.exists(local_join(target, "subdir"))
|
148 |
+
|
149 |
+
local_fs.rm(
|
150 |
+
[
|
151 |
+
local_join(target, "subfile1"),
|
152 |
+
local_join(target, "subfile2"),
|
153 |
+
local_join(target, "nesteddir"),
|
154 |
+
],
|
155 |
+
recursive=True,
|
156 |
+
)
|
157 |
+
else:
|
158 |
+
assert local_fs.isdir(local_join(target, "subdir"))
|
159 |
+
assert local_fs.isfile(local_join(target, "subdir", "subfile1"))
|
160 |
+
assert local_fs.isfile(local_join(target, "subdir", "subfile2"))
|
161 |
+
assert local_fs.isdir(local_join(target, "subdir", "nesteddir"))
|
162 |
+
assert local_fs.isfile(
|
163 |
+
local_join(target, "subdir", "nesteddir", "nestedfile")
|
164 |
+
)
|
165 |
+
|
166 |
+
local_fs.rm(local_join(target, "subdir"), recursive=True)
|
167 |
+
assert local_fs.ls(target) == []
|
168 |
+
|
169 |
+
# Limit recursive by maxdepth
|
170 |
+
fs.get(s, t, recursive=True, maxdepth=1)
|
171 |
+
if source_slash:
|
172 |
+
assert local_fs.isfile(local_join(target, "subfile1"))
|
173 |
+
assert local_fs.isfile(local_join(target, "subfile2"))
|
174 |
+
assert not local_fs.exists(local_join(target, "nesteddir"))
|
175 |
+
assert not local_fs.exists(local_join(target, "subdir"))
|
176 |
+
|
177 |
+
local_fs.rm(
|
178 |
+
[
|
179 |
+
local_join(target, "subfile1"),
|
180 |
+
local_join(target, "subfile2"),
|
181 |
+
],
|
182 |
+
recursive=True,
|
183 |
+
)
|
184 |
+
else:
|
185 |
+
assert local_fs.isdir(local_join(target, "subdir"))
|
186 |
+
assert local_fs.isfile(local_join(target, "subdir", "subfile1"))
|
187 |
+
assert local_fs.isfile(local_join(target, "subdir", "subfile2"))
|
188 |
+
assert not local_fs.exists(local_join(target, "subdir", "nesteddir"))
|
189 |
+
|
190 |
+
local_fs.rm(local_join(target, "subdir"), recursive=True)
|
191 |
+
assert local_fs.ls(target) == []
|
192 |
+
|
193 |
+
def test_get_directory_to_new_directory(
|
194 |
+
self,
|
195 |
+
fs,
|
196 |
+
fs_join,
|
197 |
+
fs_bulk_operations_scenario_0,
|
198 |
+
local_fs,
|
199 |
+
local_join,
|
200 |
+
local_target,
|
201 |
+
):
|
202 |
+
# Copy scenario 1f
|
203 |
+
source = fs_bulk_operations_scenario_0
|
204 |
+
|
205 |
+
target = local_target
|
206 |
+
local_fs.mkdir(target)
|
207 |
+
|
208 |
+
for source_slash, target_slash in zip([False, True], [False, True]):
|
209 |
+
s = fs_join(source, "subdir")
|
210 |
+
if source_slash:
|
211 |
+
s += "/"
|
212 |
+
t = local_join(target, "newdir")
|
213 |
+
if target_slash:
|
214 |
+
t += "/"
|
215 |
+
|
216 |
+
# Without recursive does nothing
|
217 |
+
fs.get(s, t)
|
218 |
+
assert local_fs.ls(target) == []
|
219 |
+
|
220 |
+
# With recursive
|
221 |
+
fs.get(s, t, recursive=True)
|
222 |
+
assert local_fs.isdir(local_join(target, "newdir"))
|
223 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
|
224 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
|
225 |
+
assert local_fs.isdir(local_join(target, "newdir", "nesteddir"))
|
226 |
+
assert local_fs.isfile(
|
227 |
+
local_join(target, "newdir", "nesteddir", "nestedfile")
|
228 |
+
)
|
229 |
+
assert not local_fs.exists(local_join(target, "subdir"))
|
230 |
+
|
231 |
+
local_fs.rm(local_join(target, "newdir"), recursive=True)
|
232 |
+
assert local_fs.ls(target) == []
|
233 |
+
|
234 |
+
# Limit recursive by maxdepth
|
235 |
+
fs.get(s, t, recursive=True, maxdepth=1)
|
236 |
+
assert local_fs.isdir(local_join(target, "newdir"))
|
237 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
|
238 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
|
239 |
+
assert not local_fs.exists(local_join(target, "newdir", "nesteddir"))
|
240 |
+
assert not local_fs.exists(local_join(target, "subdir"))
|
241 |
+
|
242 |
+
local_fs.rm(local_join(target, "newdir"), recursive=True)
|
243 |
+
assert not local_fs.exists(local_join(target, "newdir"))
|
244 |
+
|
245 |
+
def test_get_glob_to_existing_directory(
|
246 |
+
self,
|
247 |
+
fs,
|
248 |
+
fs_join,
|
249 |
+
fs_bulk_operations_scenario_0,
|
250 |
+
local_fs,
|
251 |
+
local_join,
|
252 |
+
local_target,
|
253 |
+
):
|
254 |
+
# Copy scenario 1g
|
255 |
+
source = fs_bulk_operations_scenario_0
|
256 |
+
|
257 |
+
target = local_target
|
258 |
+
local_fs.mkdir(target)
|
259 |
+
|
260 |
+
for target_slash in [False, True]:
|
261 |
+
t = target + "/" if target_slash else target
|
262 |
+
|
263 |
+
# Without recursive
|
264 |
+
fs.get(fs_join(source, "subdir", "*"), t)
|
265 |
+
assert local_fs.isfile(local_join(target, "subfile1"))
|
266 |
+
assert local_fs.isfile(local_join(target, "subfile2"))
|
267 |
+
assert not local_fs.isdir(local_join(target, "nesteddir"))
|
268 |
+
assert not local_fs.exists(local_join(target, "nesteddir", "nestedfile"))
|
269 |
+
assert not local_fs.exists(local_join(target, "subdir"))
|
270 |
+
|
271 |
+
local_fs.rm(
|
272 |
+
[
|
273 |
+
local_join(target, "subfile1"),
|
274 |
+
local_join(target, "subfile2"),
|
275 |
+
],
|
276 |
+
recursive=True,
|
277 |
+
)
|
278 |
+
assert local_fs.ls(target) == []
|
279 |
+
|
280 |
+
# With recursive
|
281 |
+
for glob, recursive in zip(["*", "**"], [True, False]):
|
282 |
+
fs.get(fs_join(source, "subdir", glob), t, recursive=recursive)
|
283 |
+
assert local_fs.isfile(local_join(target, "subfile1"))
|
284 |
+
assert local_fs.isfile(local_join(target, "subfile2"))
|
285 |
+
assert local_fs.isdir(local_join(target, "nesteddir"))
|
286 |
+
assert local_fs.isfile(local_join(target, "nesteddir", "nestedfile"))
|
287 |
+
assert not local_fs.exists(local_join(target, "subdir"))
|
288 |
+
|
289 |
+
local_fs.rm(
|
290 |
+
[
|
291 |
+
local_join(target, "subfile1"),
|
292 |
+
local_join(target, "subfile2"),
|
293 |
+
local_join(target, "nesteddir"),
|
294 |
+
],
|
295 |
+
recursive=True,
|
296 |
+
)
|
297 |
+
assert local_fs.ls(target) == []
|
298 |
+
|
299 |
+
# Limit recursive by maxdepth
|
300 |
+
fs.get(
|
301 |
+
fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1
|
302 |
+
)
|
303 |
+
assert local_fs.isfile(local_join(target, "subfile1"))
|
304 |
+
assert local_fs.isfile(local_join(target, "subfile2"))
|
305 |
+
assert not local_fs.exists(local_join(target, "nesteddir"))
|
306 |
+
assert not local_fs.exists(local_join(target, "subdir"))
|
307 |
+
|
308 |
+
local_fs.rm(
|
309 |
+
[
|
310 |
+
local_join(target, "subfile1"),
|
311 |
+
local_join(target, "subfile2"),
|
312 |
+
],
|
313 |
+
recursive=True,
|
314 |
+
)
|
315 |
+
assert local_fs.ls(target) == []
|
316 |
+
|
317 |
+
def test_get_glob_to_new_directory(
|
318 |
+
self,
|
319 |
+
fs,
|
320 |
+
fs_join,
|
321 |
+
fs_bulk_operations_scenario_0,
|
322 |
+
local_fs,
|
323 |
+
local_join,
|
324 |
+
local_target,
|
325 |
+
):
|
326 |
+
# Copy scenario 1h
|
327 |
+
source = fs_bulk_operations_scenario_0
|
328 |
+
|
329 |
+
target = local_target
|
330 |
+
local_fs.mkdir(target)
|
331 |
+
|
332 |
+
for target_slash in [False, True]:
|
333 |
+
t = fs_join(target, "newdir")
|
334 |
+
if target_slash:
|
335 |
+
t += "/"
|
336 |
+
|
337 |
+
# Without recursive
|
338 |
+
fs.get(fs_join(source, "subdir", "*"), t)
|
339 |
+
assert local_fs.isdir(local_join(target, "newdir"))
|
340 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
|
341 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
|
342 |
+
assert not local_fs.exists(local_join(target, "newdir", "nesteddir"))
|
343 |
+
assert not local_fs.exists(
|
344 |
+
local_join(target, "newdir", "nesteddir", "nestedfile")
|
345 |
+
)
|
346 |
+
assert not local_fs.exists(local_join(target, "subdir"))
|
347 |
+
assert not local_fs.exists(local_join(target, "newdir", "subdir"))
|
348 |
+
|
349 |
+
local_fs.rm(local_join(target, "newdir"), recursive=True)
|
350 |
+
assert local_fs.ls(target) == []
|
351 |
+
|
352 |
+
# With recursive
|
353 |
+
for glob, recursive in zip(["*", "**"], [True, False]):
|
354 |
+
fs.get(fs_join(source, "subdir", glob), t, recursive=recursive)
|
355 |
+
assert local_fs.isdir(local_join(target, "newdir"))
|
356 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
|
357 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
|
358 |
+
assert local_fs.isdir(local_join(target, "newdir", "nesteddir"))
|
359 |
+
assert local_fs.isfile(
|
360 |
+
local_join(target, "newdir", "nesteddir", "nestedfile")
|
361 |
+
)
|
362 |
+
assert not local_fs.exists(local_join(target, "subdir"))
|
363 |
+
assert not local_fs.exists(local_join(target, "newdir", "subdir"))
|
364 |
+
|
365 |
+
local_fs.rm(local_join(target, "newdir"), recursive=True)
|
366 |
+
assert not local_fs.exists(local_join(target, "newdir"))
|
367 |
+
|
368 |
+
# Limit recursive by maxdepth
|
369 |
+
fs.get(
|
370 |
+
fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1
|
371 |
+
)
|
372 |
+
assert local_fs.isdir(local_join(target, "newdir"))
|
373 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
|
374 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
|
375 |
+
assert not local_fs.exists(local_join(target, "newdir", "nesteddir"))
|
376 |
+
assert not local_fs.exists(local_join(target, "subdir"))
|
377 |
+
assert not local_fs.exists(local_join(target, "newdir", "subdir"))
|
378 |
+
|
379 |
+
local_fs.rm(local_fs.ls(target, detail=False), recursive=True)
|
380 |
+
assert not local_fs.exists(local_join(target, "newdir"))
|
381 |
+
|
382 |
+
@pytest.mark.parametrize(
|
383 |
+
GLOB_EDGE_CASES_TESTS["argnames"],
|
384 |
+
GLOB_EDGE_CASES_TESTS["argvalues"],
|
385 |
+
)
|
386 |
+
def test_get_glob_edge_cases(
|
387 |
+
self,
|
388 |
+
path,
|
389 |
+
recursive,
|
390 |
+
maxdepth,
|
391 |
+
expected,
|
392 |
+
fs,
|
393 |
+
fs_join,
|
394 |
+
fs_glob_edge_cases_files,
|
395 |
+
local_fs,
|
396 |
+
local_join,
|
397 |
+
local_target,
|
398 |
+
):
|
399 |
+
# Copy scenario 1g
|
400 |
+
source = fs_glob_edge_cases_files
|
401 |
+
|
402 |
+
target = local_target
|
403 |
+
|
404 |
+
for new_dir, target_slash in product([True, False], [True, False]):
|
405 |
+
local_fs.mkdir(target)
|
406 |
+
|
407 |
+
t = local_join(target, "newdir") if new_dir else target
|
408 |
+
t = t + "/" if target_slash else t
|
409 |
+
|
410 |
+
fs.get(fs_join(source, path), t, recursive=recursive, maxdepth=maxdepth)
|
411 |
+
|
412 |
+
output = local_fs.find(target)
|
413 |
+
if new_dir:
|
414 |
+
prefixed_expected = [
|
415 |
+
make_path_posix(local_join(target, "newdir", p)) for p in expected
|
416 |
+
]
|
417 |
+
else:
|
418 |
+
prefixed_expected = [
|
419 |
+
make_path_posix(local_join(target, p)) for p in expected
|
420 |
+
]
|
421 |
+
assert sorted(output) == sorted(prefixed_expected)
|
422 |
+
|
423 |
+
try:
|
424 |
+
local_fs.rm(target, recursive=True)
|
425 |
+
except FileNotFoundError:
|
426 |
+
pass
|
427 |
+
|
428 |
+
def test_get_list_of_files_to_existing_directory(
|
429 |
+
self,
|
430 |
+
fs,
|
431 |
+
fs_join,
|
432 |
+
fs_bulk_operations_scenario_0,
|
433 |
+
local_fs,
|
434 |
+
local_join,
|
435 |
+
local_target,
|
436 |
+
):
|
437 |
+
# Copy scenario 2a
|
438 |
+
source = fs_bulk_operations_scenario_0
|
439 |
+
|
440 |
+
target = local_target
|
441 |
+
local_fs.mkdir(target)
|
442 |
+
|
443 |
+
source_files = [
|
444 |
+
fs_join(source, "file1"),
|
445 |
+
fs_join(source, "file2"),
|
446 |
+
fs_join(source, "subdir", "subfile1"),
|
447 |
+
]
|
448 |
+
|
449 |
+
for target_slash in [False, True]:
|
450 |
+
t = target + "/" if target_slash else target
|
451 |
+
|
452 |
+
fs.get(source_files, t)
|
453 |
+
assert local_fs.isfile(local_join(target, "file1"))
|
454 |
+
assert local_fs.isfile(local_join(target, "file2"))
|
455 |
+
assert local_fs.isfile(local_join(target, "subfile1"))
|
456 |
+
|
457 |
+
local_fs.rm(
|
458 |
+
[
|
459 |
+
local_join(target, "file1"),
|
460 |
+
local_join(target, "file2"),
|
461 |
+
local_join(target, "subfile1"),
|
462 |
+
],
|
463 |
+
recursive=True,
|
464 |
+
)
|
465 |
+
assert local_fs.ls(target) == []
|
466 |
+
|
467 |
+
def test_get_list_of_files_to_new_directory(
|
468 |
+
self,
|
469 |
+
fs,
|
470 |
+
fs_join,
|
471 |
+
fs_bulk_operations_scenario_0,
|
472 |
+
local_fs,
|
473 |
+
local_join,
|
474 |
+
local_target,
|
475 |
+
):
|
476 |
+
# Copy scenario 2b
|
477 |
+
source = fs_bulk_operations_scenario_0
|
478 |
+
|
479 |
+
target = local_target
|
480 |
+
local_fs.mkdir(target)
|
481 |
+
|
482 |
+
source_files = [
|
483 |
+
fs_join(source, "file1"),
|
484 |
+
fs_join(source, "file2"),
|
485 |
+
fs_join(source, "subdir", "subfile1"),
|
486 |
+
]
|
487 |
+
|
488 |
+
fs.get(source_files, local_join(target, "newdir") + "/") # Note trailing slash
|
489 |
+
assert local_fs.isdir(local_join(target, "newdir"))
|
490 |
+
assert local_fs.isfile(local_join(target, "newdir", "file1"))
|
491 |
+
assert local_fs.isfile(local_join(target, "newdir", "file2"))
|
492 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
|
493 |
+
|
494 |
+
def test_get_directory_recursive(
|
495 |
+
self, fs, fs_join, fs_path, local_fs, local_join, local_target
|
496 |
+
):
|
497 |
+
# https://github.com/fsspec/filesystem_spec/issues/1062
|
498 |
+
# Recursive cp/get/put of source directory into non-existent target directory.
|
499 |
+
src = fs_join(fs_path, "src")
|
500 |
+
src_file = fs_join(src, "file")
|
501 |
+
fs.mkdir(src)
|
502 |
+
fs.touch(src_file)
|
503 |
+
|
504 |
+
target = local_target
|
505 |
+
|
506 |
+
# get without slash
|
507 |
+
assert not local_fs.exists(target)
|
508 |
+
for loop in range(2):
|
509 |
+
fs.get(src, target, recursive=True)
|
510 |
+
assert local_fs.isdir(target)
|
511 |
+
|
512 |
+
if loop == 0:
|
513 |
+
assert local_fs.isfile(local_join(target, "file"))
|
514 |
+
assert not local_fs.exists(local_join(target, "src"))
|
515 |
+
else:
|
516 |
+
assert local_fs.isfile(local_join(target, "file"))
|
517 |
+
assert local_fs.isdir(local_join(target, "src"))
|
518 |
+
assert local_fs.isfile(local_join(target, "src", "file"))
|
519 |
+
|
520 |
+
local_fs.rm(target, recursive=True)
|
521 |
+
|
522 |
+
# get with slash
|
523 |
+
assert not local_fs.exists(target)
|
524 |
+
for loop in range(2):
|
525 |
+
fs.get(src + "/", target, recursive=True)
|
526 |
+
assert local_fs.isdir(target)
|
527 |
+
assert local_fs.isfile(local_join(target, "file"))
|
528 |
+
assert not local_fs.exists(local_join(target, "src"))
|
529 |
+
|
530 |
+
def test_get_directory_without_files_with_same_name_prefix(
|
531 |
+
self,
|
532 |
+
fs,
|
533 |
+
fs_join,
|
534 |
+
local_fs,
|
535 |
+
local_join,
|
536 |
+
local_target,
|
537 |
+
fs_dir_and_file_with_same_name_prefix,
|
538 |
+
):
|
539 |
+
# Create the test dirs
|
540 |
+
source = fs_dir_and_file_with_same_name_prefix
|
541 |
+
target = local_target
|
542 |
+
|
543 |
+
# Test without glob
|
544 |
+
fs.get(fs_join(source, "subdir"), target, recursive=True)
|
545 |
+
|
546 |
+
assert local_fs.isfile(local_join(target, "subfile.txt"))
|
547 |
+
assert not local_fs.isfile(local_join(target, "subdir.txt"))
|
548 |
+
|
549 |
+
local_fs.rm([local_join(target, "subfile.txt")])
|
550 |
+
assert local_fs.ls(target) == []
|
551 |
+
|
552 |
+
# Test with glob
|
553 |
+
fs.get(fs_join(source, "subdir*"), target, recursive=True)
|
554 |
+
|
555 |
+
assert local_fs.isdir(local_join(target, "subdir"))
|
556 |
+
assert local_fs.isfile(local_join(target, "subdir", "subfile.txt"))
|
557 |
+
assert local_fs.isfile(local_join(target, "subdir.txt"))
|
558 |
+
|
559 |
+
def test_get_with_source_and_destination_as_list(
|
560 |
+
self,
|
561 |
+
fs,
|
562 |
+
fs_join,
|
563 |
+
local_fs,
|
564 |
+
local_join,
|
565 |
+
local_target,
|
566 |
+
fs_10_files_with_hashed_names,
|
567 |
+
):
|
568 |
+
# Create the test dir
|
569 |
+
source = fs_10_files_with_hashed_names
|
570 |
+
target = local_target
|
571 |
+
|
572 |
+
# Create list of files for source and destination
|
573 |
+
source_files = []
|
574 |
+
destination_files = []
|
575 |
+
for i in range(10):
|
576 |
+
hashed_i = md5(str(i).encode("utf-8")).hexdigest()
|
577 |
+
source_files.append(fs_join(source, f"{hashed_i}.txt"))
|
578 |
+
destination_files.append(
|
579 |
+
make_path_posix(local_join(target, f"{hashed_i}.txt"))
|
580 |
+
)
|
581 |
+
|
582 |
+
# Copy and assert order was kept
|
583 |
+
fs.get(rpath=source_files, lpath=destination_files)
|
584 |
+
|
585 |
+
for i in range(10):
|
586 |
+
file_content = local_fs.cat(destination_files[i]).decode("utf-8")
|
587 |
+
assert file_content == str(i)
|
lib/python3.11/site-packages/fsspec/tests/abstract/put.py
ADDED
@@ -0,0 +1,577 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from hashlib import md5
|
2 |
+
from itertools import product
|
3 |
+
|
4 |
+
import pytest
|
5 |
+
|
6 |
+
from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS
|
7 |
+
|
8 |
+
|
9 |
+
class AbstractPutTests:
|
10 |
+
def test_put_file_to_existing_directory(
|
11 |
+
self,
|
12 |
+
fs,
|
13 |
+
fs_join,
|
14 |
+
fs_target,
|
15 |
+
local_join,
|
16 |
+
local_bulk_operations_scenario_0,
|
17 |
+
supports_empty_directories,
|
18 |
+
):
|
19 |
+
# Copy scenario 1a
|
20 |
+
source = local_bulk_operations_scenario_0
|
21 |
+
|
22 |
+
target = fs_target
|
23 |
+
fs.mkdir(target)
|
24 |
+
if not supports_empty_directories:
|
25 |
+
# Force target directory to exist by adding a dummy file
|
26 |
+
fs.touch(fs_join(target, "dummy"))
|
27 |
+
assert fs.isdir(target)
|
28 |
+
|
29 |
+
target_file2 = fs_join(target, "file2")
|
30 |
+
target_subfile1 = fs_join(target, "subfile1")
|
31 |
+
|
32 |
+
# Copy from source directory
|
33 |
+
fs.put(local_join(source, "file2"), target)
|
34 |
+
assert fs.isfile(target_file2)
|
35 |
+
|
36 |
+
# Copy from sub directory
|
37 |
+
fs.put(local_join(source, "subdir", "subfile1"), target)
|
38 |
+
assert fs.isfile(target_subfile1)
|
39 |
+
|
40 |
+
# Remove copied files
|
41 |
+
fs.rm([target_file2, target_subfile1])
|
42 |
+
assert not fs.exists(target_file2)
|
43 |
+
assert not fs.exists(target_subfile1)
|
44 |
+
|
45 |
+
# Repeat with trailing slash on target
|
46 |
+
fs.put(local_join(source, "file2"), target + "/")
|
47 |
+
assert fs.isdir(target)
|
48 |
+
assert fs.isfile(target_file2)
|
49 |
+
|
50 |
+
fs.put(local_join(source, "subdir", "subfile1"), target + "/")
|
51 |
+
assert fs.isfile(target_subfile1)
|
52 |
+
|
53 |
+
def test_put_file_to_new_directory(
|
54 |
+
self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0
|
55 |
+
):
|
56 |
+
# Copy scenario 1b
|
57 |
+
source = local_bulk_operations_scenario_0
|
58 |
+
|
59 |
+
target = fs_target
|
60 |
+
fs.mkdir(target)
|
61 |
+
|
62 |
+
fs.put(
|
63 |
+
local_join(source, "subdir", "subfile1"), fs_join(target, "newdir/")
|
64 |
+
) # Note trailing slash
|
65 |
+
assert fs.isdir(target)
|
66 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
67 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
68 |
+
|
69 |
+
def test_put_file_to_file_in_existing_directory(
|
70 |
+
self,
|
71 |
+
fs,
|
72 |
+
fs_join,
|
73 |
+
fs_target,
|
74 |
+
local_join,
|
75 |
+
supports_empty_directories,
|
76 |
+
local_bulk_operations_scenario_0,
|
77 |
+
):
|
78 |
+
# Copy scenario 1c
|
79 |
+
source = local_bulk_operations_scenario_0
|
80 |
+
|
81 |
+
target = fs_target
|
82 |
+
fs.mkdir(target)
|
83 |
+
if not supports_empty_directories:
|
84 |
+
# Force target directory to exist by adding a dummy file
|
85 |
+
fs.touch(fs_join(target, "dummy"))
|
86 |
+
assert fs.isdir(target)
|
87 |
+
|
88 |
+
fs.put(local_join(source, "subdir", "subfile1"), fs_join(target, "newfile"))
|
89 |
+
assert fs.isfile(fs_join(target, "newfile"))
|
90 |
+
|
91 |
+
def test_put_file_to_file_in_new_directory(
|
92 |
+
self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0
|
93 |
+
):
|
94 |
+
# Copy scenario 1d
|
95 |
+
source = local_bulk_operations_scenario_0
|
96 |
+
|
97 |
+
target = fs_target
|
98 |
+
fs.mkdir(target)
|
99 |
+
|
100 |
+
fs.put(
|
101 |
+
local_join(source, "subdir", "subfile1"),
|
102 |
+
fs_join(target, "newdir", "newfile"),
|
103 |
+
)
|
104 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
105 |
+
assert fs.isfile(fs_join(target, "newdir", "newfile"))
|
106 |
+
|
107 |
+
def test_put_directory_to_existing_directory(
|
108 |
+
self,
|
109 |
+
fs,
|
110 |
+
fs_join,
|
111 |
+
fs_target,
|
112 |
+
local_bulk_operations_scenario_0,
|
113 |
+
supports_empty_directories,
|
114 |
+
):
|
115 |
+
# Copy scenario 1e
|
116 |
+
source = local_bulk_operations_scenario_0
|
117 |
+
|
118 |
+
target = fs_target
|
119 |
+
fs.mkdir(target)
|
120 |
+
if not supports_empty_directories:
|
121 |
+
# Force target directory to exist by adding a dummy file
|
122 |
+
dummy = fs_join(target, "dummy")
|
123 |
+
fs.touch(dummy)
|
124 |
+
assert fs.isdir(target)
|
125 |
+
|
126 |
+
for source_slash, target_slash in zip([False, True], [False, True]):
|
127 |
+
s = fs_join(source, "subdir")
|
128 |
+
if source_slash:
|
129 |
+
s += "/"
|
130 |
+
t = target + "/" if target_slash else target
|
131 |
+
|
132 |
+
# Without recursive does nothing
|
133 |
+
fs.put(s, t)
|
134 |
+
assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
|
135 |
+
|
136 |
+
# With recursive
|
137 |
+
fs.put(s, t, recursive=True)
|
138 |
+
if source_slash:
|
139 |
+
assert fs.isfile(fs_join(target, "subfile1"))
|
140 |
+
assert fs.isfile(fs_join(target, "subfile2"))
|
141 |
+
assert fs.isdir(fs_join(target, "nesteddir"))
|
142 |
+
assert fs.isfile(fs_join(target, "nesteddir", "nestedfile"))
|
143 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
144 |
+
|
145 |
+
fs.rm(
|
146 |
+
[
|
147 |
+
fs_join(target, "subfile1"),
|
148 |
+
fs_join(target, "subfile2"),
|
149 |
+
fs_join(target, "nesteddir"),
|
150 |
+
],
|
151 |
+
recursive=True,
|
152 |
+
)
|
153 |
+
else:
|
154 |
+
assert fs.isdir(fs_join(target, "subdir"))
|
155 |
+
assert fs.isfile(fs_join(target, "subdir", "subfile1"))
|
156 |
+
assert fs.isfile(fs_join(target, "subdir", "subfile2"))
|
157 |
+
assert fs.isdir(fs_join(target, "subdir", "nesteddir"))
|
158 |
+
assert fs.isfile(fs_join(target, "subdir", "nesteddir", "nestedfile"))
|
159 |
+
|
160 |
+
fs.rm(fs_join(target, "subdir"), recursive=True)
|
161 |
+
assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
|
162 |
+
|
163 |
+
# Limit recursive by maxdepth
|
164 |
+
fs.put(s, t, recursive=True, maxdepth=1)
|
165 |
+
if source_slash:
|
166 |
+
assert fs.isfile(fs_join(target, "subfile1"))
|
167 |
+
assert fs.isfile(fs_join(target, "subfile2"))
|
168 |
+
assert not fs.exists(fs_join(target, "nesteddir"))
|
169 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
170 |
+
|
171 |
+
fs.rm(
|
172 |
+
[
|
173 |
+
fs_join(target, "subfile1"),
|
174 |
+
fs_join(target, "subfile2"),
|
175 |
+
],
|
176 |
+
recursive=True,
|
177 |
+
)
|
178 |
+
else:
|
179 |
+
assert fs.isdir(fs_join(target, "subdir"))
|
180 |
+
assert fs.isfile(fs_join(target, "subdir", "subfile1"))
|
181 |
+
assert fs.isfile(fs_join(target, "subdir", "subfile2"))
|
182 |
+
assert not fs.exists(fs_join(target, "subdir", "nesteddir"))
|
183 |
+
|
184 |
+
fs.rm(fs_join(target, "subdir"), recursive=True)
|
185 |
+
assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
|
186 |
+
|
187 |
+
def test_put_directory_to_new_directory(
|
188 |
+
self,
|
189 |
+
fs,
|
190 |
+
fs_join,
|
191 |
+
fs_target,
|
192 |
+
local_bulk_operations_scenario_0,
|
193 |
+
supports_empty_directories,
|
194 |
+
):
|
195 |
+
# Copy scenario 1f
|
196 |
+
source = local_bulk_operations_scenario_0
|
197 |
+
|
198 |
+
target = fs_target
|
199 |
+
fs.mkdir(target)
|
200 |
+
|
201 |
+
for source_slash, target_slash in zip([False, True], [False, True]):
|
202 |
+
s = fs_join(source, "subdir")
|
203 |
+
if source_slash:
|
204 |
+
s += "/"
|
205 |
+
t = fs_join(target, "newdir")
|
206 |
+
if target_slash:
|
207 |
+
t += "/"
|
208 |
+
|
209 |
+
# Without recursive does nothing
|
210 |
+
fs.put(s, t)
|
211 |
+
if supports_empty_directories:
|
212 |
+
assert fs.ls(target) == []
|
213 |
+
else:
|
214 |
+
with pytest.raises(FileNotFoundError):
|
215 |
+
fs.ls(target)
|
216 |
+
|
217 |
+
# With recursive
|
218 |
+
fs.put(s, t, recursive=True)
|
219 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
220 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
221 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile2"))
|
222 |
+
assert fs.isdir(fs_join(target, "newdir", "nesteddir"))
|
223 |
+
assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile"))
|
224 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
225 |
+
|
226 |
+
fs.rm(fs_join(target, "newdir"), recursive=True)
|
227 |
+
assert not fs.exists(fs_join(target, "newdir"))
|
228 |
+
|
229 |
+
# Limit recursive by maxdepth
|
230 |
+
fs.put(s, t, recursive=True, maxdepth=1)
|
231 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
232 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
233 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile2"))
|
234 |
+
assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
|
235 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
236 |
+
|
237 |
+
fs.rm(fs_join(target, "newdir"), recursive=True)
|
238 |
+
assert not fs.exists(fs_join(target, "newdir"))
|
239 |
+
|
240 |
+
def test_put_glob_to_existing_directory(
|
241 |
+
self,
|
242 |
+
fs,
|
243 |
+
fs_join,
|
244 |
+
fs_target,
|
245 |
+
local_join,
|
246 |
+
supports_empty_directories,
|
247 |
+
local_bulk_operations_scenario_0,
|
248 |
+
):
|
249 |
+
# Copy scenario 1g
|
250 |
+
source = local_bulk_operations_scenario_0
|
251 |
+
|
252 |
+
target = fs_target
|
253 |
+
fs.mkdir(target)
|
254 |
+
if not supports_empty_directories:
|
255 |
+
# Force target directory to exist by adding a dummy file
|
256 |
+
dummy = fs_join(target, "dummy")
|
257 |
+
fs.touch(dummy)
|
258 |
+
assert fs.isdir(target)
|
259 |
+
|
260 |
+
for target_slash in [False, True]:
|
261 |
+
t = target + "/" if target_slash else target
|
262 |
+
|
263 |
+
# Without recursive
|
264 |
+
fs.put(local_join(source, "subdir", "*"), t)
|
265 |
+
assert fs.isfile(fs_join(target, "subfile1"))
|
266 |
+
assert fs.isfile(fs_join(target, "subfile2"))
|
267 |
+
assert not fs.isdir(fs_join(target, "nesteddir"))
|
268 |
+
assert not fs.exists(fs_join(target, "nesteddir", "nestedfile"))
|
269 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
270 |
+
|
271 |
+
fs.rm(
|
272 |
+
[
|
273 |
+
fs_join(target, "subfile1"),
|
274 |
+
fs_join(target, "subfile2"),
|
275 |
+
],
|
276 |
+
recursive=True,
|
277 |
+
)
|
278 |
+
assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
|
279 |
+
|
280 |
+
# With recursive
|
281 |
+
for glob, recursive in zip(["*", "**"], [True, False]):
|
282 |
+
fs.put(local_join(source, "subdir", glob), t, recursive=recursive)
|
283 |
+
assert fs.isfile(fs_join(target, "subfile1"))
|
284 |
+
assert fs.isfile(fs_join(target, "subfile2"))
|
285 |
+
assert fs.isdir(fs_join(target, "nesteddir"))
|
286 |
+
assert fs.isfile(fs_join(target, "nesteddir", "nestedfile"))
|
287 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
288 |
+
|
289 |
+
fs.rm(
|
290 |
+
[
|
291 |
+
fs_join(target, "subfile1"),
|
292 |
+
fs_join(target, "subfile2"),
|
293 |
+
fs_join(target, "nesteddir"),
|
294 |
+
],
|
295 |
+
recursive=True,
|
296 |
+
)
|
297 |
+
assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
|
298 |
+
|
299 |
+
# Limit recursive by maxdepth
|
300 |
+
fs.put(
|
301 |
+
local_join(source, "subdir", glob),
|
302 |
+
t,
|
303 |
+
recursive=recursive,
|
304 |
+
maxdepth=1,
|
305 |
+
)
|
306 |
+
assert fs.isfile(fs_join(target, "subfile1"))
|
307 |
+
assert fs.isfile(fs_join(target, "subfile2"))
|
308 |
+
assert not fs.exists(fs_join(target, "nesteddir"))
|
309 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
310 |
+
|
311 |
+
fs.rm(
|
312 |
+
[
|
313 |
+
fs_join(target, "subfile1"),
|
314 |
+
fs_join(target, "subfile2"),
|
315 |
+
],
|
316 |
+
recursive=True,
|
317 |
+
)
|
318 |
+
assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
|
319 |
+
|
320 |
+
def test_put_glob_to_new_directory(
|
321 |
+
self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0
|
322 |
+
):
|
323 |
+
# Copy scenario 1h
|
324 |
+
source = local_bulk_operations_scenario_0
|
325 |
+
|
326 |
+
target = fs_target
|
327 |
+
fs.mkdir(target)
|
328 |
+
|
329 |
+
for target_slash in [False, True]:
|
330 |
+
t = fs_join(target, "newdir")
|
331 |
+
if target_slash:
|
332 |
+
t += "/"
|
333 |
+
|
334 |
+
# Without recursive
|
335 |
+
fs.put(local_join(source, "subdir", "*"), t)
|
336 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
337 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
338 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile2"))
|
339 |
+
assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
|
340 |
+
assert not fs.exists(fs_join(target, "newdir", "nesteddir", "nestedfile"))
|
341 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
342 |
+
assert not fs.exists(fs_join(target, "newdir", "subdir"))
|
343 |
+
|
344 |
+
fs.rm(fs_join(target, "newdir"), recursive=True)
|
345 |
+
assert not fs.exists(fs_join(target, "newdir"))
|
346 |
+
|
347 |
+
# With recursive
|
348 |
+
for glob, recursive in zip(["*", "**"], [True, False]):
|
349 |
+
fs.put(local_join(source, "subdir", glob), t, recursive=recursive)
|
350 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
351 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
352 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile2"))
|
353 |
+
assert fs.isdir(fs_join(target, "newdir", "nesteddir"))
|
354 |
+
assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile"))
|
355 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
356 |
+
assert not fs.exists(fs_join(target, "newdir", "subdir"))
|
357 |
+
|
358 |
+
fs.rm(fs_join(target, "newdir"), recursive=True)
|
359 |
+
assert not fs.exists(fs_join(target, "newdir"))
|
360 |
+
|
361 |
+
# Limit recursive by maxdepth
|
362 |
+
fs.put(
|
363 |
+
local_join(source, "subdir", glob),
|
364 |
+
t,
|
365 |
+
recursive=recursive,
|
366 |
+
maxdepth=1,
|
367 |
+
)
|
368 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
369 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
370 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile2"))
|
371 |
+
assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
|
372 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
373 |
+
assert not fs.exists(fs_join(target, "newdir", "subdir"))
|
374 |
+
|
375 |
+
fs.rm(fs_join(target, "newdir"), recursive=True)
|
376 |
+
assert not fs.exists(fs_join(target, "newdir"))
|
377 |
+
|
378 |
+
@pytest.mark.parametrize(
|
379 |
+
GLOB_EDGE_CASES_TESTS["argnames"],
|
380 |
+
GLOB_EDGE_CASES_TESTS["argvalues"],
|
381 |
+
)
|
382 |
+
def test_put_glob_edge_cases(
|
383 |
+
self,
|
384 |
+
path,
|
385 |
+
recursive,
|
386 |
+
maxdepth,
|
387 |
+
expected,
|
388 |
+
fs,
|
389 |
+
fs_join,
|
390 |
+
fs_target,
|
391 |
+
local_glob_edge_cases_files,
|
392 |
+
local_join,
|
393 |
+
fs_sanitize_path,
|
394 |
+
):
|
395 |
+
# Copy scenario 1g
|
396 |
+
source = local_glob_edge_cases_files
|
397 |
+
|
398 |
+
target = fs_target
|
399 |
+
|
400 |
+
for new_dir, target_slash in product([True, False], [True, False]):
|
401 |
+
fs.mkdir(target)
|
402 |
+
|
403 |
+
t = fs_join(target, "newdir") if new_dir else target
|
404 |
+
t = t + "/" if target_slash else t
|
405 |
+
|
406 |
+
fs.put(local_join(source, path), t, recursive=recursive, maxdepth=maxdepth)
|
407 |
+
|
408 |
+
output = fs.find(target)
|
409 |
+
if new_dir:
|
410 |
+
prefixed_expected = [
|
411 |
+
fs_sanitize_path(fs_join(target, "newdir", p)) for p in expected
|
412 |
+
]
|
413 |
+
else:
|
414 |
+
prefixed_expected = [
|
415 |
+
fs_sanitize_path(fs_join(target, p)) for p in expected
|
416 |
+
]
|
417 |
+
assert sorted(output) == sorted(prefixed_expected)
|
418 |
+
|
419 |
+
try:
|
420 |
+
fs.rm(target, recursive=True)
|
421 |
+
except FileNotFoundError:
|
422 |
+
pass
|
423 |
+
|
424 |
+
def test_put_list_of_files_to_existing_directory(
|
425 |
+
self,
|
426 |
+
fs,
|
427 |
+
fs_join,
|
428 |
+
fs_target,
|
429 |
+
local_join,
|
430 |
+
local_bulk_operations_scenario_0,
|
431 |
+
supports_empty_directories,
|
432 |
+
):
|
433 |
+
# Copy scenario 2a
|
434 |
+
source = local_bulk_operations_scenario_0
|
435 |
+
|
436 |
+
target = fs_target
|
437 |
+
fs.mkdir(target)
|
438 |
+
if not supports_empty_directories:
|
439 |
+
# Force target directory to exist by adding a dummy file
|
440 |
+
dummy = fs_join(target, "dummy")
|
441 |
+
fs.touch(dummy)
|
442 |
+
assert fs.isdir(target)
|
443 |
+
|
444 |
+
source_files = [
|
445 |
+
local_join(source, "file1"),
|
446 |
+
local_join(source, "file2"),
|
447 |
+
local_join(source, "subdir", "subfile1"),
|
448 |
+
]
|
449 |
+
|
450 |
+
for target_slash in [False, True]:
|
451 |
+
t = target + "/" if target_slash else target
|
452 |
+
|
453 |
+
fs.put(source_files, t)
|
454 |
+
assert fs.isfile(fs_join(target, "file1"))
|
455 |
+
assert fs.isfile(fs_join(target, "file2"))
|
456 |
+
assert fs.isfile(fs_join(target, "subfile1"))
|
457 |
+
|
458 |
+
fs.rm(
|
459 |
+
[
|
460 |
+
fs_join(target, "file1"),
|
461 |
+
fs_join(target, "file2"),
|
462 |
+
fs_join(target, "subfile1"),
|
463 |
+
],
|
464 |
+
recursive=True,
|
465 |
+
)
|
466 |
+
assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
|
467 |
+
|
468 |
+
def test_put_list_of_files_to_new_directory(
|
469 |
+
self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0
|
470 |
+
):
|
471 |
+
# Copy scenario 2b
|
472 |
+
source = local_bulk_operations_scenario_0
|
473 |
+
|
474 |
+
target = fs_target
|
475 |
+
fs.mkdir(target)
|
476 |
+
|
477 |
+
source_files = [
|
478 |
+
local_join(source, "file1"),
|
479 |
+
local_join(source, "file2"),
|
480 |
+
local_join(source, "subdir", "subfile1"),
|
481 |
+
]
|
482 |
+
|
483 |
+
fs.put(source_files, fs_join(target, "newdir") + "/") # Note trailing slash
|
484 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
485 |
+
assert fs.isfile(fs_join(target, "newdir", "file1"))
|
486 |
+
assert fs.isfile(fs_join(target, "newdir", "file2"))
|
487 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
488 |
+
|
489 |
+
def test_put_directory_recursive(
|
490 |
+
self, fs, fs_join, fs_target, local_fs, local_join, local_path
|
491 |
+
):
|
492 |
+
# https://github.com/fsspec/filesystem_spec/issues/1062
|
493 |
+
# Recursive cp/get/put of source directory into non-existent target directory.
|
494 |
+
src = local_join(local_path, "src")
|
495 |
+
src_file = local_join(src, "file")
|
496 |
+
local_fs.mkdir(src)
|
497 |
+
local_fs.touch(src_file)
|
498 |
+
|
499 |
+
target = fs_target
|
500 |
+
|
501 |
+
# put without slash
|
502 |
+
assert not fs.exists(target)
|
503 |
+
for loop in range(2):
|
504 |
+
fs.put(src, target, recursive=True)
|
505 |
+
assert fs.isdir(target)
|
506 |
+
|
507 |
+
if loop == 0:
|
508 |
+
assert fs.isfile(fs_join(target, "file"))
|
509 |
+
assert not fs.exists(fs_join(target, "src"))
|
510 |
+
else:
|
511 |
+
assert fs.isfile(fs_join(target, "file"))
|
512 |
+
assert fs.isdir(fs_join(target, "src"))
|
513 |
+
assert fs.isfile(fs_join(target, "src", "file"))
|
514 |
+
|
515 |
+
fs.rm(target, recursive=True)
|
516 |
+
|
517 |
+
# put with slash
|
518 |
+
assert not fs.exists(target)
|
519 |
+
for loop in range(2):
|
520 |
+
fs.put(src + "/", target, recursive=True)
|
521 |
+
assert fs.isdir(target)
|
522 |
+
assert fs.isfile(fs_join(target, "file"))
|
523 |
+
assert not fs.exists(fs_join(target, "src"))
|
524 |
+
|
525 |
+
def test_put_directory_without_files_with_same_name_prefix(
|
526 |
+
self,
|
527 |
+
fs,
|
528 |
+
fs_join,
|
529 |
+
fs_target,
|
530 |
+
local_join,
|
531 |
+
local_dir_and_file_with_same_name_prefix,
|
532 |
+
supports_empty_directories,
|
533 |
+
):
|
534 |
+
# Create the test dirs
|
535 |
+
source = local_dir_and_file_with_same_name_prefix
|
536 |
+
target = fs_target
|
537 |
+
|
538 |
+
# Test without glob
|
539 |
+
fs.put(local_join(source, "subdir"), fs_target, recursive=True)
|
540 |
+
|
541 |
+
assert fs.isfile(fs_join(fs_target, "subfile.txt"))
|
542 |
+
assert not fs.isfile(fs_join(fs_target, "subdir.txt"))
|
543 |
+
|
544 |
+
fs.rm([fs_join(target, "subfile.txt")])
|
545 |
+
if supports_empty_directories:
|
546 |
+
assert fs.ls(target) == []
|
547 |
+
else:
|
548 |
+
assert not fs.exists(target)
|
549 |
+
|
550 |
+
# Test with glob
|
551 |
+
fs.put(local_join(source, "subdir*"), fs_target, recursive=True)
|
552 |
+
|
553 |
+
assert fs.isdir(fs_join(fs_target, "subdir"))
|
554 |
+
assert fs.isfile(fs_join(fs_target, "subdir", "subfile.txt"))
|
555 |
+
assert fs.isfile(fs_join(fs_target, "subdir.txt"))
|
556 |
+
|
557 |
+
def test_copy_with_source_and_destination_as_list(
|
558 |
+
self, fs, fs_target, fs_join, local_join, local_10_files_with_hashed_names
|
559 |
+
):
|
560 |
+
# Create the test dir
|
561 |
+
source = local_10_files_with_hashed_names
|
562 |
+
target = fs_target
|
563 |
+
|
564 |
+
# Create list of files for source and destination
|
565 |
+
source_files = []
|
566 |
+
destination_files = []
|
567 |
+
for i in range(10):
|
568 |
+
hashed_i = md5(str(i).encode("utf-8")).hexdigest()
|
569 |
+
source_files.append(local_join(source, f"{hashed_i}.txt"))
|
570 |
+
destination_files.append(fs_join(target, f"{hashed_i}.txt"))
|
571 |
+
|
572 |
+
# Copy and assert order was kept
|
573 |
+
fs.put(lpath=source_files, rpath=destination_files)
|
574 |
+
|
575 |
+
for i in range(10):
|
576 |
+
file_content = fs.cat(destination_files[i]).decode("utf-8")
|
577 |
+
assert file_content == str(i)
|
lib/python3.11/site-packages/fsspec/transaction.py
ADDED
@@ -0,0 +1,85 @@
from collections import deque


class Transaction:
    """Filesystem transaction write context

    Gathers files for deferred commit or discard, so that several write
    operations can be finalized semi-atomically. This works by having this
    instance as the ``.transaction`` attribute of the given filesystem
    """

    def __init__(self, fs):
        """
        Parameters
        ----------
        fs: FileSystem instance
        """
        self.fs = fs
        self.files = deque()

    def __enter__(self):
        self.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """End transaction and commit, if exit is not due to exception"""
        # only commit if there was no exception
        self.complete(commit=exc_type is None)
        self.fs._intrans = False
        self.fs._transaction = None

    def start(self):
        """Start a transaction on this FileSystem"""
        self.files = deque()  # clean up after previous failed completions
        self.fs._intrans = True

    def complete(self, commit=True):
        """Finish transaction: commit or discard all deferred files"""
        while self.files:
            f = self.files.popleft()
            if commit:
                f.commit()
            else:
                f.discard()
        self.fs._intrans = False


class FileActor:
    def __init__(self):
        self.files = []

    def commit(self):
        for f in self.files:
            f.commit()
        self.files.clear()

    def discard(self):
        for f in self.files:
            f.discard()
        self.files.clear()

    def append(self, f):
        self.files.append(f)


class DaskTransaction(Transaction):
    def __init__(self, fs):
        """
        Parameters
        ----------
        fs: FileSystem instance
        """
        import distributed

        super().__init__(fs)
        client = distributed.default_client()
        self.files = client.submit(FileActor, actor=True).result()

    def complete(self, commit=True):
        """Finish transaction: commit or discard all deferred files"""
        if commit:
            self.files.commit().result()
        else:
            self.files.discard().result()
        self.fs._intrans = False
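For context, a minimal sketch of how this Transaction class is typically driven through an fsspec filesystem's ``.transaction`` attribute (the memory filesystem and paths below are only illustrative assumptions):

import fsspec

fs = fsspec.filesystem("memory")
# Writes inside the block are deferred; they commit together on clean exit
# and are discarded if an exception escapes the block.
with fs.transaction:
    with fs.open("/staging/a.txt", "wb") as f:
        f.write(b"first")
    with fs.open("/staging/b.txt", "wb") as f:
        f.write(b"second")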
lib/python3.11/site-packages/fsspec/utils.py
ADDED
@@ -0,0 +1,742 @@
from __future__ import annotations

import contextlib
import logging
import math
import os
import pathlib
import re
import sys
import tempfile
from functools import partial
from hashlib import md5
from importlib.metadata import version
from typing import (
    IO,
    TYPE_CHECKING,
    Any,
    Callable,
    Iterable,
    Iterator,
    Sequence,
    TypeVar,
)
from urllib.parse import urlsplit

if TYPE_CHECKING:
    from typing_extensions import TypeGuard

    from fsspec.spec import AbstractFileSystem


DEFAULT_BLOCK_SIZE = 5 * 2**20

T = TypeVar("T")


def infer_storage_options(
    urlpath: str, inherit_storage_options: dict[str, Any] | None = None
) -> dict[str, Any]:
    """Infer storage options from URL path and merge it with existing storage
    options.

    Parameters
    ----------
    urlpath: str or unicode
        Either local absolute file path or URL (hdfs://namenode:8020/file.csv)
    inherit_storage_options: dict (optional)
        Its contents will get merged with the inferred information from the
        given path

    Returns
    -------
    Storage options dict.

    Examples
    --------
    >>> infer_storage_options('/mnt/datasets/test.csv')  # doctest: +SKIP
    {"protocol": "file", "path", "/mnt/datasets/test.csv"}
    >>> infer_storage_options(
    ...     'hdfs://username:pwd@node:123/mnt/datasets/test.csv?q=1',
    ...     inherit_storage_options={'extra': 'value'},
    ... )  # doctest: +SKIP
    {"protocol": "hdfs", "username": "username", "password": "pwd",
    "host": "node", "port": 123, "path": "/mnt/datasets/test.csv",
    "url_query": "q=1", "extra": "value"}
    """
    # Handle Windows paths including disk name in this special case
    if (
        re.match(r"^[a-zA-Z]:[\\/]", urlpath)
        or re.match(r"^[a-zA-Z0-9]+://", urlpath) is None
    ):
        return {"protocol": "file", "path": urlpath}

    parsed_path = urlsplit(urlpath)
    protocol = parsed_path.scheme or "file"
    if parsed_path.fragment:
        path = "#".join([parsed_path.path, parsed_path.fragment])
    else:
        path = parsed_path.path
    if protocol == "file":
        # Special case parsing file protocol URL on Windows according to:
        # https://msdn.microsoft.com/en-us/library/jj710207.aspx
        windows_path = re.match(r"^/([a-zA-Z])[:|]([\\/].*)$", path)
        if windows_path:
            path = "%s:%s" % windows_path.groups()

    if protocol in ["http", "https"]:
        # for HTTP, we don't want to parse, as requests will anyway
        return {"protocol": protocol, "path": urlpath}

    options: dict[str, Any] = {"protocol": protocol, "path": path}

    if parsed_path.netloc:
        # Parse `hostname` from netloc manually because `parsed_path.hostname`
        # lowercases the hostname which is not always desirable (e.g. in S3):
        # https://github.com/dask/dask/issues/1417
        options["host"] = parsed_path.netloc.rsplit("@", 1)[-1].rsplit(":", 1)[0]

        if protocol in ("s3", "s3a", "gcs", "gs"):
            options["path"] = options["host"] + options["path"]
        else:
            options["host"] = options["host"]
        if parsed_path.port:
            options["port"] = parsed_path.port
        if parsed_path.username:
            options["username"] = parsed_path.username
        if parsed_path.password:
            options["password"] = parsed_path.password

    if parsed_path.query:
        options["url_query"] = parsed_path.query
    if parsed_path.fragment:
        options["url_fragment"] = parsed_path.fragment

    if inherit_storage_options:
        update_storage_options(options, inherit_storage_options)

    return options


def update_storage_options(
    options: dict[str, Any], inherited: dict[str, Any] | None = None
) -> None:
    if not inherited:
        inherited = {}
    collisions = set(options) & set(inherited)
    if collisions:
        for collision in collisions:
            if options.get(collision) != inherited.get(collision):
                raise KeyError(
                    f"Collision between inferred and specified storage "
                    f"option:\n{collision}"
                )
    options.update(inherited)


# Compression extensions registered via fsspec.compression.register_compression
compressions: dict[str, str] = {}


def infer_compression(filename: str) -> str | None:
    """Infer compression, if available, from filename.

    Infer a named compression type, if registered and available, from filename
    extension. This includes builtin (gz, bz2, zip) compressions, as well as
    optional compressions. See fsspec.compression.register_compression.
    """
    extension = os.path.splitext(filename)[-1].strip(".").lower()
    if extension in compressions:
        return compressions[extension]
    return None


def build_name_function(max_int: float) -> Callable[[int], str]:
    """Returns a function that receives a single integer
    and returns it as a string padded by enough zero characters
    to align with maximum possible integer

    >>> name_f = build_name_function(57)

    >>> name_f(7)
    '07'
    >>> name_f(31)
    '31'
    >>> build_name_function(1000)(42)
    '0042'
    >>> build_name_function(999)(42)
    '042'
    >>> build_name_function(0)(0)
    '0'
    """
    # handle corner cases max_int is 0 or exact power of 10
    max_int += 1e-8

    pad_length = int(math.ceil(math.log10(max_int)))

    def name_function(i: int) -> str:
        return str(i).zfill(pad_length)

    return name_function


def seek_delimiter(file: IO[bytes], delimiter: bytes, blocksize: int) -> bool:
    r"""Seek current file to file start, file end, or byte after delimiter seq.

    Seeks file to next chunk delimiter, where chunks are defined on file start,
    a delimiting sequence, and file end. Use file.tell() to see location afterwards.
    Note that file start is a valid split, so must be at offset > 0 to seek for
    delimiter.

    Parameters
    ----------
    file: a file
    delimiter: bytes
        a delimiter like ``b'\n'`` or message sentinel, matching file .read() type
    blocksize: int
        Number of bytes to read from the file at once.


    Returns
    -------
    Returns True if a delimiter was found, False if at file start or end.

    """

    if file.tell() == 0:
        # beginning-of-file, return without seek
        return False

    # Interface is for binary IO, with delimiter as bytes, but initialize last
    # with result of file.read to preserve compatibility with text IO.
    last: bytes | None = None
    while True:
        current = file.read(blocksize)
        if not current:
            # end-of-file without delimiter
            return False
        full = last + current if last else current
        try:
            if delimiter in full:
                i = full.index(delimiter)
                file.seek(file.tell() - (len(full) - i) + len(delimiter))
                return True
            elif len(current) < blocksize:
                # end-of-file without delimiter
                return False
        except (OSError, ValueError):
            pass
        last = full[-len(delimiter) :]


def read_block(
    f: IO[bytes],
    offset: int,
    length: int | None,
    delimiter: bytes | None = None,
    split_before: bool = False,
) -> bytes:
    """Read a block of bytes from a file

    Parameters
    ----------
    f: File
        Open file
    offset: int
        Byte offset to start read
    length: int
        Number of bytes to read, read through end of file if None
    delimiter: bytes (optional)
        Ensure reading starts and stops at delimiter bytestring
    split_before: bool (optional)
        Start/stop read *before* delimiter bytestring.


    If using the ``delimiter=`` keyword argument we ensure that the read
    starts and stops at delimiter boundaries that follow the locations
    ``offset`` and ``offset + length``. If ``offset`` is zero then we
    start at zero, regardless of delimiter. The bytestring returned WILL
    include the terminating delimiter string.

    Examples
    --------

    >>> from io import BytesIO  # doctest: +SKIP
    >>> f = BytesIO(b'Alice, 100\\nBob, 200\\nCharlie, 300')  # doctest: +SKIP
    >>> read_block(f, 0, 13)  # doctest: +SKIP
    b'Alice, 100\\nBo'

    >>> read_block(f, 0, 13, delimiter=b'\\n')  # doctest: +SKIP
    b'Alice, 100\\nBob, 200\\n'

    >>> read_block(f, 10, 10, delimiter=b'\\n')  # doctest: +SKIP
    b'Bob, 200\\nCharlie, 300'
    """
    if delimiter:
        f.seek(offset)
        found_start_delim = seek_delimiter(f, delimiter, 2**16)
        if length is None:
            return f.read()
        start = f.tell()
        length -= start - offset

        f.seek(start + length)
        found_end_delim = seek_delimiter(f, delimiter, 2**16)
        end = f.tell()

        # Adjust split location to before delimiter iff seek found the
        # delimiter sequence, not start or end of file.
        if found_start_delim and split_before:
            start -= len(delimiter)

        if found_end_delim and split_before:
            end -= len(delimiter)

        offset = start
        length = end - start

    f.seek(offset)

    # TODO: allow length to be None and read to the end of the file?
    assert length is not None
    b = f.read(length)
    return b


def tokenize(*args: Any, **kwargs: Any) -> str:
    """Deterministic token

    (modified from dask.base)

    >>> tokenize([1, 2, '3'])
    '9d71491b50023b06fc76928e6eddb952'

    >>> tokenize('Hello') == tokenize('Hello')
    True
    """
    if kwargs:
        args += (kwargs,)
    try:
        h = md5(str(args).encode())
    except ValueError:
        # FIPS systems: https://github.com/fsspec/filesystem_spec/issues/380
        h = md5(str(args).encode(), usedforsecurity=False)
    return h.hexdigest()


def stringify_path(filepath: str | os.PathLike[str] | pathlib.Path) -> str:
    """Attempt to convert a path-like object to a string.

    Parameters
    ----------
    filepath: object to be converted

    Returns
    -------
    filepath_str: maybe a string version of the object

    Notes
    -----
    Objects supporting the fspath protocol are coerced according to its
    __fspath__ method.

    For backwards compatibility with older Python version, pathlib.Path
    objects are specially coerced.

    Any other object is passed through unchanged, which includes bytes,
    strings, buffers, or anything else that's not even path-like.
    """
    if isinstance(filepath, str):
        return filepath
    elif hasattr(filepath, "__fspath__"):
        return filepath.__fspath__()
    elif isinstance(filepath, pathlib.Path):
        return str(filepath)
    elif hasattr(filepath, "path"):
        return filepath.path
    else:
        return filepath  # type: ignore[return-value]


def make_instance(
    cls: Callable[..., T], args: Sequence[Any], kwargs: dict[str, Any]
) -> T:
    inst = cls(*args, **kwargs)
    inst._determine_worker()  # type: ignore[attr-defined]
    return inst


def common_prefix(paths: Iterable[str]) -> str:
    """For a list of paths, find the shortest prefix common to all"""
    parts = [p.split("/") for p in paths]
    lmax = min(len(p) for p in parts)
    end = 0
    for i in range(lmax):
        end = all(p[i] == parts[0][i] for p in parts)
        if not end:
            break
    i += end
    return "/".join(parts[0][:i])


def other_paths(
    paths: list[str],
    path2: str | list[str],
    exists: bool = False,
    flatten: bool = False,
) -> list[str]:
    """In bulk file operations, construct a new file tree from a list of files

    Parameters
    ----------
    paths: list of str
        The input file tree
    path2: str or list of str
        Root to construct the new list in. If this is already a list of str, we just
        assert it has the right number of elements.
    exists: bool (optional)
        For a str destination, it is already exists (and is a dir), files should
        end up inside.
    flatten: bool (optional)
        Whether to flatten the input directory tree structure so that the output files
        are in the same directory.

    Returns
    -------
    list of str
    """

    if isinstance(path2, str):
        path2 = path2.rstrip("/")

        if flatten:
            path2 = ["/".join((path2, p.split("/")[-1])) for p in paths]
        else:
            cp = common_prefix(paths)
            if exists:
                cp = cp.rsplit("/", 1)[0]
            if not cp and all(not s.startswith("/") for s in paths):
                path2 = ["/".join([path2, p]) for p in paths]
            else:
                path2 = [p.replace(cp, path2, 1) for p in paths]
    else:
        assert len(paths) == len(path2)
    return path2


def is_exception(obj: Any) -> bool:
    return isinstance(obj, BaseException)


def isfilelike(f: Any) -> TypeGuard[IO[bytes]]:
    for attr in ["read", "close", "tell"]:
        if not hasattr(f, attr):
            return False
    return True


def get_protocol(url: str) -> str:
    url = stringify_path(url)
    parts = re.split(r"(\:\:|\://)", url, 1)
    if len(parts) > 1:
        return parts[0]
    return "file"


def can_be_local(path: str) -> bool:
    """Can the given URL be used with open_local?"""
    from fsspec import get_filesystem_class

    try:
        return getattr(get_filesystem_class(get_protocol(path)), "local_file", False)
    except (ValueError, ImportError):
        # not in registry or import failed
        return False


def get_package_version_without_import(name: str) -> str | None:
    """For given package name, try to find the version without importing it

    Import and package.__version__ is still the backup here, so an import
    *might* happen.

    Returns either the version string, or None if the package
    or the version was not readily found.
    """
    if name in sys.modules:
        mod = sys.modules[name]
        if hasattr(mod, "__version__"):
            return mod.__version__
    try:
        return version(name)
    except:  # noqa: E722
        pass
    try:
        import importlib

        mod = importlib.import_module(name)
        return mod.__version__
    except (ImportError, AttributeError):
        return None


def setup_logging(
    logger: logging.Logger | None = None,
    logger_name: str | None = None,
    level: str = "DEBUG",
    clear: bool = True,
) -> logging.Logger:
    if logger is None and logger_name is None:
        raise ValueError("Provide either logger object or logger name")
    logger = logger or logging.getLogger(logger_name)
    handle = logging.StreamHandler()
    formatter = logging.Formatter(
        "%(asctime)s - %(name)s - %(levelname)s - %(funcName)s -- %(message)s"
    )
    handle.setFormatter(formatter)
    if clear:
        logger.handlers.clear()
    logger.addHandler(handle)
    logger.setLevel(level)
    return logger


def _unstrip_protocol(name: str, fs: AbstractFileSystem) -> str:
    return fs.unstrip_protocol(name)


def mirror_from(
    origin_name: str, methods: Iterable[str]
) -> Callable[[type[T]], type[T]]:
    """Mirror attributes and methods from the given
    origin_name attribute of the instance to the
    decorated class"""

    def origin_getter(method: str, self: Any) -> Any:
        origin = getattr(self, origin_name)
        return getattr(origin, method)

    def wrapper(cls: type[T]) -> type[T]:
        for method in methods:
            wrapped_method = partial(origin_getter, method)
            setattr(cls, method, property(wrapped_method))
        return cls

    return wrapper


@contextlib.contextmanager
def nullcontext(obj: T) -> Iterator[T]:
    yield obj


def merge_offset_ranges(
    paths: list[str],
    starts: list[int] | int,
    ends: list[int] | int,
    max_gap: int = 0,
    max_block: int | None = None,
    sort: bool = True,
) -> tuple[list[str], list[int], list[int]]:
    """Merge adjacent byte-offset ranges when the inter-range
    gap is <= `max_gap`, and when the merged byte range does not
    exceed `max_block` (if specified). By default, this function
    will re-order the input paths and byte ranges to ensure sorted
    order. If the user can guarantee that the inputs are already
    sorted, passing `sort=False` will skip the re-ordering.
    """
    # Check input
    if not isinstance(paths, list):
        raise TypeError
    if not isinstance(starts, list):
        starts = [starts] * len(paths)
    if not isinstance(ends, list):
        ends = [ends] * len(paths)
    if len(starts) != len(paths) or len(ends) != len(paths):
        raise ValueError

    # Early Return
    if len(starts) <= 1:
        return paths, starts, ends

    starts = [s or 0 for s in starts]
    # Sort by paths and then ranges if `sort=True`
    if sort:
        paths, starts, ends = (
            list(v)
            for v in zip(
                *sorted(
                    zip(paths, starts, ends),
                )
            )
        )

    if paths:
        # Loop through the coupled `paths`, `starts`, and
        # `ends`, and merge adjacent blocks when appropriate
        new_paths = paths[:1]
        new_starts = starts[:1]
        new_ends = ends[:1]
        for i in range(1, len(paths)):
            if paths[i] == paths[i - 1] and new_ends[-1] is None:
                continue
            elif (
                paths[i] != paths[i - 1]
                or ((starts[i] - new_ends[-1]) > max_gap)
                or (max_block is not None and (ends[i] - new_starts[-1]) > max_block)
            ):
                # Cannot merge with previous block.
                # Add new `paths`, `starts`, and `ends` elements
                new_paths.append(paths[i])
                new_starts.append(starts[i])
                new_ends.append(ends[i])
            else:
                # Merge with previous block by updating the
                # last element of `ends`
                new_ends[-1] = ends[i]
        return new_paths, new_starts, new_ends

    # `paths` is empty. Just return input lists
    return paths, starts, ends


def file_size(filelike: IO[bytes]) -> int:
    """Find length of any open read-mode file-like"""
    pos = filelike.tell()
    try:
        return filelike.seek(0, 2)
    finally:
        filelike.seek(pos)


@contextlib.contextmanager
def atomic_write(path: str, mode: str = "wb"):
    """
    A context manager that opens a temporary file next to `path` and, on exit,
    replaces `path` with the temporary file, thereby updating `path`
    atomically.
    """
    fd, fn = tempfile.mkstemp(
        dir=os.path.dirname(path), prefix=os.path.basename(path) + "-"
    )
    try:
        with open(fd, mode) as fp:
            yield fp
    except BaseException:
        with contextlib.suppress(FileNotFoundError):
            os.unlink(fn)
        raise
    else:
        os.replace(fn, path)


def _translate(pat, STAR, QUESTION_MARK):
    # Copied from: https://github.com/python/cpython/pull/106703.
    res: list[str] = []
    add = res.append
    i, n = 0, len(pat)
    while i < n:
        c = pat[i]
        i = i + 1
        if c == "*":
            # compress consecutive `*` into one
            if (not res) or res[-1] is not STAR:
                add(STAR)
        elif c == "?":
            add(QUESTION_MARK)
        elif c == "[":
            j = i
            if j < n and pat[j] == "!":
                j = j + 1
            if j < n and pat[j] == "]":
                j = j + 1
            while j < n and pat[j] != "]":
                j = j + 1
            if j >= n:
                add("\\[")
            else:
                stuff = pat[i:j]
                if "-" not in stuff:
                    stuff = stuff.replace("\\", r"\\")
                else:
                    chunks = []
                    k = i + 2 if pat[i] == "!" else i + 1
                    while True:
                        k = pat.find("-", k, j)
                        if k < 0:
                            break
                        chunks.append(pat[i:k])
                        i = k + 1
                        k = k + 3
                    chunk = pat[i:j]
                    if chunk:
                        chunks.append(chunk)
                    else:
                        chunks[-1] += "-"
                    # Remove empty ranges -- invalid in RE.
                    for k in range(len(chunks) - 1, 0, -1):
                        if chunks[k - 1][-1] > chunks[k][0]:
                            chunks[k - 1] = chunks[k - 1][:-1] + chunks[k][1:]
                            del chunks[k]
                    # Escape backslashes and hyphens for set difference (--).
                    # Hyphens that create ranges shouldn't be escaped.
                    stuff = "-".join(
                        s.replace("\\", r"\\").replace("-", r"\-") for s in chunks
                    )
                # Escape set operations (&&, ~~ and ||).
                stuff = re.sub(r"([&~|])", r"\\\1", stuff)
                i = j + 1
                if not stuff:
                    # Empty range: never match.
                    add("(?!)")
                elif stuff == "!":
                    # Negated empty range: match any character.
                    add(".")
                else:
                    if stuff[0] == "!":
                        stuff = "^" + stuff[1:]
                    elif stuff[0] in ("^", "["):
                        stuff = "\\" + stuff
                    add(f"[{stuff}]")
        else:
            add(re.escape(c))
    assert i == n
    return res


def glob_translate(pat):
    # Copied from: https://github.com/python/cpython/pull/106703.
    # The keyword parameters' values are fixed to:
    # recursive=True, include_hidden=True, seps=None
    """Translate a pathname with shell wildcards to a regular expression."""
    if os.path.altsep:
        seps = os.path.sep + os.path.altsep
    else:
        seps = os.path.sep
    escaped_seps = "".join(map(re.escape, seps))
    any_sep = f"[{escaped_seps}]" if len(seps) > 1 else escaped_seps
    not_sep = f"[^{escaped_seps}]"
    one_last_segment = f"{not_sep}+"
    one_segment = f"{one_last_segment}{any_sep}"
    any_segments = f"(?:.+{any_sep})?"
    any_last_segments = ".*"
    results = []
    parts = re.split(any_sep, pat)
    last_part_idx = len(parts) - 1
    for idx, part in enumerate(parts):
        if part == "*":
            results.append(one_segment if idx < last_part_idx else one_last_segment)
            continue
        if part == "**":
            results.append(any_segments if idx < last_part_idx else any_last_segments)
            continue
        elif "**" in part:
            raise ValueError(
                "Invalid pattern: '**' can only be an entire path component"
            )
        if part:
            results.extend(_translate(part, f"{not_sep}*", not_sep))
        if idx < last_part_idx:
            results.append(any_sep)
    res = "".join(results)
    return rf"(?s:{res})\Z"
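As a quick illustration of the range-merging helper above (a sketch; the file name and byte offsets are arbitrary examples), adjacent requests to the same path are coalesced whenever the gap between them does not exceed max_gap:

from fsspec.utils import merge_offset_ranges

paths = ["data.parquet", "data.parquet", "data.parquet"]
starts = [0, 100, 500]
ends = [90, 200, 600]
# The first two ranges are 10 bytes apart, so with max_gap=10 they merge
# into a single (0, 200) read; the third range stays separate.
print(merge_offset_ranges(paths, starts, ends, max_gap=10))
# expected: (['data.parquet', 'data.parquet'], [0, 500], [200, 600])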
lib/python3.11/site-packages/functorch/_C.cpython-311-darwin.so
ADDED
Binary file (332 kB).
lib/python3.11/site-packages/functorch/__init__.py
ADDED
@@ -0,0 +1,38 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import torch

from torch._functorch.deprecated import (
    combine_state_for_ensemble,
    functionalize,
    grad,
    grad_and_value,
    hessian,
    jacfwd,
    jacrev,
    jvp,
    make_functional,
    make_functional_with_buffers,
    vjp,
    vmap,
)

# utilities. Maybe these should go in their own namespace in the future?
from torch._functorch.make_functional import (
    FunctionalModule,
    FunctionalModuleWithBuffers,
)

# Top-level APIs. Please think carefully before adding something to the
# top-level namespace:
# - private helper functions should go into torch._functorch
# - very experimental things should go into functorch.experimental
# - compilation related things should go into functorch.compile

# Was never documented
from torch._functorch.python_key import make_fx

__version__ = torch.__version__
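These re-exports are what keep the familiar top-level calls importable; a minimal sketch of the deprecated-but-still-working API (assumes torch is installed; the loss function is only an example):

import torch
from functorch import grad, vmap

def loss(x):
    return (x ** 2).sum()

x = torch.randn(3)
g = grad(loss)(x)                        # gradient of a scalar-valued function, equals 2 * x
batched = vmap(loss)(torch.randn(5, 3))  # one scalar loss per row of the batch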
lib/python3.11/site-packages/functorch/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (947 Bytes).
lib/python3.11/site-packages/functorch/_src/__init__.py
ADDED
File without changes
lib/python3.11/site-packages/functorch/_src/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (229 Bytes).
lib/python3.11/site-packages/functorch/_src/aot_autograd/__init__.py
ADDED
@@ -0,0 +1,8 @@
# This file has moved to under torch/_functorch. It is not public API.
# If you are not a PyTorch developer and you are relying on the following
# imports, please file an issue.
from torch._functorch.aot_autograd import (
    aot_autograd_decompositions,
    KNOWN_TYPES,
    PytreeThunk,
)
lib/python3.11/site-packages/functorch/_src/aot_autograd/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (417 Bytes).
lib/python3.11/site-packages/functorch/_src/eager_transforms/__init__.py
ADDED
@@ -0,0 +1,7 @@
# This file has moved to under torch/_functorch. It is not public API.
# If you are not a PyTorch developer and you are relying on the following
# imports, please file an issue.
from torch._functorch.eager_transforms import (
    _assert_wrapped_functional,
    _unwrap_functional_tensor,
)
lib/python3.11/site-packages/functorch/_src/eager_transforms/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (406 Bytes).
lib/python3.11/site-packages/functorch/_src/make_functional/__init__.py
ADDED
@@ -0,0 +1,4 @@
# This file has moved to under torch/_functorch. It is not public API.
# If you are not a PyTorch developer and you are relying on the following
# imports, please file an issue.
from torch._functorch.make_functional import _swap_state
lib/python3.11/site-packages/functorch/_src/make_functional/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (329 Bytes).
lib/python3.11/site-packages/functorch/_src/vmap/__init__.py
ADDED
@@ -0,0 +1,16 @@
# This file has moved to under torch/_functorch. It is not public API.
# If you are not a PyTorch developer and you are relying on the following
# imports, please file an issue.
from torch._functorch.vmap import (
    _add_batch_dim,
    _broadcast_to_and_flatten,
    _create_batched_inputs,
    _get_name,
    _process_batched_inputs,
    _remove_batch_dim,
    _unwrap_batched,
    _validate_and_get_batch_size,
    Tensor,
    tree_flatten,
    tree_unflatten,
)
lib/python3.11/site-packages/functorch/_src/vmap/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (705 Bytes).
lib/python3.11/site-packages/functorch/compile/__init__.py
ADDED
@@ -0,0 +1,31 @@
from torch._functorch import config
from torch._functorch.aot_autograd import (
    aot_function,
    aot_module,
    aot_module_simplified,
    compiled_function,
    compiled_module,
    get_aot_compilation_context,
    get_aot_graph_name,
    get_graph_being_compiled,
    make_boxed_compiler,
    make_boxed_func,
)
from torch._functorch.compilers import (
    debug_compile,
    default_decompositions,
    draw_graph_compile,
    memory_efficient_fusion,
    nnc_jit,
    nop,
    print_compile,
    ts_compile,
)
from torch._functorch.fx_minifier import minifier
from torch._functorch.partitioners import (
    default_partition,
    draw_graph,
    draw_joint_graph,
    min_cut_rematerialization_partition,
)
from torch._functorch.python_key import pythonkey_decompose
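A small sketch of the ahead-of-time compilation entry point re-exported here, using the nop compiler so the traced forward and backward graphs are run unchanged (illustrative only; the function f is an assumption):

import torch
from functorch.compile import aot_function, nop

def f(a, b):
    return (a * b).sum()

# Trace forward/backward graphs once, then run them through the "nop" compiler.
aot_f = aot_function(f, fw_compiler=nop, bw_compiler=nop)
out = aot_f(torch.randn(4, requires_grad=True), torch.randn(4))
out.backward()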
lib/python3.11/site-packages/functorch/compile/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (1.47 kB).
lib/python3.11/site-packages/functorch/dim/__init__.py
ADDED
@@ -0,0 +1,179 @@
import dis
import inspect
from typing import Sequence, Union

import torch

import functorch._C
from functorch._C import dim as _C
from .tree_map import tree_flatten, tree_map
from .wrap_type import wrap_type

_C._patch_tensor_class()
dims, DimList, dimlists = _C.dims, _C.DimList, _C.dimlists


class DimensionMismatchError(Exception):
    pass


class DimensionBindError(Exception):
    pass


from . import op_properties

# use dict to avoid writing C++ bindings for set
pointwise = {t: True for t in op_properties.pointwise}

use_c = True
if not use_c:
    from . import reference


class _Tensor:
    # fast path around slow wrapping/unwrapping logic for simply queries used
    # by the implementation...

    @property
    def dims(self):
        return tuple(d for d in self._levels if isinstance(d, Dim))

    def dim(self):
        return self.ndim

    if use_c:
        __torch_function__ = classmethod(_C.__torch_function__)
        expand = _C._instancemethod(_C.expand)
    else:
        __torch_function__ = reference.__torch_function__
        expand = reference.expand

    index = _C._instancemethod(_C.index)

    def __repr__(self):
        tensor, levels, ndim = self._tensor, self._levels, self.ndim
        return f"{tensor}\nwith dims={tuple(l + ndim if isinstance(l, int) else l for l in levels)} sizes={tuple(tensor.size())}"


TensorLike = (_Tensor, torch.Tensor)


class Dim(_C.Dim, _Tensor):
    # note that _C.Dim comes before tensor because we want the Dim API for things like size to take precendence.
    # Tensor defines format, but we want to print Dims with special formatting
    __format__ = object.__format__


class Tensor(_Tensor, _C.Tensor):
    if not use_c:
        from_batched = staticmethod(_C.Tensor_from_batched)
    from_positional = staticmethod(_C.Tensor_from_positional)
    sum = _C._instancemethod(_C.Tensor_sum)


def cat(tensors, dim, new_dim):
    n = dims()
    return stack(tensors, n, dim).index([n, dim], new_dim)


if use_c:
    _wrap = _C._wrap

    def _def(name, *args, **kwargs):
        orig = getattr(torch.Tensor, name)
        setattr(_Tensor, name, _C._instancemethod(_wrap(orig, *args, **kwargs)))

    t__getitem__ = _C._instancemethod(_C.__getitem__)
    stack = _C.stack
    split = _C._instancemethod(_C.split)
else:
    _wrap, _def = reference._wrap, reference._def
    t__getitem__ = reference.t__getitem__
    stack = reference.stack
    split = reference.split

# note: there is no python reference
t__setitem__ = _C._instancemethod(_C.__setitem__)
# this is patched in the C API because otherwise torch.Tensor will
# no longer be considered a sequence and things will break
# torch.Tensor.__getitem__ = t__getitem__

_Tensor.__getitem__ = t__getitem__
# torch.Tensor.__setitem__ = t__setitem__
_Tensor.__setitem__ = t__setitem__

torch.Tensor.split = split
_Tensor.split = split
torch.Tensor.expand = _C._instancemethod(_C.expand)
torch.Tensor.index = _C._instancemethod(_C.index)
wrap_type(use_c, _Tensor, torch.Tensor, _Tensor.__torch_function__)
del _Tensor.ndim

if use_c:
    _Tensor.order = _C._instancemethod(_C.order)
else:
    _Tensor.order = reference.positional

_def("mean")
_def("sum")
_def("all")
_def("amax")
_def("amin")
_def("aminmax")
_def("any")
_def("count_nonzero")
_def("logsumexp")
_def("nanmean")
_def("nansum")
_def("prod")
_def("std", keepdim_offset=2)
_def("var", keepdim_offset=2)
_def("max", single_dim=True)
_def("min", single_dim=True)
_def("argmax", single_dim=True)
_def("argmin", single_dim=True)
_def("kthvalue", single_dim=True)
_def("median", single_dim=True)
_def("nanmedian", single_dim=True)
_def("mode", single_dim=True)
_def("sort", reduce=False)
_def("argsort", reduce=False)
_def("unbind", single_dim=True)
_def("chunk", dim_offset=1, reduce=False)
_def("cummax", single_dim=True, reduce=False)
_def("cummin", single_dim=True, reduce=False)
_def("cumprod", single_dim=True, reduce=False)
_def("cumprod_", single_dim=True, reduce=False)
_def("cumsum", single_dim=True, reduce=False)
_def("cumsum_", single_dim=True, reduce=False)
_def("logcumsumexp", single_dim=True, reduce=False)
_def("renorm", dim_offset=1, single_dim=True, reduce=False)
_def("softmax", single_dim=True, reduce=False)
softmax = _wrap(torch.nn.functional.softmax, single_dim=True, reduce=False)

# stuff to handle in the future, because they require special
# binding logic for dims
# cross
# diag_embed
# diagonal
# diagonal_scatter
# diff
# nanquantile
# quantile
# roll
# rot90
# topk (new dimes on output)
# should these all be subsumed by inplace indexing?
# index_add_
# index_add
# index_copy
# index_copy_
# index_fill
# index_fill_
# index_select
# scatter
# scatter_
# scatter_add
# scatter_add_
# scatter_reduce
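For orientation, the first-class dimension objects created by dims above are used roughly like this (a sketch under the assumption that the compiled functorch._C extension is available; tensor shapes are arbitrary examples):

import torch
from functorch.dim import dims

i, j = dims(2)
A = torch.randn(3, 4)
# Bind named dimensions to the tensor's axes, then reduce over one of them.
Ai = A[i, j]
row_sums = Ai.sum(j)       # result still carries the first-class dim `i`
plain = row_sums.order(i)  # back to an ordinary positional 3-element tensor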
lib/python3.11/site-packages/functorch/dim/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (7.99 kB).
lib/python3.11/site-packages/functorch/dim/__pycache__/batch_tensor.cpython-311.pyc
ADDED
Binary file (1.29 kB).
lib/python3.11/site-packages/functorch/dim/__pycache__/delayed_mul_tensor.cpython-311.pyc
ADDED
Binary file (5.61 kB).
lib/python3.11/site-packages/functorch/dim/__pycache__/dim.cpython-311.pyc
ADDED
Binary file (6.89 kB).
lib/python3.11/site-packages/functorch/dim/__pycache__/magic_trace.cpython-311.pyc
ADDED
Binary file (2.49 kB).
lib/python3.11/site-packages/functorch/dim/__pycache__/op_properties.cpython-311.pyc
ADDED
Binary file (12.1 kB).
lib/python3.11/site-packages/functorch/dim/__pycache__/reference.cpython-311.pyc
ADDED
Binary file (32.4 kB).
lib/python3.11/site-packages/functorch/dim/__pycache__/tree_map.cpython-311.pyc
ADDED
Binary file (803 Bytes).
lib/python3.11/site-packages/functorch/dim/__pycache__/wrap_type.cpython-311.pyc
ADDED
Binary file (2.55 kB).
lib/python3.11/site-packages/functorch/dim/batch_tensor.py
ADDED
@@ -0,0 +1,25 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
from contextlib import contextmanager

from torch._C._functorch import _vmap_add_layers, _vmap_remove_layers

_enabled = False


@contextmanager
def _enable_layers(dims):
    global _enabled
    assert not _enabled
    input = sorted((d._level, d.size) for d in dims if not isinstance(d, int))
    n = len(input)
    try:
        _vmap_add_layers(input)
        _enabled = True
        yield
    finally:
        _enabled = False
        _vmap_remove_layers(n)
lib/python3.11/site-packages/functorch/dim/delayed_mul_tensor.py
ADDED
@@ -0,0 +1,77 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import torch

from . import _Tensor, Tensor
from .reference import _dims, _enable_layers, llist, ltuple


class DelayedMulTensor(_Tensor):
    def __init__(self, lhs, rhs):
        self._lhs, self._rhs = lhs, rhs
        self._data = None
        self._levels_data = None
        self._has_device = lhs._has_device or rhs._has_device
        self._batchtensor_data = None
        self._tensor_data = None

    @property
    def _levels(self):
        if self._levels_data is None:
            levels = llist(self._lhs._levels)
            for l in self._rhs._levels:
                if l not in levels:
                    levels.append(l)
            self._levels_data = ltuple(levels)
        return self._levels_data

    @property
    def _batchtensor(self):
        if self._batchtensor_data is None:
            with _enable_layers(self._levels):
                print("bt multiply fallback")
                self._batchtensor_data = self._lhs._batchtensor * self._rhs._batchtensor
        return self._batchtensor_data

    @property
    def _tensor(self):
        if self._tensor_data is None:
            self._tensor_data = Tensor.from_batched(
                self._batchtensor, self._has_device
            )._tensor
        return self._tensor_data

    @property
    def ndim(self):
        return self._batchtensor.ndim

    @property
    def dims(self):
        return ltuple(super().dims)

    def sum(self, dim):
        dims = _dims(dim, 0, False, False)
        n = ord("a")
        all_levels = self._levels

        def to_char(d):
            return chr(n + all_levels.index(d))

        plhs, levelslhs = self._lhs._tensor, self._lhs._levels
        prhs, levelsrhs = self._rhs._tensor, self._rhs._levels
        new_dims = tuple(d for d in self.dims if d not in dims)
        new_levels = [l for l in self._levels if l not in dims]
        fmt = "".join(
            [
                *(to_char(d) for d in levelslhs),
                ",",
                *(to_char(d) for d in levelsrhs),
                "->",
                *(to_char(d) for d in new_levels),
            ]
        )
        result_data = torch.einsum(fmt, (plhs, prhs))
        return Tensor.from_positional(result_data, new_levels, True)
lib/python3.11/site-packages/functorch/dim/dim.py
ADDED
@@ -0,0 +1,110 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
_vmap_levels = []


@dataclass
class LevelInfo:
    level: int
    alive: bool = True


class Dim:
    def __init__(self, name: str, size: Union[None, int] = None):
        self.name = name
        self._size = None
        self._vmap_level = None
        if size is not None:
            self.size = size

    def __del__(self):
        if self._vmap_level is not None:
            _vmap_active_levels[self._vmap_stack].alive = False
            while (
                not _vmap_levels[-1].alive and current_level() == _vmap_levels[-1].level
            ):
                _vmap_decrement_nesting()
                _vmap_levels.pop()

    @property
    def size(self):
        assert self.is_bound
        return self._size

    @size.setter
    def size(self, size: int):
        if self._size is None:
            self._size = size
            self._vmap_level = _vmap_increment_nesting(size, "same")
            self._vmap_stack = len(_vmap_levels)
            _vmap_levels.append(LevelInfo(self._vmap_level))

        elif self._size != size:
            raise DimensionBindError(
                f"Dim '{self}' previously bound to a dimension of size {self._size} cannot bind to a dimension of size {size}"
            )

    @property
    def is_bound(self):
        return self._size is not None

    def __repr__(self):
        return self.name


def extract_name(inst):
    assert inst.opname == "STORE_FAST" or inst.opname == "STORE_NAME"
    return inst.argval


_cache = {}


def dims(lists=0):
    frame = inspect.currentframe()
    assert frame is not None
    calling_frame = frame.f_back
    assert calling_frame is not None
    code, lasti = calling_frame.f_code, calling_frame.f_lasti
    key = (code, lasti)
    if key not in _cache:
        first = lasti // 2 + 1
        instructions = list(dis.get_instructions(calling_frame.f_code))
        unpack = instructions[first]

        if unpack.opname == "STORE_FAST" or unpack.opname == "STORE_NAME":
            # just a single dim, not a list
            name = unpack.argval
            ctor = Dim if lists == 0 else DimList
            _cache[key] = lambda: ctor(name=name)
        else:
            assert unpack.opname == "UNPACK_SEQUENCE"
            ndims = unpack.argval
            names = tuple(
                extract_name(instructions[first + 1 + i]) for i in range(ndims)
            )
            first_list = len(names) - lists
            _cache[key] = lambda: tuple(
                Dim(n) if i < first_list else DimList(name=n)
                for i, n in enumerate(names)
            )
    return _cache[key]()


def _dim_set(positional, arg):
    def convert(a):
        if isinstance(a, Dim):
            return a
        else:
            assert isinstance(a, int)
            return positional[a]

    if arg is None:
        return positional
    elif not isinstance(arg, (Dim, int)):
        return tuple(convert(a) for a in arg)
    else:
        return (convert(arg),)
lib/python3.11/site-packages/functorch/dim/magic_trace.py
ADDED
@@ -0,0 +1,42 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import os
import signal
import subprocess
from contextlib import contextmanager


@contextmanager
def magic_trace(output="trace.fxt", magic_trace_cache="/tmp/magic-trace"):
    pid = os.getpid()
    if not os.path.exists(magic_trace_cache):
        print(f"Downloading magic_trace to: {magic_trace_cache}")
        subprocess.run(
            [
                "wget",
                "-O",
                magic_trace_cache,
                "-q",
                "https://github.com/janestreet/magic-trace/releases/download/v1.0.2/magic-trace",
            ]
        )
        subprocess.run(["chmod", "+x", magic_trace_cache])
    args = [magic_trace_cache, "attach", "-pid", str(pid), "-o", output]
    p = subprocess.Popen(args, stderr=subprocess.PIPE, encoding="utf-8")
    while True:
        x = p.stderr.readline()
        print(x)
        if "Attached" in x:
            break
    try:
        yield
    finally:
        p.send_signal(signal.SIGINT)
        r = p.wait()
        print(p.stderr.read())
        p.stderr.close()
        if r != 0:
            raise ValueError(f"magic_trace exited abnormally: {r}")
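Typical use of the helper defined above, sketched under the assumption of a Linux host with perf support; the workload and output path are arbitrary examples:

import torch
from functorch.dim.magic_trace import magic_trace

def workload():
    x = torch.randn(1024, 1024)
    for _ in range(10):
        x = x @ x

# Attaches the magic-trace profiler to this process for the duration
# of the block and writes a trace file on exit.
with magic_trace(output="matmul.fxt"):
    workload()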