GildedRose-Refactoring-Kata/.venv/lib/python3.12/site-packages/mrjob/fs/base.py
2025-06-22 13:36:01 +05:30

151 lines
5.1 KiB
Python

# Copyright 2009-2015 Yelp and Contributors
# Copyright 2017 Yelp
# Copyright 2019 Yelp
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import os.path
import posixpath
from mrjob.parse import is_uri
from mrjob.parse import urlparse
# Module-level logger, named after this module (``__name__``).
log = logging.getLogger(__name__)
class Filesystem(object):
    """Some simple filesystem operations that are common across the local
    filesystem, S3, GCS, HDFS, and remote machines via SSH.

    Different runners provide functionality for different filesystems via their
    :py:attr:`~mrjob.runner.MRJobRunner.fs` attribute. Generally a runner will
    wrap one or more filesystems with
    :py:class:`mrjob.fs.composite.CompositeFilesystem`.

    Schemes supported:

    * :py:class:`mrjob.fs.gcs.GCSFilesystem`: ``gs://``
    * :py:class:`mrjob.fs.hadoop.HadoopFilesystem`: ``hdfs://`` and other URIs
    * :py:class:`mrjob.fs.local.LocalFilesystem`: paths and ``file://`` URIs
    * :py:class:`mrjob.fs.s3.S3Filesystem`: ``s3://``, ``s3a://``, ``s3n://``
    * :py:class:`mrjob.fs.ssh.SSHFilesystem`: ``ssh://``

    .. versionchanged:: 0.6.12

       `LocalFilesystem` added support for ``file://`` URIs
    """
    # Note: currently, we're not very consistent about the names of arguments
    # to these methods in subclasses, which we'll fix in v0.7.0 (see #1979).

    def can_handle_path(self, path):
        """Can we handle this path at all?

        This base implementation handles nothing and always returns
        ``False``.

        :param path: a path or URI
        :return: ``False``
        """
        return False

    def cat(self, path_glob):
        """cat all files matching **path_glob**, decompressing if necessary

        This yields bytes, which don't necessarily correspond to lines
        (see #1544). If multiple files are catted, yields ``b''`` between
        each file.

        Files are enumerated with :py:meth:`ls` and read with
        :py:meth:`_cat_file`, neither of which is implemented in this
        base class.

        :param path_glob: path or URI (may be a glob) of the files to read
        """
        for i, filename in enumerate(self.ls(path_glob)):
            if i > 0:
                yield b''  # mark end of previous file

            for chunk in self._cat_file(filename):
                yield chunk

    def du(self, path_glob):
        """Get the total size of files matching ``path_glob``

        Corresponds roughly to: ``hadoop fs -du path_glob``

        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError

    def ls(self, path_glob):
        """Recursively list all files in the given path.

        We don't return directories for compatibility with S3 (which
        has no concept of them)

        Corresponds roughly to: ``hadoop fs -ls -R path_glob``

        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError

    def _cat_file(self, path):
        """Yield the contents of the file at *path* as a series of ``bytes``,
        not necessarily respecting line boundaries.

        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError

    def exists(self, path_glob):
        """Does the given path/URI exist?

        Corresponds roughly to: ``hadoop fs -test -e path_glob``

        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError

    def join(self, path, *paths):
        """Join *paths* onto *path* (which may be a URI)

        If any component is a URI (according to ``is_uri()``), the *last*
        such component wins: everything before it is ignored, and the
        components after it are joined onto its path with
        :py:func:`posixpath.join`. Only the scheme, netloc, and path
        parts of that URI are kept in the result.

        If no component is a URI, all components are joined with
        :py:func:`os.path.join`.

        :param path: first path component, or a URI
        :param paths: further components to append
        :return: the joined path or URI, as a string
        """
        all_paths = (path,) + paths

        # if there's a URI, we only care about it and what follows,
        # so search from the right for the last URI
        for i in range(len(all_paths) - 1, -1, -1):
            if is_uri(all_paths[i]):
                scheme, netloc, uri_path = urlparse(all_paths[i])[:3]
                # an empty URI path (e.g. "s3://bucket") is treated as "/"
                joined_path = posixpath.join(
                    uri_path or '/', *all_paths[i + 1:])
                return '%s://%s%s' % (scheme, netloc, joined_path)

        # no URIs at all; treat everything as a local path
        return os.path.join(*all_paths)

    def mkdir(self, path):
        """Create the given dir and its subdirs (if they don't already
        exist). On cloud filesystems (e.g. S3), also create the corresponding
        bucket as needed

        Corresponds roughly to: ``hadoop fs -mkdir -p path``

        .. versionadded:: 0.6.8 creates buckets on cloud filesystems

        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError

    def put(self, src, path):
        """Upload a file on the local filesystem (*src*) to *path*.
        Like with :py:func:`shutil.copyfile`, *path* should be the full path
        of the new file, not a directory which should contain it.

        Corresponds roughly to ``hadoop fs -put src path``.

        .. versionadded:: 0.6.8

        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError

    def rm(self, path_glob):
        """Recursively delete the given file/directory, if it exists

        Corresponds roughly to: ``hadoop fs -rm -R path_glob``

        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError

    def touchz(self, path):
        """Make an empty file in the given location. Raises an error if
        a non-zero length file already exists in that location.

        Corresponds to: ``hadoop fs -touchz path``

        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError

    def md5sum(self, path):
        """Generate the md5 sum of the file at *path*

        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError