GildedRose-Refactoring-Kata/.venv/lib/python3.12/site-packages/mrjob/fs/local.py
2025-06-22 13:36:01 +05:30

114 lines
3.4 KiB
Python

# Copyright 2009-2012 Yelp and Contributors
# Copyright 2015 Yelp
# Copyright 2017 Yelp
# Copyright 2019 Yelp
#
# Licensed under the Apache License, Version 2.0 (the "License");
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import glob
import hashlib
import logging
import os
import shutil
from mrjob.cat import decompress
from mrjob.fs.base import Filesystem
from mrjob.parse import is_uri
log = logging.getLogger(__name__)
class LocalFilesystem(Filesystem):
"""Filesystem for local files. Typically you will get one of these via
``MRJobRunner().fs``.
"""
def can_handle_path(self, path):
return path.startswith('file:///') or not is_uri(path)
def du(self, path_glob):
path_glob = _from_file_uri(path_glob)
return sum(os.path.getsize(path) for path in self.ls(path_glob))
def ls(self, path_glob):
bare_path_glob = _from_file_uri(path_glob)
uri_scheme = path_glob[0:-len(bare_path_glob)] # 'file:///' or ''
for path in glob.glob(bare_path_glob):
if os.path.isdir(path):
for dirname, _, filenames in os.walk(path, followlinks=True):
for filename in filenames:
yield uri_scheme + os.path.join(dirname, filename)
else:
yield uri_scheme + path
def _cat_file(self, path):
path = _from_file_uri(path)
with open(path, 'rb') as f:
for chunk in decompress(f, path):
yield chunk
def exists(self, path_glob):
path_glob = _from_file_uri(path_glob)
return bool(glob.glob(path_glob))
def mkdir(self, path):
path = _from_file_uri(path)
if not os.path.isdir(path):
os.makedirs(path)
def put(self, src, path):
"""Copy a file from *src* to *path*"""
path = _from_file_uri(path)
shutil.copyfile(src, path)
def rm(self, path_glob):
path_glob = _from_file_uri(path_glob)
for path in glob.glob(path_glob):
if os.path.isdir(path):
log.debug('Recursively deleting %s' % path)
shutil.rmtree(path)
else:
log.debug('Deleting %s' % path)
os.remove(path)
def touchz(self, path):
path = _from_file_uri(path)
if os.path.isfile(path) and os.path.getsize(path) != 0:
raise OSError('Non-empty file %r already exists!' % (path,))
# zero out the file
with open(path, 'w'):
pass
def _md5sum_file(self, fileobj, block_size=(512 ** 2)): # 256K default
md5 = hashlib.md5()
while True:
data = fileobj.read(block_size)
if not data:
break
md5.update(data)
return md5.hexdigest()
def md5sum(self, path):
path = _from_file_uri(path)
with open(path, 'rb') as f:
return self._md5sum_file(f)
def _from_file_uri(path_or_uri):
if is_uri(path_or_uri):
if path_or_uri.startswith('file:///'):
return path_or_uri[7:] # keep last /
else:
raise ValueError('Not a file:/// URI')
else:
return path_or_uri