mirror of
https://github.com/emilybache/GildedRose-Refactoring-Kata.git
synced 2026-02-10 04:01:19 +00:00
114 lines
3.4 KiB
Python
114 lines
3.4 KiB
Python
# Copyright 2009-2012 Yelp and Contributors
|
|
# Copyright 2015 Yelp
|
|
# Copyright 2017 Yelp
|
|
# Copyright 2019 Yelp
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
import glob
|
|
import hashlib
|
|
import logging
|
|
import os
|
|
import shutil
|
|
|
|
from mrjob.cat import decompress
|
|
from mrjob.fs.base import Filesystem
|
|
from mrjob.parse import is_uri
|
|
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
|
|
class LocalFilesystem(Filesystem):
|
|
"""Filesystem for local files. Typically you will get one of these via
|
|
``MRJobRunner().fs``.
|
|
"""
|
|
def can_handle_path(self, path):
|
|
return path.startswith('file:///') or not is_uri(path)
|
|
|
|
def du(self, path_glob):
|
|
path_glob = _from_file_uri(path_glob)
|
|
return sum(os.path.getsize(path) for path in self.ls(path_glob))
|
|
|
|
def ls(self, path_glob):
|
|
bare_path_glob = _from_file_uri(path_glob)
|
|
uri_scheme = path_glob[0:-len(bare_path_glob)] # 'file:///' or ''
|
|
|
|
for path in glob.glob(bare_path_glob):
|
|
if os.path.isdir(path):
|
|
for dirname, _, filenames in os.walk(path, followlinks=True):
|
|
for filename in filenames:
|
|
yield uri_scheme + os.path.join(dirname, filename)
|
|
else:
|
|
yield uri_scheme + path
|
|
|
|
def _cat_file(self, path):
|
|
path = _from_file_uri(path)
|
|
with open(path, 'rb') as f:
|
|
for chunk in decompress(f, path):
|
|
yield chunk
|
|
|
|
def exists(self, path_glob):
|
|
path_glob = _from_file_uri(path_glob)
|
|
return bool(glob.glob(path_glob))
|
|
|
|
def mkdir(self, path):
|
|
path = _from_file_uri(path)
|
|
if not os.path.isdir(path):
|
|
os.makedirs(path)
|
|
|
|
def put(self, src, path):
|
|
"""Copy a file from *src* to *path*"""
|
|
path = _from_file_uri(path)
|
|
shutil.copyfile(src, path)
|
|
|
|
def rm(self, path_glob):
|
|
path_glob = _from_file_uri(path_glob)
|
|
for path in glob.glob(path_glob):
|
|
if os.path.isdir(path):
|
|
log.debug('Recursively deleting %s' % path)
|
|
shutil.rmtree(path)
|
|
else:
|
|
log.debug('Deleting %s' % path)
|
|
os.remove(path)
|
|
|
|
def touchz(self, path):
|
|
path = _from_file_uri(path)
|
|
if os.path.isfile(path) and os.path.getsize(path) != 0:
|
|
raise OSError('Non-empty file %r already exists!' % (path,))
|
|
|
|
# zero out the file
|
|
with open(path, 'w'):
|
|
pass
|
|
|
|
def _md5sum_file(self, fileobj, block_size=(512 ** 2)): # 256K default
|
|
md5 = hashlib.md5()
|
|
while True:
|
|
data = fileobj.read(block_size)
|
|
if not data:
|
|
break
|
|
md5.update(data)
|
|
return md5.hexdigest()
|
|
|
|
def md5sum(self, path):
|
|
path = _from_file_uri(path)
|
|
with open(path, 'rb') as f:
|
|
return self._md5sum_file(f)
|
|
|
|
|
|
def _from_file_uri(path_or_uri):
|
|
if is_uri(path_or_uri):
|
|
if path_or_uri.startswith('file:///'):
|
|
return path_or_uri[7:] # keep last /
|
|
else:
|
|
raise ValueError('Not a file:/// URI')
|
|
else:
|
|
return path_or_uri
|