mirror of
https://github.com/emilybache/GildedRose-Refactoring-Kata.git
synced 2026-02-09 11:41:37 +00:00
108 lines
3.4 KiB
Python
# -*- coding: utf-8 -*-
|
|
# Copyright 2015-2017 Yelp
|
|
# Copyright 2019 Yelp
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
"""Utilities for ls()ing and cat()ing logs without raising exceptions."""
|
|
from logging import getLogger
|
|
|
|
from mrjob.py2 import to_unicode
|
|
from mrjob.util import to_lines
|
|
|
|
from .ids import _sort_by_recency
|
|
from .ids import _sort_for_spark
|
|
|
|
log = getLogger(__name__)
|
|
|
|
|
|
def _cat_log_lines(fs, path):
    """Generate the lines of the log file at *path*, decoded to unicode.

    A nonexistent path yields nothing, and I/O problems are logged as
    warnings rather than raised, so callers can scan many candidate log
    paths without wrapping each one in a try/except.
    """
    try:
        if not fs.exists(path):
            return
        # fs.cat() returns a stream of chunks; to_lines() re-splits it
        for raw_line in to_lines(fs.cat(path)):
            yield to_unicode(raw_line)
    except (IOError, OSError) as ex:
        log.warning("couldn't cat() %s: %r" % (path, ex))
|
|
|
|
|
|
def _ls_logs(fs, log_dir_stream, matcher, is_spark=False, **kwargs):
|
|
"""Return a list matches against log files. Used to implement
|
|
``_ls_*_logs()`` functions.
|
|
|
|
This yields dictionaries with ``path`` set to matching log path, and
|
|
other information (e.g. corresponding job_id) returned by *matcher*
|
|
|
|
*fs* is a :py:class:`mrjob.fs.Filesystem`
|
|
|
|
*log_dir_stream* is a sequence of lists of log dirs. The idea is that
|
|
there may be copies of the same logs in multiple places (e.g.
|
|
on S3 and by SSHing into nodes) and we want to list them all without
|
|
finding duplicate copies. This function will go through the lists of
|
|
log dirs in turn, stopping if it finds any matches from a list.
|
|
|
|
*matcher* is a function that takes (log_path, **kwargs)
|
|
and returns either None (no match) or a dictionary with information
|
|
about the path (e.g. the corresponding job_id). It's okay to return
|
|
an empty dict.
|
|
"""
|
|
# wrapper for fs.ls() that turns IOErrors into warnings
|
|
def _fs_ls(path):
|
|
try:
|
|
log.debug(' listing logs in %s' % log_dir)
|
|
if fs.exists(log_dir):
|
|
for path in fs.ls(log_dir):
|
|
yield path
|
|
except (IOError, OSError) as e:
|
|
log.warning("couldn't ls() %s: %r" % (log_dir, e))
|
|
|
|
for log_dirs in log_dir_stream:
|
|
if isinstance(log_dirs, str):
|
|
raise TypeError
|
|
|
|
matched = False
|
|
|
|
for log_dir in log_dirs:
|
|
matches = []
|
|
|
|
for path in _fs_ls(log_dir):
|
|
match = matcher(path, **kwargs)
|
|
if match is not None:
|
|
match['path'] = path
|
|
matches.append(match)
|
|
|
|
if matches:
|
|
matched = True
|
|
|
|
if is_spark:
|
|
matches = _sort_for_spark(matches)
|
|
else:
|
|
matches = _sort_by_recency(matches)
|
|
|
|
for match in matches:
|
|
yield match
|
|
|
|
if matched:
|
|
return # e.g. don't check S3 if we can get logs via SSH
|
|
|
|
|
|
def _logs_exist(fs, path):
|
|
"""Do ``fs.exists(path)``, and return ``None`` if it raises ``IOError``"""
|
|
try:
|
|
return fs.exists(path)
|
|
except IOError:
|
|
return None
|