GildedRose-Refactoring-Kata/.venv/lib/python3.12/site-packages/mrjob/logs/wrap.py

# -*- coding: utf-8 -*-
# Copyright 2015-2017 Yelp
# Copyright 2019 Yelp
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utilities for ls()ing and cat()ing logs without raising exceptions."""
from logging import getLogger
from mrjob.py2 import to_unicode
from mrjob.util import to_lines
from .ids import _sort_by_recency
from .ids import _sort_for_spark
log = getLogger(__name__)


def _cat_log_lines(fs, path):
    """Yield lines from the given log.

    Log errors rather than raising them.
    """
    try:
        if not fs.exists(path):
            return
        for line in to_lines(fs.cat(path)):
            yield to_unicode(line)
    except (IOError, OSError) as e:
        log.warning("couldn't cat() %s: %r" % (path, e))


def _ls_logs(fs, log_dir_stream, matcher, is_spark=False, **kwargs):
    """Yield matches against log files. Used to implement the
    ``_ls_*_logs()`` functions.

    This yields dictionaries with ``path`` set to the matching log path,
    plus any other information (e.g. the corresponding job_id) returned
    by *matcher*.

    *fs* is a :py:class:`mrjob.fs.Filesystem`.

    *log_dir_stream* is a sequence of lists of log dirs. The idea is that
    there may be copies of the same logs in multiple places (e.g. on S3
    and on nodes reachable by SSH) and we want to list them all without
    finding duplicate copies. This function goes through the lists of log
    dirs in turn, stopping as soon as a list produces any matches.

    *matcher* is a function that takes ``(log_path, **kwargs)`` and returns
    either ``None`` (no match) or a dictionary with information about the
    path (e.g. the corresponding job_id). It's okay to return an empty
    dict.
    """
    # wrapper for fs.ls() that turns IOErrors into warnings
    def _fs_ls(log_dir):
        try:
            log.debug(' listing logs in %s' % log_dir)
            if fs.exists(log_dir):
                for path in fs.ls(log_dir):
                    yield path
        except (IOError, OSError) as e:
            log.warning("couldn't ls() %s: %r" % (log_dir, e))

    for log_dirs in log_dir_stream:
        if isinstance(log_dirs, str):
            raise TypeError(
                'log_dir_stream should contain lists of log dirs,'
                ' not strings')

        matched = False

        for log_dir in log_dirs:
            matches = []

            for path in _fs_ls(log_dir):
                match = matcher(path, **kwargs)
                if match is not None:
                    match['path'] = path
                    matches.append(match)

            if matches:
                matched = True

                if is_spark:
                    matches = _sort_for_spark(matches)
                else:
                    matches = _sort_by_recency(matches)

                for match in matches:
                    yield match

        if matched:
            return  # e.g. don't check S3 if we can get logs via SSH
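

# A minimal usage sketch for _ls_logs(), for illustration only: the matcher,
# the job ID regex, and the directory layout below are hypothetical and not
# part of mrjob's own log-matching code.
#
#     import re
#
#     def _match_task_syslog(path, job_id=None):
#         # hypothetical matcher: accept paths mentioning a Hadoop job ID
#         m = re.search(r'(job_\d+_\d+)', path)
#         if not m or (job_id and m.group(1) != job_id):
#             return None
#         return dict(job_id=m.group(1))
#
#     # prefer logs fetched over SSH; only fall back to S3 if none match
#     log_dir_stream = [
#         ['ssh://ec2-master/var/log/hadoop/userlogs'],
#         ['s3://mrjob-logs/userlogs'],
#     ]
#
#     for match in _ls_logs(fs, log_dir_stream, _match_task_syslog,
#                           job_id='job_201512232121_0001'):
#         print(match['path'])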


def _logs_exist(fs, path):
    """Do ``fs.exists(path)``, and return ``None`` if it raises ``IOError``."""
    try:
        return fs.exists(path)
    except IOError:
        return None