GildedRose-Refactoring-Kata/.venv/lib/python3.12/site-packages/mrjob/logs/wrap.py

# -*- coding: utf-8 -*-
# Copyright 2015-2017 Yelp
# Copyright 2019 Yelp
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utilities for ls()ing and cat()ing logs without raising exceptions."""
from logging import getLogger
from mrjob.py2 import to_unicode
from mrjob.util import to_lines
from .ids import _sort_by_recency
from .ids import _sort_for_spark
log = getLogger(__name__)


def _cat_log_lines(fs, path):
    """Yield lines from the given log.

    Log errors rather than raising them.
    """
    try:
        if not fs.exists(path):
            return
        for line in to_lines(fs.cat(path)):
            yield to_unicode(line)
    except (IOError, OSError) as e:
        log.warning("couldn't cat() %s: %r" % (path, e))


def _ls_logs(fs, log_dir_stream, matcher, is_spark=False, **kwargs):
    """Yield matches against log files. Used to implement the
    ``_ls_*_logs()`` functions.

    This yields dictionaries with ``path`` set to the matching log path,
    plus any other information (e.g. the corresponding job_id) returned
    by *matcher*.

    *fs* is a :py:class:`mrjob.fs.Filesystem`.

    *log_dir_stream* is a sequence of lists of log dirs. The idea is that
    there may be copies of the same logs in multiple places (e.g. on S3
    and on nodes reachable by SSH) and we want to list them all without
    finding duplicate copies. This function goes through the lists of log
    dirs in turn, stopping as soon as a list produces any matches.

    *matcher* is a function that takes ``(log_path, **kwargs)`` and returns
    either ``None`` (no match) or a dictionary with information about the
    path (e.g. the corresponding job_id). It's okay to return an empty
    dict.
    """
    # wrapper for fs.ls() that turns IOErrors into warnings
    def _fs_ls(log_dir):
        try:
            log.debug(' listing logs in %s' % log_dir)
            if fs.exists(log_dir):
                for path in fs.ls(log_dir):
                    yield path
        except (IOError, OSError) as e:
            log.warning("couldn't ls() %s: %r" % (log_dir, e))

    for log_dirs in log_dir_stream:
        if isinstance(log_dirs, str):
            raise TypeError(
                'log_dir_stream should contain lists of log dirs,'
                ' not strings')

        matched = False

        for log_dir in log_dirs:
            matches = []

            for path in _fs_ls(log_dir):
                match = matcher(path, **kwargs)
                if match is not None:
                    match['path'] = path
                    matches.append(match)

            if matches:
                matched = True

                if is_spark:
                    matches = _sort_for_spark(matches)
                else:
                    matches = _sort_by_recency(matches)

                for match in matches:
                    yield match

        if matched:
            return  # e.g. don't check S3 if we can get logs via SSH
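

# A minimal usage sketch for _ls_logs(), for illustration only: the matcher,
# the job ID regex, and the directory layout below are hypothetical and not
# part of mrjob's own log-matching code.
#
#     import re
#
#     def _match_task_syslog(path, job_id=None):
#         # hypothetical matcher: accept paths mentioning a Hadoop job ID
#         m = re.search(r'(job_\d+_\d+)', path)
#         if not m or (job_id and m.group(1) != job_id):
#             return None
#         return dict(job_id=m.group(1))
#
#     # prefer logs fetched over SSH; only fall back to S3 if none match
#     log_dir_stream = [
#         ['ssh://ec2-master/var/log/hadoop/userlogs'],
#         ['s3://mrjob-logs/userlogs'],
#     ]
#
#     for match in _ls_logs(fs, log_dir_stream, _match_task_syslog,
#                           job_id='job_201512232121_0001'):
#         print(match['path'])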


def _logs_exist(fs, path):
    """Do ``fs.exists(path)``, and return ``None`` if it raises ``IOError``."""
    try:
        return fs.exists(path)
    except IOError:
        return None