mirror of
https://github.com/emilybache/GildedRose-Refactoring-Kata.git
synced 2026-02-09 19:51:41 +00:00
168 lines
5.4 KiB
Python
168 lines
5.4 KiB
Python
# Copyright 2017-2018 Yelp
|
|
# Copyright 2019 Yelp
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
"""Print probable cause of error for a failed step.

Currently this only works on EMR.

Usage::

    mrjob diagnose [opts] j-CLUSTERID

Options::

  -c CONF_PATHS, --conf-path CONF_PATHS
                        Path to alternate mrjob.conf file to read from
  --no-conf             Don't load mrjob.conf even if it's available
  --ec2-endpoint EC2_ENDPOINT
                        Force mrjob to connect to EC2 on this endpoint (e.g.
                        ec2.us-west-1.amazonaws.com). Default is to infer this
                        from region.
  --emr-endpoint EMR_ENDPOINT
                        Force mrjob to connect to EMR on this endpoint (e.g.
                        us-west-1.elasticmapreduce.amazonaws.com). Default is
                        to infer this from region.
  -h, --help            show this help message and exit
  -q, --quiet           Don't print anything to stderr
  --region REGION       GCE/AWS region to run Dataproc/EMR jobs in.
  --s3-endpoint S3_ENDPOINT
                        Force mrjob to connect to S3 on this endpoint (e.g.
                        s3-us-west-1.amazonaws.com). You usually shouldn't set
                        this; by default mrjob will choose the correct
                        endpoint for each S3 bucket based on its location.
  --step-id STEP_ID     ID of a particular failed step to diagnose
  -v, --verbose         print more messages to stderr

.. versionadded:: 0.6.1
"""
|
|
from argparse import ArgumentParser
|
|
from logging import getLogger
|
|
|
|
from mrjob.aws import _boto3_paginate
|
|
from mrjob.emr import EMRJobRunner
|
|
from mrjob.job import MRJob
|
|
from mrjob.logs.errors import _format_error
|
|
from mrjob.options import _add_basic_args
|
|
from mrjob.options import _add_runner_args
|
|
from mrjob.options import _alphabetize_actions
|
|
from mrjob.options import _filter_by_role
|
|
|
|
# module-level logger, named after this module; every message this
# tool emits goes through it
log = getLogger(__name__)
|
|
|
|
|
|
def main(cl_args=None):
    """Entry point: find a step on an EMR cluster and log why it failed.

    Parses the command line, builds an :py:class:`EMRJobRunner` from the
    parsed options, picks the step to look at (see ``_get_step()``), and
    asks the runner to pick the most likely error for that step. The
    result is reported through the module logger rather than returned.

    :param cl_args: list of command-line arguments to parse. ``None``
                    (the default) makes argparse read ``sys.argv``.

    :raises SystemExit: with status 1 when no suitable step was found.
    """
    options = _make_arg_parser().parse_args(cl_args)

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # every parsed option is handed to the runner, except the switches
    # that this tool consumes itself
    tool_only_opts = ('quiet', 'verbose', 'step_id')
    runner_kwargs = dict(
        (name, value) for name, value in vars(options).items()
        if name not in tool_only_opts)

    runner = EMRJobRunner(**runner_kwargs)
    emr_client = runner.make_emr_client()

    # pick step
    step = _get_step(emr_client, options.cluster_id, options.step_id)
    if not step:
        # _get_step() has already logged the reason
        raise SystemExit(1)

    # an explicitly requested step need not have failed; warn, but
    # still try to diagnose it
    if step['Status']['State'] != 'FAILED':
        log.warning('step %s has state %s, not FAILED' %
                    (step['Id'], step['Status']['State']))

    # interpret logs
    log.info('Diagnosing step %s (%s)' % (step['Id'], step['Name']))

    log_interpretation = dict(step_id=step['Id'])
    step_type = _infer_step_type(step)
    error = runner._pick_error(log_interpretation, step_type)

    # print error
    if not error:
        log.warning('No error detected')
        return

    log.error('Probable cause of failure:\n\n%s\n\n' %
              _format_error(error))
|
|
|
|
|
|
def _get_step(emr_client, cluster_id, step_id=None):
    """Return the first listed step on *cluster_id* that matches.

    "Matches" is decided by ``_step_matches()``: the step with ID
    *step_id* if one is given, otherwise a step in the ``FAILED`` state.

    Logs an error and returns ``None`` when no step matches.
    """
    # just walk the step listing as it comes back, rather than filtering
    # by step ID or status. usually the step we want is the last one run
    # anyhow (the listing is expected to be newest-first)
    listed_steps = _boto3_paginate(
        'Steps', emr_client, 'list_steps', ClusterId=cluster_id)

    found = next(
        (candidate for candidate in listed_steps
         if _step_matches(candidate, step_id=step_id)),
        None)

    if found is not None:
        return found

    # nothing matched; say why
    if step_id:
        log.error('step %s not found on cluster %s' %
                  (step_id, cluster_id))
    else:
        log.error('cluster %s has no failed steps' % cluster_id)

    return None
|
|
|
|
|
|
def _step_matches(step, step_id=None):
|
|
if not step_id:
|
|
return step['Status']['State'] == 'FAILED'
|
|
else:
|
|
return step['Id'] == step_id
|
|
|
|
|
|
def _infer_step_type(step):
|
|
args = step['Config']['Args']
|
|
|
|
# all that matters for log parsing is picking out Spark steps
|
|
# (doesn't matter if it's spark or spark_jar or spark_script)
|
|
#
|
|
# and of course we don't know the logging habits of jar steps,
|
|
# so we might as well use streaming's logic
|
|
if '--master' in args and '--deploy-mode' in args:
|
|
return 'spark'
|
|
else:
|
|
return 'streaming'
|
|
|
|
|
|
def _make_arg_parser():
    """Build the argument parser for the ``diagnose`` tool.

    The parser takes the basic switches added by ``_add_basic_args()``,
    the :py:class:`EMRJobRunner` options that have the ``'connect'``
    role, a positional cluster ID, and an optional ``--step-id``.
    """
    parser = ArgumentParser(
        usage='%(prog)s diagnose [opts] [--step-id STEP_ID] CLUSTER_ID',
        description=(
            'Get probable cause of failure for step on CLUSTER_ID.'
            ' By default we look at the last failed step'))

    _add_basic_args(parser)

    # only the runner options with the 'connect' role are exposed
    connect_opt_names = _filter_by_role(EMRJobRunner.OPT_NAMES, 'connect')
    _add_runner_args(parser, connect_opt_names)

    # no option strings, so this is the positional argument
    parser.add_argument(
        dest='cluster_id', help='ID of cluster with failed step')

    parser.add_argument(
        '--step-id', dest='step_id',
        help='ID of a particular failed step to diagnose')

    _alphabetize_actions(parser)

    return parser
|
|
|
|
|
|
# run the tool when this module is executed as a script; main() then
# parses its arguments from the command line
if __name__ == '__main__':
    main()
|