GildedRose-Refactoring-Kata/.venv/lib/python3.12/site-packages/mrjob/aws.py
2025-06-22 13:36:01 +05:30

304 lines
8.2 KiB
Python

# -*- coding: utf-8 -*-
# Copyright 2013 Lyft
# Copyright 2015-2016 Yelp
# Copyright 2017 Yelp and Contributors
# Copyright 2019 Yelp and Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""General information about Amazon Web Services, such as region-to-endpoint
mappings. Also includes basic utilities for working with boto3.
"""
import socket
import time
from datetime import datetime
# dateutil is a boto3 dependency
try:
from dateutil.tz import tzutc
tzutc
except ImportError:
tzutc = None
try:
import boto3
boto3 # quiet "redefinition of unused ..." warning from pyflakes
except ImportError:
boto3 = None
try:
import botocore.client
botocore # quiet "redefinition of unused ..." warning from pyflakes
except ImportError:
botocore = None
# ssl is a built-in package, but isn't available if no SSL library is installed
try:
from ssl import SSLError
except ImportError:
SSLError = None
from mrjob.retry import RetryWrapper
### EC2 Instances ###
# Map from EC2 instance type name (e.g. 'm5.xlarge') to its number of
# compute units. Values are ints or floats (e.g. 'm2.xlarge' is 6.5).
# Only the instance types listed below have an entry; any other type is
# simply absent from this dict.
# NOTE(review): "compute units" presumably means EC2 Compute Units (ECU)
# as shown on the pricing pages below -- confirm.
# Sources:
# - https://aws.amazon.com/ec2/pricing/
# - http://aws.amazon.com/ec2/previous-generation/
EC2_INSTANCE_TYPE_TO_COMPUTE_UNITS = {
'c1.medium': 5,
'c1.xlarge': 20,
'c3.2xlarge': 28,
'c3.4xlarge': 55,
'c3.8xlarge': 108,
'c3.large': 7,
'c3.xlarge': 14,
'c4.2xlarge': 31,
'c4.4xlarge': 62,
'c4.8xlarge': 132,
'c4.large': 8,
'c4.xlarge': 16,
'c5.18xlarge': 281,
'c5.2xlarge': 34,
'c5.4xlarge': 68,
'c5.9xlarge': 141,
'c5.large': 9,
'c5.xlarge': 17,
'cc1.4xlarge': 33.5,
'cc2.8xlarge': 88,
'cg1.4xlarge': 33.5,
'cr1.8xlarge': 88,
'd2.2xlarge': 27,
'd2.4xlarge': 56,
'd2.8xlarge': 116,
'd2.xlarge': 14,
'g2.2xlarge': 26,
'g2.8xlarge': 104,
'hi1.4xlarge': 35,
'hs1.8xlarge': 35,
'i2.2xlarge': 27,
'i2.4xlarge': 53,
'i2.8xlarge': 104,
'i2.xlarge': 14,
'm1.large': 4,
'm1.medium': 2,
'm1.small': 1,
'm1.xlarge': 8,
'm2.2xlarge': 13,
'm2.4xlarge': 26,
'm2.xlarge': 6.5,
'm3.2xlarge': 26,
'm3.large': 6.5,
'm3.medium': 3,
'm3.xlarge': 13,
'm4.10xlarge': 124.5,
'm4.16xlarge': 188,
'm4.2xlarge': 26,
'm4.4xlarge': 53.5,
'm4.large': 6.5,
'm4.xlarge': 13,
'm5.12xlarge': 173,
'm5.24xlarge': 345,
'm5.2xlarge': 31,
'm5.4xlarge': 60,
'm5.large': 8,
'm5.xlarge': 16,
'r3.2xlarge': 26,
'r3.4xlarge': 52,
'r3.8xlarge': 104,
'r3.large': 6.5,
'r3.xlarge': 13,
'r4.16xlarge': 195,
'r4.2xlarge': 27,
'r4.4xlarge': 53,
'r4.8xlarge': 99,
'r4.large': 7,
'r4.xlarge': 13.6,
}
# Map from EC2 instance type name (e.g. 'm5.xlarge') to GB of memory.
# Values are ints or floats (e.g. 'c1.medium' is 1.7).
# Only the instance types listed below have an entry; any other type is
# simply absent from this dict.
# Sources:
# - http://aws.amazon.com/ec2/instance-types/
# - http://aws.amazon.com/ec2/previous-generation/
EC2_INSTANCE_TYPE_TO_MEMORY = {
'c1.medium': 1.7,
'c1.xlarge': 7,
'c3.2xlarge': 15,
'c3.4xlarge': 30,
'c3.8xlarge': 60,
'c3.large': 3.75,
'c3.xlarge': 7.5,
'c4.2xlarge': 15,
'c4.4xlarge': 30,
'c4.8xlarge': 60,
'c4.large': 3.75,
'c4.xlarge': 7.5,
'c5.18xlarge': 144,
'c5.2xlarge': 16,
'c5.4xlarge': 32,
'c5.9xlarge': 72,
'c5.large': 4,
'c5.xlarge': 8,
'cc1.4xlarge': 23,
'cc2.8xlarge': 60.5,
'cg1.4xlarge': 22.5,
'cr1.8xlarge': 244,
'd2.2xlarge': 61,
'd2.4xlarge': 122,
'd2.8xlarge': 244,
'd2.xlarge': 30.5,
'g2.2xlarge': 15,
'g2.8xlarge': 60,
'hi1.4xlarge': 60.5,
'hs1.8xlarge': 117,
'i2.2xlarge': 61,
'i2.4xlarge': 122,
'i2.8xlarge': 244,
'i2.xlarge': 30.5,
'm1.large': 7.5,
'm1.medium': 3.75,
'm1.small': 1.7,
'm1.xlarge': 17.1,
'm2.2xlarge': 34.2,
'm2.4xlarge': 68.4,
'm2.xlarge': 17.5,
'm3.2xlarge': 30,
'm3.large': 7.5,
'm3.medium': 3.75,
'm3.xlarge': 15,
'm4.10xlarge': 160,
'm4.16xlarge': 256,
'm4.2xlarge': 32,
'm4.4xlarge': 64,
'm4.large': 8,
'm4.xlarge': 16,
'm5.12xlarge': 192,
'm5.24xlarge': 384,
'm5.2xlarge': 32,
'm5.4xlarge': 64,
'm5.large': 8,
'm5.xlarge': 16,
'r3.2xlarge': 61,
'r3.4xlarge': 122,
'r3.8xlarge': 244,
'r3.large': 15,
'r3.xlarge': 30.5,
'r4.16xlarge': 488,
'r4.2xlarge': 61,
'r4.4xlarge': 122,
'r4.8xlarge': 244,
'r4.large': 15.25,
'r4.xlarge': 30.5,
}
### Regions ###
# The one S3 region that doesn't have its own endpoint and doesn't need a
# bucket location constraint (us-east-1).
_S3_REGION_WITH_NO_LOCATION_CONSTRAINT = 'us-east-1'
# The region to assume if none is specified
_DEFAULT_AWS_REGION = 'us-east-1'
### Utilities ###
# Retry settings handed to RetryWrapper by _wrap_aws_client().
# If AWS throttles us, how long to wait (in seconds) before trying again?
# _wrap_aws_client() treats this as the minimum backoff.
_AWS_BACKOFF = 20
# Passed as RetryWrapper's ``multiplier``.
# NOTE(review): presumably each successive wait is multiplied by this
# factor -- confirm against mrjob.retry.RetryWrapper.
_AWS_BACKOFF_MULTIPLIER = 1.5
# Passed as RetryWrapper's ``max_tries``.
_AWS_MAX_TRIES = 20 # this takes about a day before we run out of tries
def _client_error_code(ex):
    """Return the error code string carried by the given ClientError.

    Reads ``ex.response['Error']['Code']``, falling back to ``''`` when
    the response has no ``Error`` entry or that entry has no ``Code``.
    """
    error_info = ex.response.get('Error', {})
    return error_info.get('Code', '')
def _client_error_status(ex):
    """Return the HTTP status recorded on the given ClientError.

    Prefers ``response['Error']['HTTPStatusCode']``; when that is missing
    or falsy, falls back to ``response['ResponseMetadata']['HTTPStatusCode']``
    (which may itself be absent, giving ``None``).
    """
    response = ex.response

    status_from_error = response.get('Error', {}).get('HTTPStatusCode')
    if status_from_error:
        return status_from_error

    # sometimes status code is in ResponseMetadata, not Error
    return response.get('ResponseMetadata', {}).get('HTTPStatusCode')
def _is_retriable_client_error(ex):
    """Is the exception from a boto3 client retriable?

    Returns True for:

    * a botocore ``ClientError`` whose error code contains ``Throttl``,
      ``RequestExpired`` or ``Timeout``, or whose HTTP status is 505
    * an ``SSLError`` with a string argument containing ``'timed out'``
    * a socket error whose args are exactly
      ``(104, 'Connection reset by peer')`` or
      ``(110, 'Connection timed out')``

    Returns False for anything else.

    ``botocore`` and ``SSLError`` are optional imports in this module and
    are ``None`` when unavailable. The corresponding checks are skipped in
    that case; otherwise ``botocore.exceptions`` would raise
    ``AttributeError`` and ``isinstance(ex, None)`` would raise
    ``TypeError``, masking the exception we were asked to classify.
    """
    if (botocore is not None and
            isinstance(ex, botocore.exceptions.ClientError)):
        # these rarely get through in boto3
        code = _client_error_code(ex)

        # "Throttl" catches "Throttled" and "Throttling"
        if any(c in code for c in ('Throttl', 'RequestExpired', 'Timeout')):
            return True

        # spurious 505s thought to be part of an AWS load balancer issue
        return _client_error_status(ex) == 505

    # in Python 2.7, SSLError is a subclass of socket.error, so catch
    # SSLError first
    elif SSLError is not None and isinstance(ex, SSLError):
        # catch ssl.SSLError: ('The read operation timed out',). See #1827.
        # also catches 'The write operation timed out'
        return any(isinstance(arg, str) and 'timed out' in arg
                   for arg in ex.args)

    elif isinstance(ex, socket.error):
        return ex.args in ((104, 'Connection reset by peer'),
                           (110, 'Connection timed out'))

    else:
        return False
def _wrap_aws_client(raw_client, min_backoff=None):
    """Return *raw_client* (a boto3 Client) wrapped in a ``RetryWrapper``
    so that calls are retried when :py:func:`_is_retriable_client_error`
    says the failure is retriable (e.g. throttling).

    *min_backoff*, if set, can raise the initial backoff above the module
    default of ``_AWS_BACKOFF`` seconds, but never lower it.
    """
    # a min_backoff of None (or 0) just means "use the default"
    initial_backoff = max(_AWS_BACKOFF, min_backoff or 0)

    retry_options = dict(
        retry_if=_is_retriable_client_error,
        backoff=initial_backoff,
        multiplier=_AWS_BACKOFF_MULTIPLIER,
        max_tries=_AWS_MAX_TRIES,
        unwrap_methods={'get_paginator'},
    )

    return RetryWrapper(raw_client, **retry_options)
def _boto3_now():
    """Return the current time as a ``datetime`` that :py:mod:`boto3`
    can work with.

    The result is always in UTC and carries the time zone
    ``dateutil.tz.tzutc()``.

    Raises ``ImportError`` if dateutil could not be imported.
    """
    if tzutc is not None:
        return datetime.now(tzutc())

    raise ImportError(
        'You must install dateutil to get boto3-compatible datetimes')
def _boto3_paginate(what, boto3_client, api_call, **api_params):
    """Yield each item from every page of a paginatable API client call.

    *what* is the name of the field that holds the list of items
    in each page (e.g. ``'InstanceGroups'``).

    *api_call* is the operation name handed to
    ``boto3_client.get_paginator()``; the remaining *api_params* are passed
    through to the paginator's ``paginate()``.

    There's nothing magical here; this just spares you from naming
    a paginator and its pages yourself.

    You can add the keyword ``_delay`` to *api_params* to sleep that many
    seconds after each API call
    """
    # the _delay kwarg was added for the audit-emr-usage tool; see #1091.
    # It must be removed before the params reach the API call.
    seconds_between_calls = api_params.pop('_delay', None)

    pages = boto3_client.get_paginator(api_call).paginate(**api_params)

    for current_page in pages:
        if seconds_between_calls:
            time.sleep(seconds_between_calls)

        for entry in current_page[what]:
            yield entry