# Copyright 2012 Yelp and Contributors
# Copyright 2013 Lyft
# Copyright 2014 Brett Gibson
# Copyright 2015-2019 Yelp
# Copyright 2020 Affirm, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utilities related to cluster pooling. This code used to be in mrjob.emr.

In theory, this module might support pooling in general, but so far, there's
only a need for pooling on EMR.

"""
import time
from collections import defaultdict
from logging import getLogger

try:
    from botocore.exceptions import ClientError
except ImportError:
    ClientError = Exception

import mrjob
from mrjob.aws import EC2_INSTANCE_TYPE_TO_COMPUTE_UNITS
from mrjob.aws import EC2_INSTANCE_TYPE_TO_MEMORY
from mrjob.aws import _boto3_paginate
from mrjob.py2 import integer_types
from mrjob.py2 import string_types

log = getLogger(__name__)

# we check the type and contents of requested fleets/groups because they
# are user-specified and may not have the correct format. Currently, we
# simply return no match, since either boto3 or the EMR AMI will catch
# the error when EMRJobRunner attempts to create a new cluster. See #1696


### tagging pooled clusters ###

def _pool_tags(hash, name):
    """Return a dict with "hidden" tags to add to the given cluster."""
    return dict(__mrjob_pool_hash=hash, __mrjob_pool_name=name)


def _extract_tags(cluster):
    """Pull the tags from a cluster, as a dict."""
    return {t['Key']: t['Value'] for t in cluster.get('Tags') or []}


def _pool_name(cluster):
    tags = _extract_tags(cluster)
    return tags.get('__mrjob_pool_name')


### putting pooling information in the name of a cluster

# this may change between versions of mrjob

def _cluster_name_suffix(hash, name):
    fields = [mrjob.__version__, name, hash]
    return ' pooling:%s' % ','.join(fields)


def _parse_cluster_name_suffix(cluster_name):
    """Return a dictionary possibly containing the keys:

    mrjob_version: version of mrjob that created this cluster
    pool_hash: hash representing bootstrap setup etc.
    pool_name: name of the cluster pool

    If the cluster is not pooled or we can't parse its pooling suffix,
    return ``{}``.
    """
    # return version, hash, and name from cluster pool suffix

    i = cluster_name.find(' pooling:')
    if i == -1:
        return {}

    suffix = cluster_name[i + len(' pooling:'):]

    parts = suffix.split(',', 3)

    if len(parts) == 3:
        return dict(
            mrjob_version=parts[0],
            pool_name=parts[1],
            pool_hash=parts[2],
        )
    else:
        return {}


### instance groups ###

def _instance_groups_satisfy(actual_igs, requested_igs):
    """Do the actual instance groups from a cluster satisfy the requested
    ones, for the purpose of pooling?
    """
    # the format of *requested_igs* is here:
    #     http://docs.aws.amazon.com/ElasticMapReduce/latest/API/API_InstanceGroup.html  # noqa
    # and the format of *actual_igs* is here:
    #     http://docs.aws.amazon.com/ElasticMapReduce/latest/API/API_ListInstanceGroups.html  # noqa

    # verify format of requested_igs
    if not (isinstance(requested_igs, (list, tuple)) and
            all(isinstance(req_ig, dict) and 'InstanceRole' in req_ig
                for req_ig in requested_igs)):
        log.debug('    bad instance_groups config')
        return False

    # a is a map from role to actual instance groups
    a = defaultdict(list)
    for ig in actual_igs:
        a[ig['InstanceGroupType']].append(ig)

    # r is a map from role to request (should be only one per role)
    r = {req.get('InstanceRole'): req for req in requested_igs}

    # updated request to account for extra instance groups
    # see #1630 for what we do when roles don't match
    if set(a) - set(r):
        r = _add_missing_roles_to_request(set(a) - set(r), r,
                                          ['InstanceCount'])

    if set(a) != set(r):
        log.debug("    missing instance group roles")
        return False

    for role in r:
        if not _igs_for_same_role_satisfy(a[role], r[role]):
            return False

    return True


def _igs_for_same_role_satisfy(actual_igs, requested_ig):
    """Does the *actual* list of instance groups satisfy the *requested*
    one?
    """
    # bid price/on-demand
    if not all(_ig_satisfies_bid_price(ig, requested_ig) for ig in actual_igs):
        return False

    # memory
    if not all(_ig_satisfies_mem(ig, requested_ig) for ig in actual_igs):
        return False

    # EBS volumes
    if not all(_ebs_satisfies(ig, requested_ig) for ig in actual_igs):
        return False

    # CPU (this returns # of compute units or None)
    return _igs_satisfy_cpu(actual_igs, requested_ig)


def _ig_satisfies_bid_price(actual_ig, requested_ig):
    """Does the actual instance group definition satisfy the bid price
    (or lack thereof) of the requested instance group?
    """
    # _instance_groups_satisfy() already verified *requested_ig* is a dict

    # on-demand instances satisfy every bid price
    if actual_ig['Market'] == 'ON_DEMAND':
        return True

    if requested_ig.get('Market', 'ON_DEMAND') == 'ON_DEMAND':
        log.debug('    spot instance, requested on-demand')
        return False

    if actual_ig['BidPrice'] == requested_ig.get('BidPrice'):
        return True

    try:
        if float(actual_ig['BidPrice']) >= float(requested_ig.get('BidPrice')):
            return True
        else:
            # low bid prices mean cluster is more likely to be
            # yanked away
            log.debug('    bid price too low')
            return False
    except ValueError:
        log.debug('    non-float bid price')
        return False


def _ig_satisfies_mem(actual_ig, requested_ig):
    """Does the actual instance group satisfy the memory requirements of
    the requested instance group?"""
    actual_type = actual_ig['InstanceType']
    requested_type = requested_ig.get('InstanceType')

    # this works even for unknown instance types
    if actual_type == requested_type:
        return True

    try:
        if (EC2_INSTANCE_TYPE_TO_MEMORY[actual_type] >=
                EC2_INSTANCE_TYPE_TO_MEMORY[requested_type]):
            return True
        else:
            log.debug('    too little memory')
            return False
    except KeyError:
        log.debug('    unknown instance type')
        return False


def _igs_satisfy_cpu(actual_igs, requested_ig):
    """Does the list of actual instance groups satisfy the CPU requirements
    of the requested instance group?
    """
    requested_type = requested_ig.get('InstanceType')
    num_requested = requested_ig.get('InstanceCount')

    if not isinstance(num_requested, integer_types):
        return False

    # count number of compute units (cu)
    if requested_type in EC2_INSTANCE_TYPE_TO_COMPUTE_UNITS:
        requested_cu = (
            num_requested * EC2_INSTANCE_TYPE_TO_COMPUTE_UNITS[requested_type])

        # don't require instances to be running; we'd be worse off if
        # we started our own cluster from scratch. (This can happen if
        # the previous job finished while some task instances were
        # still being provisioned.)
        actual_cu = sum(
            ig['RunningInstanceCount'] *
            EC2_INSTANCE_TYPE_TO_COMPUTE_UNITS.get(ig['InstanceType'], 0.0)
            for ig in actual_igs)
    else:
        # unknown instance type, just count # of matching instances
        requested_cu = num_requested
        actual_cu = sum(ig['RunningInstanceCount'] for ig in actual_igs
                        if ig['InstanceType'] == requested_type)

    if actual_cu >= requested_cu:
        return True
    else:
        log.debug('    not enough compute units')
        return False


### instance fleets ###

def _instance_fleets_satisfy(actual_fleets, req_fleets):
    """Common code for :py:func:`
    :py:func:`_instance_groups_satisfy_fleets` and
    :py:func:`_instance_groups_satisfy`."""
    # verify format of requested_igs
    if not (isinstance(req_fleets, (list, tuple)) and
            all(isinstance(req_ft, dict) and 'InstanceFleetType' in req_ft
                for req_ft in req_fleets)):
        log.debug('    bad instance_fleets config')
        return False

    # a is a map from role to actual instance fleet
    # (unlike with groups, there can never be more than one fleet per role)
    a = {f['InstanceFleetType']: f for f in actual_fleets}

    # r is a map from role to request (should be only one per role)
    r = {f['InstanceFleetType']: f for f in req_fleets}

    # updated request to account for extra instance groups
    # see #1630 for what we do when roles don't match
    if set(a) - set(r):
        r = _add_missing_roles_to_request(
            set(a) - set(r), r,
            ['TargetOnDemandCapacity', 'TargetSpotCapacity'])

    if set(a) != set(r):
        log.debug("    missing instance fleet roles")
        return False

    for role in r:
        if not _fleet_for_same_role_satisfies(a[role], r[role]):
            return False

    return True


def _fleet_for_same_role_satisfies(actual_fleet, req_fleet):
    # match up instance types
    actual_specs = {spec['InstanceType']: spec
                    for spec in actual_fleet['InstanceTypeSpecifications']}
    try:
        req_specs = {spec['InstanceType']: spec
                     for spec in req_fleet['InstanceTypeConfigs']}
    except (TypeError, KeyError):
        return False

    if set(actual_specs) - set(req_specs):
        log.debug('    fleet may include wrong instance types')
        return False

    if not all(_fleet_spec_satsifies(actual_specs[t], req_specs[t])
               for t in actual_specs):
        return False

    # capacity
    actual_on_demand = actual_fleet.get('ProvisionedOnDemandCapacity', 0)
    req_on_demand = req_fleet.get('TargetOnDemandCapacity', 0)

    if not isinstance(req_on_demand, integer_types):
        return False

    if req_on_demand > actual_on_demand:
        log.debug('    not enough on-demand capacity')
        return False

    actual_spot = actual_fleet.get('ProvisionedSpotCapacity', 0)
    req_spot = req_fleet.get('TargetSpotCapacity', 0)

    if not isinstance(req_spot, integer_types):
        return False

    # allow extra on-demand instances to serve as spot instances
    if req_spot > actual_spot + (actual_on_demand - req_on_demand):
        log.debug('    not enough spot capacity')

    # handle TERMINATE_CLUSTER timeout action. This really doesn't play
    # well with pooling anyhow
    if _get_timeout_action(actual_fleet) == 'TERMINATE_CLUSTER':
        if _get_timeout_action(req_fleet) != 'TERMINATE_CLUSTER':
            log.debug('    self-terminating fleet not requested')
            return False

        if (_get_timeout_duration(actual_fleet) <
                _get_timeout_duration(req_fleet)):
            log.debug('    fleet may self-terminate prematurely')
            return False

    return True


def _get_timeout_action(fleet):
    return fleet.get(
        'LaunchSpecifications', {}).get(
        'SpotSpecification', {}).get(
        'TimeoutAction')


def _get_timeout_duration(fleet):
    return fleet.get(
        'LaunchSpecifications', {}).get(
        'SpotSpecification', {}).get(
        'TimeoutDurationMinutes', 0.0)


def _fleet_spec_satsifies(actual_spec, req_spec):
    """Make sure the specification for the given instance type is as
    good or better than the requested spec.

    Specs must have the same weight, but "better" EBS configurations are
    accepted.

    Bid price must either be higher or the *actual* bid price
    must be same as on-demand
    """
    if (actual_spec.get('WeightedCapacity', 1) !=
            req_spec.get('WeightedCapacity', 1)):
        log.debug('    different weighted capacity for same instance type')
        return False

    if not _ebs_satisfies(actual_spec, req_spec):
        return False

    # bid price is the max, don't worry about it
    if actual_spec.get('BidPriceAsPercentageOfOnDemandPrice', 100) >= 100:
        return True

    # absolute bid price
    req_bid_price = req_spec.get('BidPrice')
    if req_bid_price is not None:
        actual_bid_price = actual_spec.get('BidPrice')
        if actual_bid_price is None:
            log.debug('    no bid price specified')
            return False

        try:
            if not float(actual_bid_price) >= float(req_bid_price):
                log.debug('    bid price too low')
                return False
        except TypeError:
            log.debug('    non-numeric bid price')
            return False

    # relative bid price
    req_bid_percent = req_spec.get('BidPriceAsPercentageOfOnDemandPrice')
    if not isinstance(req_spec, (integer_types, float)):
        return False

    if req_bid_percent:
        actual_bid_percent = actual_spec.get(
            'BidPriceAsPercentageOfOnDemandPrice')
        if actual_bid_percent is None:
            log.debug('    no bid price as % of on-demand price')
            return False

        if req_bid_percent > actual_bid_percent:
            log.debug('    bid price as % of on-demand price too low')
            return False

    return True


### common code for matching instance (groups or fleets) ###


def _add_missing_roles_to_request(
        missing_roles, role_to_req, req_count_fields):
    """Helper for :py:func:`_igs_satisfy_request`. Add requests for
    *missing_roles* to *role_to_ig* so that we have a better chance of
    matching the cluster's actual instance groups."""
    # see #1630 for discussion

    # don't worry about modifying *role_to_req*; this is
    # a helper func

    if 'CORE' in missing_roles and list(role_to_req) == ['MASTER']:
        # both core and master have to satisfy master-only request
        role_to_req['CORE'] = role_to_req['MASTER']

    if 'TASK' in missing_roles and 'CORE' in role_to_req:
        # make sure tasks won't crash on the task instances,
        # but don't require the same amount of CPU
        role_to_req['TASK'] = dict(role_to_req['CORE'])
        for req_count_field in req_count_fields:
            role_to_req['TASK'][req_count_field] = 0

    return role_to_req


def _ebs_satisfies(actual, request):
    """Does *actual* have EBS volumes that satisfy *request*.

    *actual* is either an instance group from ``ListInstanceGroups``
    or an instance fleet spec from ``ListInstanceFleets`` (format
    is the same).

    *request* is either the ``InstanceGroups`` or ``InstanceFleets``
    param to ``RunJobFlow``

    If *request* doesn't have an EBS Configuration, we return
    True.

    If *request* requests EBS optimization, *actual* should provide it.

    Finally, *actual* should have the same or better block devices
    as those in *request* (same volume type, at least as much IOPS
    and volume size).
    """
    req_ebs_config = request.get('EbsConfiguration')

    if not req_ebs_config:
        return True

    if (req_ebs_config.get('EbsOptimized') and
            not actual.get('EbsOptimized')):
        log.debug('    need EBS-optimized instances')
        return False

    req_device_configs = req_ebs_config.get('EbsBlockDeviceConfigs')

    if not req_device_configs:
        return True

    if not (isinstance(req_device_configs, (list, tuple)) and
            all(isinstance(rdc, dict) for rdc in req_device_configs)):
        return False

    req_volumes = []

    for req_device_config in req_device_configs:
        volume = req_device_config['VolumeSpecification']
        num_volumes = req_device_config.get('VolumesPerInstance', 1)

        req_volumes.extend([volume] * num_volumes)

    actual_volumes = [
        bd.get('VolumeSpecification', {})
        for bd in actual.get('EbsBlockDevices', [])]

    return _ebs_volumes_satisfy(actual_volumes, req_volumes)


def _ebs_volumes_satisfy(actual_volumes, req_volumes):
    """Does the given list of actual EBS volumes satisfy the given request?

    Just compare them one by one (we want each actual device to be
    bigger/faster; just having the same amount of capacity or iops
    isn't enough).
    """
    if not isinstance(req_volumes, (list, tuple)):
        return False

    if len(req_volumes) > len(actual_volumes):
        log.debug('    more EBS volumes requested than available')
        return False

    return all(_ebs_volume_satisfies(a, r)
               for a, r in zip(actual_volumes, req_volumes))


def _ebs_volume_satisfies(actual_volume, req_volume):
    """Does the given actual EBS volume satisfy the given request?"""
    if not isinstance(req_volume, dict):
        return False

    if req_volume.get('VolumeType') != actual_volume.get('VolumeType'):
        log.debug('    wrong EBS volume type')
        return False

    if not req_volume.get('SizeInGB', 0) <= actual_volume.get('SizeInGB', 0):
        log.debug('    EBS volume too small')
        return False

    # Iops isn't really "optional"; it has to be set if volume type is
    # io1 and not set otherwise
    if not (req_volume.get('Iops', 0) <= actual_volume.get('Iops', 0)):
        log.debug('    EBS volume too slow')
        return False

    return True


### locking pooled clusters ###

# Locking ensures that two jobs don't add their steps to the same cluster at
# the same time

# after 60 seconds, a lock is considered released
_CLUSTER_LOCK_SECS = 60.0

# describe the cluster and add our tag within the first 5 seconds
_ADD_TAG_BEFORE = 5.0

# then wait 10 seconds before checking if our tag is still there
_WAIT_AFTER_ADD_TAG = 10.0

# make sure we have at least 40 seconds left to add steps and have them
# start running, before the lock expires
_CHECK_TAG_BEFORE = 20.0

# tag key used for locking pooled clusters
_POOL_LOCK_KEY = '__mrjob_pool_lock'


def _make_cluster_lock(job_key, expiry):
    """Return the contents of a tag used to lock a cluster.

    *expiry* is the unix timestamp for when the lock is no longer valid"""
    return '%s %.6f' % (job_key, expiry)


def _parse_cluster_lock(lock):
    """Return (job_key, expiry) or raise ValueError

    Raises TypeError if *lock* is not a string.
    """
    if not isinstance(lock, (string_types)):
        raise TypeError

    job_key, expiry_str = lock.split(' ')

    try:
        expiry = float(expiry_str)
    except TypeError:
        raise ValueError

    return job_key, expiry


def _get_cluster_lock(cluster):
    return _extract_tags(cluster).get(_POOL_LOCK_KEY)


def _attempt_to_lock_cluster(
        emr_client, cluster_id, job_key,
        cluster=None, when_cluster_described=None):
    """Attempt to lock the given pooled cluster using EMR tags.

    You may optionally include *cluster* (a cluster description) and
    *when_cluster_described*, to save an API call to ``DescribeCluster``

    If the cluster's StepConcurrency Level is 1, locking considers the cluster
    available if it's in the WAITING state. this means we should not release
    our lock until our step(s) have started running, which can take several
    seconds.

    Otherwise, steps can run concurrently, so locking
    considers the cluster available if it's in the WAITING or RUNNING state.
    Additionally, it makes a ``ListSteps`` API call to verify that the cluster
    doesn't already have as many active steps as it can run simultaneously.
    Because other jobs looking to join the cluster will also count steps,
    we can release our lock as soon as we add our steps.
    """
    log.debug('Attempting to lock cluster %s for %.1f seconds' % (
        cluster_id, _CLUSTER_LOCK_SECS))

    if cluster is None:
        cluster = emr_client.describe_cluster(ClusterId=cluster_id)['Cluster']

    if when_cluster_described is None:
        start = time.time()
    else:
        start = when_cluster_described

    if cluster['StepConcurrencyLevel'] == 1:
        step_accepting_states = ['WAITING']
    else:
        step_accepting_states = ['RUNNING', 'WAITING']

    # check if there is a non-expired lock
    state = cluster['Status']['State']

    if state not in step_accepting_states:
        # this could happen if the cluster were TERMINATING, for example
        log.info('  cluster is not accepting steps, state is %s' % state)
        return False

    lock = _get_cluster_lock(cluster)

    if lock:
        expiry = None
        try:
            their_job_key, expiry = _parse_cluster_lock(lock)
        except ValueError:
            log.info('  ignoring invalid pool lock: %s' % lock)

        if expiry and expiry > start:
            log.info('  locked by %s for %.1f seconds' % (
                their_job_key, expiry - start))
            return False

    # add our lock
    our_lock = _make_cluster_lock(job_key, start + _CLUSTER_LOCK_SECS)

    log.debug('  adding tag to cluster %s:' % cluster_id)
    log.debug('    %s=%s' % (_POOL_LOCK_KEY, our_lock))
    emr_client.add_tags(
        ResourceId=cluster_id,
        Tags=[dict(Key=_POOL_LOCK_KEY, Value=our_lock)]
    )

    if time.time() - start > _ADD_TAG_BEFORE:
        log.info('  took too long to tag cluster with lock')
        return False

    # wait, then check if our lock is still there
    log.info("  waiting %.1f seconds to ensure lock wasn't overwritten" %
             _WAIT_AFTER_ADD_TAG)
    time.sleep(_WAIT_AFTER_ADD_TAG)

    # check if our lock is still there
    cluster = emr_client.describe_cluster(ClusterId=cluster_id)['Cluster']

    state = cluster['Status']['State']

    if state not in step_accepting_states:
        # this could happen if the cluster were TERMINATING, for example
        log.info('  cluster is not accepting steps, state is %s' % state)
        return False

    if cluster['StepConcurrencyLevel'] > 1:
        # is cluster already full of steps?
        num_active_steps = len(list(_boto3_paginate(
            'Steps', emr_client, 'list_steps',
            ClusterId=cluster_id,
            StepStates=['PENDING', 'RUNNING'])))

        if num_active_steps >= cluster['StepConcurrencyLevel']:
            log.info(
                '  cluster already has %d active steps' % num_active_steps)
            return

    lock = _get_cluster_lock(cluster)

    if lock is None:
        log.info('  lock was removed')
        return False
    elif lock != our_lock:
        their_job_desc = 'other job'
        try:
            their_job_desc, expiry = _parse_cluster_lock(lock)
        except ValueError:
            pass

        log.info('  lock was overwritten by %s' % their_job_desc)
        return False

    # make sure we have enough time to add steps and have them run
    # before the lock expires
    if time.time() > start + _CHECK_TAG_BEFORE:
        log.info('  took too long to check for lock')
        return False

    log.info('  lock acquired')
    return True


def _attempt_to_unlock_cluster(emr_client, cluster_id):
    """Release our lock on the given pooled cluster. Only do this if you know
    the cluster is currently running steps (so other jobs won't try to
    join the cluster).

    Returns True if successful, False if not (usually, this means the
    cluster terminated). Cluster locks eventually release themselves,
    so if releasing a lock fails for whatever reason, it's not worth
    releasing it again.

    Locks expire after a minute anyway (which is less time than it takes to
    run most jobs), so this is mostly useful for preventing problems
    due to clock skew. Also makes unit testing more straightforward.
    """
    try:
        emr_client.remove_tags(ResourceId=cluster_id, TagKeys=[_POOL_LOCK_KEY])
        return True
    except ClientError as ex:
        log.debug('removing tags failed: %r' % ex)
        return False