# -*- coding: utf-8 -*-
# Copyright 2009-2012 Yelp
# Copyright 2013-2014 Yelp and Contributors
# Copyright 2015-2016 Yelp
# Copyright 2018 Ben Dalling
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utility functions for compatibility with different version of hadoop."""
|
|
from distutils.version import LooseVersion
import logging
import os

from mrjob.py2 import string_types

# lists alternative names for jobconf variables
# full listing thanks to translation table in
# http://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/DeprecatedProperties.html # noqa

log = logging.getLogger(__name__)


_JOBCONF_DICT_LIST = [
    {'1.0': 'StorageId',
     '2.0': 'dfs.datanode.StorageId'},
    {'1.0': 'create.empty.dir.if.nonexist',
     '2.0': 'mapreduce.jobcontrol.createdir.ifnotexist'},
    {'1.0': 'dfs.access.time.precision',
     '2.0': 'dfs.namenode.accesstime.precision'},
    {'1.0': 'dfs.backup.address',
     '2.0': 'dfs.namenode.backup.address'},
    {'1.0': 'dfs.backup.http.address',
     '2.0': 'dfs.namenode.backup.http-address'},
    {'1.0': 'dfs.balance.bandwidthPerSec',
     '2.0': 'dfs.datanode.balance.bandwidthPerSec'},
    {'1.0': 'dfs.block.size',
     '2.0': 'dfs.blocksize'},
    {'1.0': 'dfs.client.buffer.dir',
     '2.0': 'fs.client.buffer.dir'},
    {'1.0': 'dfs.data.dir',
     '2.0': 'dfs.datanode.data.dir'},
    {'1.0': 'dfs.datanode.max.xcievers',
     '2.0': 'dfs.datanode.max.transfer.threads'},
    {'1.0': 'dfs.df.interval',
     '2.0': 'fs.df.interval'},
    {'1.0': 'dfs.http.address',
     '2.0': 'dfs.namenode.http-address'},
    {'1.0': 'dfs.https.address',
     '2.0': 'dfs.namenode.https-address'},
    {'1.0': 'dfs.https.client.keystore.resource',
     '2.0': 'dfs.client.https.keystore.resource'},
    {'1.0': 'dfs.https.need.client.auth',
     '2.0': 'dfs.client.https.need-auth'},
    {'1.0': 'dfs.max-repl-streams',
     '2.0': 'dfs.namenode.replication.max-streams'},
    {'1.0': 'dfs.max.objects',
     '2.0': 'dfs.namenode.max.objects'},
    {'1.0': 'dfs.name.dir',
     '2.0': 'dfs.namenode.name.dir'},
    {'1.0': 'dfs.name.dir.restore',
     '2.0': 'dfs.namenode.name.dir.restore'},
    {'1.0': 'dfs.name.edits.dir',
     '2.0': 'dfs.namenode.edits.dir'},
    {'1.0': 'dfs.permissions',
     '2.0': 'dfs.permissions.enabled'},
    {'1.0': 'dfs.permissions.supergroup',
     '2.0': 'dfs.permissions.superusergroup'},
    {'1.0': 'dfs.read.prefetch.size',
     '2.0': 'dfs.client.read.prefetch.size'},
    {'1.0': 'dfs.replication.considerLoad',
     '2.0': 'dfs.namenode.replication.considerLoad'},
    {'1.0': 'dfs.replication.interval',
     '2.0': 'dfs.namenode.replication.interval'},
    {'1.0': 'dfs.replication.min',
     '2.0': 'dfs.namenode.replication.min'},
    {'1.0': 'dfs.replication.pending.timeout.sec',
     '2.0': 'dfs.namenode.replication.pending.timeout-sec'},
    {'1.0': 'dfs.safemode.extension',
     '2.0': 'dfs.namenode.safemode.extension'},
    {'1.0': 'dfs.safemode.threshold.pct',
     '2.0': 'dfs.namenode.safemode.threshold-pct'},
    {'1.0': 'dfs.secondary.http.address',
     '2.0': 'dfs.namenode.secondary.http-address'},
    {'1.0': 'dfs.socket.timeout',
     '2.0': 'dfs.client.socket-timeout'},
    {'1.0': 'dfs.upgrade.permission',
     '2.0': 'dfs.namenode.upgrade.permission'},
    {'1.0': 'dfs.write.packet.size',
     '2.0': 'dfs.client-write-packet-size'},
    {'1.0': 'fs.checkpoint.dir',
     '2.0': 'dfs.namenode.checkpoint.dir'},
    {'1.0': 'fs.checkpoint.edits.dir',
     '2.0': 'dfs.namenode.checkpoint.edits.dir'},
    {'1.0': 'fs.checkpoint.period',
     '2.0': 'dfs.namenode.checkpoint.period'},
    {'1.0': 'fs.default.name',
     '2.0': 'fs.defaultFS'},
    {'1.0': 'hadoop.configured.node.mapping',
     '2.0': 'net.topology.configured.node.mapping'},
    {'1.0': 'hadoop.job.history.location',
     '2.0': 'mapreduce.jobtracker.jobhistory.location'},
    {'1.0': 'hadoop.native.lib',
     '2.0': 'io.native.lib.available'},
    {'1.0': 'hadoop.net.static.resolutions',
     '2.0': 'mapreduce.tasktracker.net.static.resolutions'},
    {'1.0': 'hadoop.pipes.command-file.keep',
     '2.0': 'mapreduce.pipes.commandfile.preserve'},
    {'1.0': 'hadoop.pipes.executable',
     '2.0': 'mapreduce.pipes.executable'},
    {'1.0': 'hadoop.pipes.executable.interpretor',
     '2.0': 'mapreduce.pipes.executable.interpretor'},
    {'1.0': 'hadoop.pipes.java.mapper',
     '2.0': 'mapreduce.pipes.isjavamapper'},
    {'1.0': 'hadoop.pipes.java.recordreader',
     '2.0': 'mapreduce.pipes.isjavarecordreader'},
    {'1.0': 'hadoop.pipes.java.recordwriter',
     '2.0': 'mapreduce.pipes.isjavarecordwriter'},
    {'1.0': 'hadoop.pipes.java.reducer',
     '2.0': 'mapreduce.pipes.isjavareducer'},
    {'1.0': 'hadoop.pipes.partitioner',
     '2.0': 'mapreduce.pipes.partitioner'},
    {'1.0': 'heartbeat.recheck.interval',
     '2.0': 'dfs.namenode.heartbeat.recheck-interval'},
    {'1.0': 'io.bytes.per.checksum',
     '2.0': 'dfs.bytes-per-checksum'},
    {'1.0': 'io.sort.factor',
     '2.0': 'mapreduce.task.io.sort.factor'},
    {'1.0': 'io.sort.mb',
     '2.0': 'mapreduce.task.io.sort.mb'},
    {'1.0': 'io.sort.spill.percent',
     '2.0': 'mapreduce.map.sort.spill.percent'},
    {'1.0': 'job.end.notification.url',
     '2.0': 'mapreduce.job.end-notification.url'},
    {'1.0': 'job.end.retry.attempts',
     '2.0': 'mapreduce.job.end-notification.retry.attempts'},
    {'1.0': 'job.end.retry.interval',
     '2.0': 'mapreduce.job.end-notification.retry.interval'},
    {'1.0': 'job.local.dir',
     '2.0': 'mapreduce.job.local.dir'},
    {'1.0': 'jobclient.completion.poll.interval',
     '2.0': 'mapreduce.client.completion.pollinterval'},
    {'1.0': 'jobclient.output.filter',
     '2.0': 'mapreduce.client.output.filter'},
    {'1.0': 'jobclient.progress.monitor.poll.interval',
     '2.0': 'mapreduce.client.progressmonitor.pollinterval'},
    {'1.0': 'keep.failed.task.files',
     '2.0': 'mapreduce.task.files.preserve.failedtasks'},
    {'1.0': 'keep.task.files.pattern',
     '2.0': 'mapreduce.task.files.preserve.filepattern'},
    {'1.0': 'key.value.separator.in.input.line',
     '2.0': 'mapreduce.input.keyvaluelinerecordreader.key.value.separator'},
    {'1.0': 'local.cache.size',
     '2.0': 'mapreduce.tasktracker.cache.local.size'},
    {'1.0': 'map.input.file',
     '2.0': 'mapreduce.map.input.file'},
    {'1.0': 'map.input.length',
     '2.0': 'mapreduce.map.input.length'},
    {'1.0': 'map.input.start',
     '2.0': 'mapreduce.map.input.start'},
    {'1.0': 'map.output.key.field.separator',
     '2.0': 'mapreduce.map.output.key.field.separator'},
    {'1.0': 'map.output.key.value.fields.spec',
     '2.0': 'mapreduce.fieldsel.map.output.key.value.fields.spec'},
    {'1.0': 'mapred.acls.enabled',
     '2.0': 'mapreduce.cluster.acls.enabled'},
    {'1.0': 'mapred.binary.partitioner.left.offset',
     '2.0': 'mapreduce.partition.binarypartitioner.left.offset'},
    {'1.0': 'mapred.binary.partitioner.right.offset',
     '2.0': 'mapreduce.partition.binarypartitioner.right.offset'},
    {'1.0': 'mapred.cache.archives',
     '2.0': 'mapreduce.job.cache.archives'},
    {'1.0': 'mapred.cache.archives.timestamps',
     '2.0': 'mapreduce.job.cache.archives.timestamps'},
    {'1.0': 'mapred.cache.files',
     '2.0': 'mapreduce.job.cache.files'},
    {'1.0': 'mapred.cache.files.timestamps',
     '2.0': 'mapreduce.job.cache.files.timestamps'},
    {'1.0': 'mapred.cache.localArchives',
     '2.0': 'mapreduce.job.cache.local.archives'},
    {'1.0': 'mapred.cache.localFiles',
     '2.0': 'mapreduce.job.cache.local.files'},
    {'1.0': 'mapred.child.tmp',
     '2.0': 'mapreduce.task.tmp.dir'},
    {'1.0': 'mapred.cluster.average.blacklist.threshold',
     '2.0': 'mapreduce.jobtracker.blacklist.average.threshold'},
    {'1.0': 'mapred.cluster.map.memory.mb',
     '2.0': 'mapreduce.cluster.mapmemory.mb'},
    {'1.0': 'mapred.cluster.max.map.memory.mb',
     '2.0': 'mapreduce.jobtracker.maxmapmemory.mb'},
    {'1.0': 'mapred.cluster.max.reduce.memory.mb',
     '2.0': 'mapreduce.jobtracker.maxreducememory.mb'},
    {'1.0': 'mapred.cluster.reduce.memory.mb',
     '2.0': 'mapreduce.cluster.reducememory.mb'},
    {'1.0': 'mapred.committer.job.setup.cleanup.needed',
     '2.0': 'mapreduce.job.committer.setup.cleanup.needed'},
    {'1.0': 'mapred.compress.map.output',
     '2.0': 'mapreduce.map.output.compress'},
    {'1.0': 'mapred.create.symlink',
     '2.0': 'mapreduce.job.cache.symlink.create'},
    {'1.0': 'mapred.data.field.separator',
     '2.0': 'mapreduce.fieldsel.data.field.separator'},
    {'1.0': 'mapred.debug.out.lines',
     '2.0': 'mapreduce.task.debugout.lines'},
    {'1.0': 'mapred.healthChecker.interval',
     '2.0': 'mapreduce.tasktracker.healthchecker.interval'},
    {'1.0': 'mapred.healthChecker.script.args',
     '2.0': 'mapreduce.tasktracker.healthchecker.script.args'},
    {'1.0': 'mapred.healthChecker.script.path',
     '2.0': 'mapreduce.tasktracker.healthchecker.script.path'},
    {'1.0': 'mapred.healthChecker.script.timeout',
     '2.0': 'mapreduce.tasktracker.healthchecker.script.timeout'},
    {'1.0': 'mapred.heartbeats.in.second',
     '2.0': 'mapreduce.jobtracker.heartbeats.in.second'},
    {'1.0': 'mapred.hosts',
     '2.0': 'mapreduce.jobtracker.hosts.filename'},
    {'1.0': 'mapred.hosts.exclude',
     '2.0': 'mapreduce.jobtracker.hosts.exclude.filename'},
    {'1.0': 'mapred.inmem.merge.threshold',
     '2.0': 'mapreduce.reduce.merge.inmem.threshold'},
    {'1.0': 'mapred.input.dir',
     '2.0': 'mapreduce.input.fileinputformat.inputdir'},
    {'1.0': 'mapred.input.dir.formats',
     '2.0': 'mapreduce.input.multipleinputs.dir.formats'},
    {'1.0': 'mapred.input.dir.mappers',
     '2.0': 'mapreduce.input.multipleinputs.dir.mappers'},
    {'1.0': 'mapred.input.pathFilter.class',
     '2.0': 'mapreduce.input.pathFilter.class'},
    {'1.0': 'mapred.jar',
     '2.0': 'mapreduce.job.jar'},
    {'1.0': 'mapred.job.classpath.archives',
     '2.0': 'mapreduce.job.classpath.archives'},
    {'1.0': 'mapred.job.classpath.files',
     '2.0': 'mapreduce.job.classpath.files'},
    {'1.0': 'mapred.job.id',
     '2.0': 'mapreduce.job.id'},
    {'1.0': 'mapred.job.map.memory.mb',
     '2.0': 'mapreduce.map.memory.mb'},
    {'1.0': 'mapred.job.name',
     '2.0': 'mapreduce.job.name'},
    {'1.0': 'mapred.job.priority',
     '2.0': 'mapreduce.job.priority'},
    {'1.0': 'mapred.job.queue.name',
     '2.0': 'mapreduce.job.queuename'},
    {'1.0': 'mapred.job.reduce.input.buffer.percent',
     '2.0': 'mapreduce.reduce.input.buffer.percent'},
    {'1.0': 'mapred.job.reduce.markreset.buffer.percent',
     '2.0': 'mapreduce.reduce.markreset.buffer.percent'},
    {'1.0': 'mapred.job.reduce.memory.mb',
     '2.0': 'mapreduce.reduce.memory.mb'},
    {'1.0': 'mapred.job.reduce.total.mem.bytes',
     '2.0': 'mapreduce.reduce.memory.totalbytes'},
    {'1.0': 'mapred.job.reuse.jvm.num.tasks',
     '2.0': 'mapreduce.job.jvm.numtasks'},
    {'1.0': 'mapred.job.shuffle.input.buffer.percent',
     '2.0': 'mapreduce.reduce.shuffle.input.buffer.percent'},
    {'1.0': 'mapred.job.shuffle.merge.percent',
     '2.0': 'mapreduce.reduce.shuffle.merge.percent'},
    {'1.0': 'mapred.job.tracker',
     '2.0': 'mapreduce.jobtracker.address'},
    {'1.0': 'mapred.job.tracker.handler.count',
     '2.0': 'mapreduce.jobtracker.handler.count'},
    {'1.0': 'mapred.job.tracker.history.completed.location',
     '2.0': 'mapreduce.jobtracker.jobhistory.completed.location'},
    {'1.0': 'mapred.job.tracker.http.address',
     '2.0': 'mapreduce.jobtracker.http.address'},
    {'1.0': 'mapred.job.tracker.jobhistory.lru.cache.size',
     '2.0': 'mapreduce.jobtracker.jobhistory.lru.cache.size'},
    {'1.0': 'mapred.job.tracker.persist.jobstatus.active',
     '2.0': 'mapreduce.jobtracker.persist.jobstatus.active'},
    {'1.0': 'mapred.job.tracker.persist.jobstatus.dir',
     '2.0': 'mapreduce.jobtracker.persist.jobstatus.dir'},
    {'1.0': 'mapred.job.tracker.persist.jobstatus.hours',
     '2.0': 'mapreduce.jobtracker.persist.jobstatus.hours'},
    {'1.0': 'mapred.job.tracker.retire.jobs',
     '2.0': 'mapreduce.jobtracker.retirejobs'},
    {'1.0': 'mapred.job.tracker.retiredjobs.cache.size',
     '2.0': 'mapreduce.jobtracker.retiredjobs.cache.size'},
    {'1.0': 'mapred.jobinit.threads',
     '2.0': 'mapreduce.jobtracker.jobinit.threads'},
    {'1.0': 'mapred.jobtracker.instrumentation',
     '2.0': 'mapreduce.jobtracker.instrumentation'},
    {'1.0': 'mapred.jobtracker.job.history.block.size',
     '2.0': 'mapreduce.jobtracker.jobhistory.block.size'},
    {'1.0': 'mapred.jobtracker.maxtasks.per.job',
     '2.0': 'mapreduce.jobtracker.maxtasks.perjob'},
    {'1.0': 'mapred.jobtracker.restart.recover',
     '2.0': 'mapreduce.jobtracker.restart.recover'},
    {'1.0': 'mapred.jobtracker.taskScheduler',
     '2.0': 'mapreduce.jobtracker.taskscheduler'},
    {'1.0': 'mapred.jobtracker.taskScheduler.maxRunningTasksPerJob',
     '2.0': 'mapreduce.jobtracker.taskscheduler.maxrunningtasks.perjob'},
    {'1.0': 'mapred.jobtracker.taskalloc.capacitypad',
     '2.0': 'mapreduce.jobtracker.taskscheduler.taskalloc.capacitypad'},
    {'1.0': 'mapred.join.expr',
     '2.0': 'mapreduce.join.expr'},
    {'1.0': 'mapred.join.keycomparator',
     '2.0': 'mapreduce.join.keycomparator'},
    {'1.0': 'mapred.lazy.output.format',
     '2.0': 'mapreduce.output.lazyoutputformat.outputformat'},
    {'1.0': 'mapred.line.input.format.linespermap',
     '2.0': 'mapreduce.input.lineinputformat.linespermap'},
    {'1.0': 'mapred.linerecordreader.maxlength',
     '2.0': 'mapreduce.input.linerecordreader.line.maxlength'},
    {'1.0': 'mapred.local.dir',
     '2.0': 'mapreduce.cluster.local.dir'},
    {'1.0': 'mapred.local.dir.minspacekill',
     '2.0': 'mapreduce.tasktracker.local.dir.minspacekill'},
    {'1.0': 'mapred.local.dir.minspacestart',
     '2.0': 'mapreduce.tasktracker.local.dir.minspacestart'},
    {'1.0': 'mapred.map.child.env',
     '2.0': 'mapreduce.map.env'},
    {'1.0': 'mapred.map.child.java.opts',
     '2.0': 'mapreduce.map.java.opts'},
    {'1.0': 'mapred.map.child.log.level',
     '2.0': 'mapreduce.map.log.level'},
    {'1.0': 'mapred.map.max.attempts',
     '2.0': 'mapreduce.map.maxattempts'},
    {'1.0': 'mapred.map.output.compression.codec',
     '2.0': 'mapreduce.map.output.compress.codec'},
    {'1.0': 'mapred.map.task.debug.script',
     '2.0': 'mapreduce.map.debug.script'},
    {'1.0': 'mapred.map.tasks',
     '2.0': 'mapreduce.job.maps'},
    {'1.0': 'mapred.map.tasks.speculative.execution',
     '2.0': 'mapreduce.map.speculative'},
    {'1.0': 'mapred.mapoutput.key.class',
     '2.0': 'mapreduce.map.output.key.class'},
    {'1.0': 'mapred.mapoutput.value.class',
     '2.0': 'mapreduce.map.output.value.class'},
    {'1.0': 'mapred.mapper.regex',
     '2.0': 'mapreduce.mapper.regex'},
    {'1.0': 'mapred.mapper.regex.group',
     '2.0': 'mapreduce.mapper.regexmapper..group'},
    {'1.0': 'mapred.max.map.failures.percent',
     '2.0': 'mapreduce.map.failures.maxpercent'},
    {'1.0': 'mapred.max.reduce.failures.percent',
     '2.0': 'mapreduce.reduce.failures.maxpercent'},
    {'1.0': 'mapred.max.split.size',
     '2.0': 'mapreduce.input.fileinputformat.split.maxsize'},
    {'1.0': 'mapred.max.tracker.blacklists',
     '2.0': 'mapreduce.jobtracker.tasktracker.maxblacklists'},
    {'1.0': 'mapred.max.tracker.failures',
     '2.0': 'mapreduce.job.maxtaskfailures.per.tracker'},
    {'1.0': 'mapred.merge.recordsBeforeProgress',
     '2.0': 'mapreduce.task.merge.progress.records'},
    {'1.0': 'mapred.min.split.size',
     '2.0': 'mapreduce.input.fileinputformat.split.minsize'},
    {'1.0': 'mapred.min.split.size.per.node',
     '2.0': 'mapreduce.input.fileinputformat.split.minsize.per.node'},
    {'1.0': 'mapred.min.split.size.per.rack',
     '2.0': 'mapreduce.input.fileinputformat.split.minsize.per.rack'},
    {'1.0': 'mapred.output.compress',
     '2.0': 'mapreduce.output.fileoutputformat.compress'},
    {'1.0': 'mapred.output.compression.codec',
     '2.0': 'mapreduce.output.fileoutputformat.compress.codec'},
    {'1.0': 'mapred.output.compression.type',
     '2.0': 'mapreduce.output.fileoutputformat.compress.type'},
    {'1.0': 'mapred.output.dir',
     '2.0': 'mapreduce.output.fileoutputformat.outputdir'},
    {'1.0': 'mapred.output.key.class',
     '2.0': 'mapreduce.job.output.key.class'},
    {'1.0': 'mapred.output.key.comparator.class',
     '2.0': 'mapreduce.job.output.key.comparator.class'},
    {'1.0': 'mapred.output.value.class',
     '2.0': 'mapreduce.job.output.value.class'},
    {'1.0': 'mapred.output.value.groupfn.class',
     '2.0': 'mapreduce.job.output.group.comparator.class'},
    {'1.0': 'mapred.permissions.supergroup',
     '2.0': 'mapreduce.cluster.permissions.supergroup'},
    {'1.0': 'mapred.pipes.user.inputformat',
     '2.0': 'mapreduce.pipes.inputformat'},
    {'1.0': 'mapred.reduce.child.env',
     '2.0': 'mapreduce.reduce.env'},
    {'1.0': 'mapred.reduce.child.java.opts',
     '2.0': 'mapreduce.reduce.java.opts'},
    {'1.0': 'mapred.reduce.child.log.level',
     '2.0': 'mapreduce.reduce.log.level'},
    {'1.0': 'mapred.reduce.max.attempts',
     '2.0': 'mapreduce.reduce.maxattempts'},
    {'1.0': 'mapred.reduce.parallel.copies',
     '2.0': 'mapreduce.reduce.shuffle.parallelcopies'},
    {'1.0': 'mapred.reduce.slowstart.completed.maps',
     '2.0': 'mapreduce.job.reduce.slowstart.completedmaps'},
    {'1.0': 'mapred.reduce.task.debug.script',
     '2.0': 'mapreduce.reduce.debug.script'},
    {'1.0': 'mapred.reduce.tasks',
     '2.0': 'mapreduce.job.reduces'},
    {'1.0': 'mapred.reduce.tasks.speculative.execution',
     '2.0': 'mapreduce.reduce.speculative'},
    {'1.0': 'mapred.seqbinary.output.key.class',
     '2.0': 'mapreduce.output.seqbinaryoutputformat.key.class'},
    {'1.0': 'mapred.seqbinary.output.value.class',
     '2.0': 'mapreduce.output.seqbinaryoutputformat.value.class'},
    {'1.0': 'mapred.shuffle.connect.timeout',
     '2.0': 'mapreduce.reduce.shuffle.connect.timeout'},
    {'1.0': 'mapred.shuffle.read.timeout',
     '2.0': 'mapreduce.reduce.shuffle.read.timeout'},
    {'1.0': 'mapred.skip.attempts.to.start.skipping',
     '2.0': 'mapreduce.task.skip.start.attempts'},
    {'1.0': 'mapred.skip.map.auto.incr.proc.count',
     '2.0': 'mapreduce.map.skip.proc-count.auto-incr'},
    {'1.0': 'mapred.skip.map.max.skip.records',
     '2.0': 'mapreduce.map.skip.maxrecords'},
    {'1.0': 'mapred.skip.on',
     '2.0': 'mapreduce.job.skiprecords'},
    {'1.0': 'mapred.skip.out.dir',
     '2.0': 'mapreduce.job.skip.outdir'},
    {'1.0': 'mapred.skip.reduce.auto.incr.proc.count',
     '2.0': 'mapreduce.reduce.skip.proc-count.auto-incr'},
    {'1.0': 'mapred.skip.reduce.max.skip.groups',
     '2.0': 'mapreduce.reduce.skip.maxgroups'},
    {'1.0': 'mapred.speculative.execution.slowNodeThreshold',
     '2.0': 'mapreduce.job.speculative.slownodethreshold'},
    {'1.0': 'mapred.speculative.execution.slowTaskThreshold',
     '2.0': 'mapreduce.job.speculative.slowtaskthreshold'},
    {'1.0': 'mapred.speculative.execution.speculativeCap',
     '2.0': 'mapreduce.job.speculative.speculativecap'},
    {'1.0': 'mapred.submit.replication',
     '2.0': 'mapreduce.client.submit.file.replication'},
    {'1.0': 'mapred.system.dir',
     '2.0': 'mapreduce.jobtracker.system.dir'},
    {'1.0': 'mapred.task.cache.levels',
     '2.0': 'mapreduce.jobtracker.taskcache.levels'},
    {'1.0': 'mapred.task.id',
     '2.0': 'mapreduce.task.attempt.id'},
    {'1.0': 'mapred.task.is.map',
     '2.0': 'mapreduce.task.ismap'},
    {'1.0': 'mapred.task.partition',
     '2.0': 'mapreduce.task.partition'},
    {'1.0': 'mapred.task.profile',
     '2.0': 'mapreduce.task.profile'},
    {'1.0': 'mapred.task.profile.maps',
     '2.0': 'mapreduce.task.profile.maps'},
    {'1.0': 'mapred.task.profile.params',
     '2.0': 'mapreduce.task.profile.params'},
    {'1.0': 'mapred.task.profile.reduces',
     '2.0': 'mapreduce.task.profile.reduces'},
    {'1.0': 'mapred.task.timeout',
     '2.0': 'mapreduce.task.timeout'},
    {'1.0': 'mapred.task.tracker.http.address',
     '2.0': 'mapreduce.tasktracker.http.address'},
    {'1.0': 'mapred.task.tracker.report.address',
     '2.0': 'mapreduce.tasktracker.report.address'},
    {'1.0': 'mapred.task.tracker.task-controller',
     '2.0': 'mapreduce.tasktracker.taskcontroller'},
    {'1.0': 'mapred.tasktracker.dns.interface',
     '2.0': 'mapreduce.tasktracker.dns.interface'},
    {'1.0': 'mapred.tasktracker.dns.nameserver',
     '2.0': 'mapreduce.tasktracker.dns.nameserver'},
    {'1.0': 'mapred.tasktracker.events.batchsize',
     '2.0': 'mapreduce.tasktracker.events.batchsize'},
    {'1.0': 'mapred.tasktracker.expiry.interval',
     '2.0': 'mapreduce.jobtracker.expire.trackers.interval'},
    {'1.0': 'mapred.tasktracker.indexcache.mb',
     '2.0': 'mapreduce.tasktracker.indexcache.mb'},
    {'1.0': 'mapred.tasktracker.instrumentation',
     '2.0': 'mapreduce.tasktracker.instrumentation'},
    {'1.0': 'mapred.tasktracker.map.tasks.maximum',
     '2.0': 'mapreduce.tasktracker.map.tasks.maximum'},
    {'1.0': 'mapred.tasktracker.memory_calculator_plugin',
     '2.0': 'mapreduce.tasktracker.resourcecalculatorplugin'},
    {'1.0': 'mapred.tasktracker.memorycalculatorplugin',
     '2.0': 'mapreduce.tasktracker.resourcecalculatorplugin'},
    {'1.0': 'mapred.tasktracker.reduce.tasks.maximum',
     '2.0': 'mapreduce.tasktracker.reduce.tasks.maximum'},
    {'1.0': 'mapred.tasktracker.taskmemorymanager.monitoring-interval',
     '2.0': 'mapreduce.tasktracker.taskmemorymanager.monitoringinterval'},
    {'1.0': 'mapred.tasktracker.tasks.sleeptime-before-sigkill',
     '2.0': 'mapreduce.tasktracker.tasks.sleeptimebeforesigkill'},
    {'1.0': 'mapred.temp.dir',
     '2.0': 'mapreduce.cluster.temp.dir'},
    {'1.0': 'mapred.text.key.comparator.options',
     '2.0': 'mapreduce.partition.keycomparator.options'},
    {'1.0': 'mapred.text.key.partitioner.options',
     '2.0': 'mapreduce.partition.keypartitioner.options'},
    {'1.0': 'mapred.textoutputformat.separator',
     '2.0': 'mapreduce.output.textoutputformat.separator'},
    {'1.0': 'mapred.tip.id',
     '2.0': 'mapreduce.task.id'},
    {'1.0': 'mapred.used.genericoptionsparser',
     '2.0': 'mapreduce.client.genericoptionsparser.used'},
    {'1.0': 'mapred.userlog.limit.kb',
     '2.0': 'mapreduce.task.userlog.limit.kb'},
    {'1.0': 'mapred.userlog.retain.hours',
     '2.0': 'mapreduce.job.userlog.retain.hours'},
    {'1.0': 'mapred.work.output.dir',
     '2.0': 'mapreduce.task.output.dir'},
    {'1.0': 'mapred.working.dir',
     '2.0': 'mapreduce.job.working.dir'},
    {'1.0': 'mapreduce.combine.class',
     '2.0': 'mapreduce.job.combine.class'},
    {'1.0': 'mapreduce.inputformat.class',
     '2.0': 'mapreduce.job.inputformat.class'},
    {'1.0': 'mapreduce.jobtracker.permissions.supergroup',
     '2.0': 'mapreduce.cluster.permissions.supergroup'},
    {'1.0': 'mapreduce.map.class',
     '2.0': 'mapreduce.job.map.class'},
    {'1.0': 'mapreduce.outputformat.class',
     '2.0': 'mapreduce.job.outputformat.class'},
    {'1.0': 'mapreduce.partitioner.class',
     '2.0': 'mapreduce.job.partitioner.class'},
    {'1.0': 'mapreduce.reduce.class',
     '2.0': 'mapreduce.job.reduce.class'},
    {'1.0': 'min.num.spills.for.combine',
     '2.0': 'mapreduce.map.combine.minspills'},
    {'1.0': 'reduce.output.key.value.fields.spec',
     '2.0': 'mapreduce.fieldsel.reduce.output.key.value.fields.spec'},
    {'1.0': 'security.job.submission.protocol.acl',
     '2.0': 'security.job.client.protocol.acl'},
    {'1.0': 'security.task.umbilical.protocol.acl',
     '2.0': 'security.job.task.protocol.acl'},
    {'1.0': 'sequencefile.filter.class',
     '2.0': 'mapreduce.input.sequencefileinputfilter.class'},
    {'1.0': 'sequencefile.filter.frequency',
     '2.0': 'mapreduce.input.sequencefileinputfilter.frequency'},
    {'1.0': 'sequencefile.filter.regex',
     '2.0': 'mapreduce.input.sequencefileinputfilter.regex'},
    {'1.0': 'session.id',
     '2.0': 'dfs.metrics.session-id'},
    {'1.0': 'slave.host.name',
     '2.0': 'dfs.datanode.hostname'},
    {'1.0': 'slave.host.name',
     '2.0': 'mapreduce.tasktracker.host.name'},
    {'1.0': 'tasktracker.contention.tracking',
     '2.0': 'mapreduce.tasktracker.contention.tracking'},
    {'1.0': 'tasktracker.http.threads',
     '2.0': 'mapreduce.tasktracker.http.threads'},
    {'1.0': 'topology.node.switch.mapping.impl',
     '2.0': 'net.topology.node.switch.mapping.impl'},
    {'1.0': 'topology.script.file.name',
     '2.0': 'net.topology.script.file.name'},
    {'1.0': 'topology.script.number.args',
     '2.0': 'net.topology.script.number.args'},
    {'1.0': 'user.name',
     '2.0': 'mapreduce.job.user.name'},
    {'1.0': 'webinterface.private.actions',
     '2.0': 'mapreduce.jobtracker.webinterface.trusted'},
]

# Handle compatibility for 0.x versions of Hadoop too
for jobconf_dict in _JOBCONF_DICT_LIST:
    jobconf_dict['0.20'] = jobconf_dict['1.0']
    jobconf_dict['0.21'] = jobconf_dict['2.0']
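
# After the loop above, every entry carries all four version keys. An
# illustrative example (values taken from the table above):
#
#   {'0.20': 'dfs.block.size', '1.0': 'dfs.block.size',
#    '0.21': 'dfs.blocksize', '2.0': 'dfs.blocksize'}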


def _dict_list_to_compat_map(dict_list):
    # compat_map = {
    #     ...
    #     a: {'1.0': a, '2.0': b}
    #     ...
    # }
    compat_map = {}
    for version_dict in dict_list:
        for value in version_dict.values():
            compat_map[value] = version_dict
    return compat_map


_JOBCONF_MAP = _dict_list_to_compat_map(_JOBCONF_DICT_LIST)
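
# A sketch of the resulting structure (illustrative): each name, old or new,
# keys the version dict it appears in, so lookups work from either direction:
#
#   _JOBCONF_MAP['mapred.job.name'] is _JOBCONF_MAP['mapreduce.job.name']
#   # both -> {'0.20': 'mapred.job.name', '0.21': 'mapreduce.job.name',
#   #          '1.0': 'mapred.job.name', '2.0': 'mapreduce.job.name'}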


def jobconf_from_env(variable, default=None):
    """Get the value of a jobconf variable from the runtime environment.

    For example, a :py:class:`~mrjob.job.MRJob` could use
    ``jobconf_from_env('map.input.file')`` to get the name of the file a
    mapper is reading input from.

    If the name of the jobconf variable is different in different versions of
    Hadoop (e.g. in Hadoop 2.0, ``map.input.file`` is
    ``mapreduce.map.input.file``), we'll automatically try all variants before
    giving up.

    Return *default* if that jobconf variable isn't set.
    """
    # try variable verbatim first
    name = variable.replace('.', '_')
    if name in os.environ:
        return os.environ[name]

    # try alternatives (arbitrary order)
    for var in _JOBCONF_MAP.get(variable, {}).values():
        name = var.replace('.', '_')
        if name in os.environ:
            return os.environ[name]

    return default
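
# Hadoop Streaming exports jobconf variables into the task environment with
# dots replaced by underscores, so a lookup inside a task might look like
# this (illustrative; assumes Hadoop 2.x exported mapreduce_map_input_file):
#
#   input_file = jobconf_from_env('map.input.file')
#   # checks os.environ['map_input_file'] first, then falls back to
#   # os.environ['mapreduce_map_input_file']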


def jobconf_from_dict(jobconf, name, default=None):
    """Get the value of a jobconf variable from the given dictionary.

    :param dict jobconf: jobconf dictionary
    :param string name: name of the jobconf variable (e.g. ``'user.name'``)
    :param default: fallback value

    If the name of the jobconf variable is different in different versions of
    Hadoop (e.g. in Hadoop 2, ``map.input.file`` is
    ``mapreduce.map.input.file``), we'll automatically try all variants before
    giving up.

    Return *default* if that jobconf variable isn't set.
    """
    if name in jobconf:
        return jobconf[name]

    # try alternatives (arbitrary order)
    for alternative in _JOBCONF_MAP.get(name, {}).values():
        if alternative in jobconf:
            return jobconf[alternative]

    return default
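
# A minimal sketch of the fallback behavior (illustrative values):
#
#   conf = {'mapreduce.job.user.name': 'dave'}
#   jobconf_from_dict(conf, 'user.name')          # -> 'dave' (via variant)
#   jobconf_from_dict({}, 'user.name', 'nobody')  # -> 'nobody'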


def map_version(version, version_map):
    """Allows you to look up something by version (e.g. which jobconf
    variable to use), specifying only the versions where that value changed.

    *version* is a string.

    *version_map* maps the version (as a string) at which a value changed
    to the new value.

    For efficiency, *version_map* can also be a list of tuples of
    ``(LooseVersion(version_as_string), value)``, with oldest versions first.

    If *version* is less than any version in *version_map*, use the value for
    the earliest version in *version_map*.
    """
    if version is None:
        raise TypeError

    if not version_map:
        raise ValueError

    if isinstance(version_map, dict):
        version_map = sorted((LooseVersion(k), v)
                             for k, v in version_map.items())

    req_version = LooseVersion(version)

    for min_version, value in reversed(version_map):
        if req_version >= min_version:
            return value
    else:
        return version_map[0][1]
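
# For example (illustrative version strings and values):
#
#   map_version('2.7.1', {'1.0': 'old', '2.0': 'new'})  # -> 'new'
#   map_version('0.18', {'1.0': 'old', '2.0': 'new'})   # -> 'old'
#   # (0.18 predates every key, so the earliest version's value is used)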


def translate_jobconf(variable, version):
    """Translate *variable* to Hadoop version *version*. If it's not
    a variable we recognize, leave it as-is.
    """
    if version is None:
        raise TypeError

    if variable in _JOBCONF_MAP:
        return map_version(version, _JOBCONF_MAP[variable])
    else:
        return variable
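
# For example (values taken from the table above):
#
#   translate_jobconf('user.name', '2.0')    # -> 'mapreduce.job.user.name'
#   translate_jobconf('user.name', '1.0')    # -> 'user.name'
#   translate_jobconf('no.such.var', '2.0')  # -> 'no.such.var' (unrecognized)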


def translate_jobconf_for_all_versions(variable):
    """Get all known variants of the given jobconf variable.
    Unlike :py:func:`translate_jobconf`, returns a list."""
    return sorted(
        set([variable] + list(_JOBCONF_MAP.get(variable, {}).values())))
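
# For example (values taken from the table above):
#
#   translate_jobconf_for_all_versions('user.name')
#   # -> ['mapreduce.job.user.name', 'user.name']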


def translate_jobconf_dict(jobconf, hadoop_version=None):
    """Translates the configuration property names in *jobconf* to match
    those accepted by *hadoop_version*. Logs a warning if any
    configuration property name does not match the name in that hadoop
    version. Combines the original jobconf with the translated jobconf.

    :return: a map consisting of the original and translated configuration
             property names and values.
    """
    translated_jobconf = jobconf.copy()
    translation_warnings = {}

    for variable, value in jobconf.items():
        if hadoop_version:
            variants = [translate_jobconf(variable, hadoop_version)]
        else:
            variants = translate_jobconf_for_all_versions(variable)

        for variant in variants:
            if variant in jobconf:
                # this happens if variant == variable or
                # if the variant was in jobconf to start with
                continue

            translated_jobconf[variant] = value

            if hadoop_version:
                translation_warnings[variable] = variant

    if translation_warnings:
        log.warning("Detected hadoop configuration property names that"
                    " do not match hadoop version %s:"
                    "\nThey have been translated as follows:\n %s",
                    hadoop_version,
                    '\n'.join([
                        "%s: %s" % (variable, variant) for variable, variant
                        in sorted(translation_warnings.items())]))

    return translated_jobconf
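
# A minimal sketch (illustrative):
#
#   translate_jobconf_dict({'mapred.job.name': 'wc'}, hadoop_version='2.0')
#   # -> {'mapred.job.name': 'wc', 'mapreduce.job.name': 'wc'}, and logs a
#   # warning that mapred.job.name was translated to mapreduce.job.name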


def uses_yarn(version):
    """Basically, is this Hadoop 2? This also handles versions in the
    zero series (0.23+) where YARN originated."""
    return (version_gte(version, '2') or
            (version_gte(version, '0.23') and not version_gte(version, '1')))
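
# For example:
#
#   uses_yarn('2.7.1')   # -> True
#   uses_yarn('0.23.1')  # -> True (YARN originated in the 0.23 series)
#   uses_yarn('1.2.1')   # -> False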


def version_gte(version, cmp_version_str):
    """Return ``True`` if *version* >= *cmp_version_str*."""
    if not isinstance(version, string_types):
        raise TypeError('%r is not a string' % version)

    if not isinstance(cmp_version_str, string_types):
        raise TypeError('%r is not a string' % cmp_version_str)

    return LooseVersion(version) >= LooseVersion(cmp_version_str)
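
# For example:
#
#   version_gte('2.7.1', '2')    # -> True
#   version_gte('0.23', '0.9')   # -> True (compared as versions, not strings)
#   version_gte('1.0.3', '2.0')  # -> False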