# -*- coding: utf-8 -*-
# Copyright 2009-2012 Yelp
# Copyright 2013-2014 Yelp and Contributors
# Copyright 2015-2016 Yelp
# Copyright 2018 Ben Dalling
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utility functions for compatibility with different version of hadoop."""
from distutils.version import LooseVersion
import logging
import os

from mrjob.py2 import string_types


# lists alternative names for jobconf variables
# full listing thanks to translation table in
# http://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/DeprecatedProperties.html # noqa

log = logging.getLogger(__name__)

_JOBCONF_DICT_LIST = [
{'1.0': 'StorageId',
'2.0': 'dfs.datanode.StorageId'},
{'1.0': 'create.empty.dir.if.nonexist',
'2.0': 'mapreduce.jobcontrol.createdir.ifnotexist'},
{'1.0': 'dfs.access.time.precision',
'2.0': 'dfs.namenode.accesstime.precision'},
{'1.0': 'dfs.backup.address',
'2.0': 'dfs.namenode.backup.address'},
{'1.0': 'dfs.backup.http.address',
'2.0': 'dfs.namenode.backup.http-address'},
{'1.0': 'dfs.balance.bandwidthPerSec',
'2.0': 'dfs.datanode.balance.bandwidthPerSec'},
{'1.0': 'dfs.block.size',
'2.0': 'dfs.blocksize'},
{'1.0': 'dfs.client.buffer.dir',
'2.0': 'fs.client.buffer.dir'},
{'1.0': 'dfs.data.dir',
'2.0': 'dfs.datanode.data.dir'},
{'1.0': 'dfs.datanode.max.xcievers',
'2.0': 'dfs.datanode.max.transfer.threads'},
{'1.0': 'dfs.df.interval',
'2.0': 'fs.df.interval'},
{'1.0': 'dfs.http.address',
'2.0': 'dfs.namenode.http-address'},
{'1.0': 'dfs.https.address',
'2.0': 'dfs.namenode.https-address'},
{'1.0': 'dfs.https.client.keystore.resource',
'2.0': 'dfs.client.https.keystore.resource'},
{'1.0': 'dfs.https.need.client.auth',
'2.0': 'dfs.client.https.need-auth'},
{'1.0': 'dfs.max-repl-streams',
'2.0': 'dfs.namenode.replication.max-streams'},
{'1.0': 'dfs.max.objects',
'2.0': 'dfs.namenode.max.objects'},
{'1.0': 'dfs.name.dir',
'2.0': 'dfs.namenode.name.dir'},
{'1.0': 'dfs.name.dir.restore',
'2.0': 'dfs.namenode.name.dir.restore'},
{'1.0': 'dfs.name.edits.dir',
'2.0': 'dfs.namenode.edits.dir'},
{'1.0': 'dfs.permissions',
'2.0': 'dfs.permissions.enabled'},
{'1.0': 'dfs.permissions.supergroup',
'2.0': 'dfs.permissions.superusergroup'},
{'1.0': 'dfs.read.prefetch.size',
'2.0': 'dfs.client.read.prefetch.size'},
{'1.0': 'dfs.replication.considerLoad',
'2.0': 'dfs.namenode.replication.considerLoad'},
{'1.0': 'dfs.replication.interval',
'2.0': 'dfs.namenode.replication.interval'},
{'1.0': 'dfs.replication.min',
'2.0': 'dfs.namenode.replication.min'},
{'1.0': 'dfs.replication.pending.timeout.sec',
'2.0': 'dfs.namenode.replication.pending.timeout-sec'},
{'1.0': 'dfs.safemode.extension',
'2.0': 'dfs.namenode.safemode.extension'},
{'1.0': 'dfs.safemode.threshold.pct',
'2.0': 'dfs.namenode.safemode.threshold-pct'},
{'1.0': 'dfs.secondary.http.address',
'2.0': 'dfs.namenode.secondary.http-address'},
{'1.0': 'dfs.socket.timeout',
'2.0': 'dfs.client.socket-timeout'},
{'1.0': 'dfs.upgrade.permission',
'2.0': 'dfs.namenode.upgrade.permission'},
{'1.0': 'dfs.write.packet.size',
'2.0': 'dfs.client-write-packet-size'},
{'1.0': 'fs.checkpoint.dir',
'2.0': 'dfs.namenode.checkpoint.dir'},
{'1.0': 'fs.checkpoint.edits.dir',
'2.0': 'dfs.namenode.checkpoint.edits.dir'},
{'1.0': 'fs.checkpoint.period',
'2.0': 'dfs.namenode.checkpoint.period'},
{'1.0': 'fs.default.name',
'2.0': 'fs.defaultFS'},
{'1.0': 'hadoop.configured.node.mapping',
'2.0': 'net.topology.configured.node.mapping'},
{'1.0': 'hadoop.job.history.location',
'2.0': 'mapreduce.jobtracker.jobhistory.location'},
{'1.0': 'hadoop.native.lib',
'2.0': 'io.native.lib.available'},
{'1.0': 'hadoop.net.static.resolutions',
'2.0': 'mapreduce.tasktracker.net.static.resolutions'},
{'1.0': 'hadoop.pipes.command-file.keep',
'2.0': 'mapreduce.pipes.commandfile.preserve'},
{'1.0': 'hadoop.pipes.executable',
'2.0': 'mapreduce.pipes.executable'},
{'1.0': 'hadoop.pipes.executable.interpretor',
'2.0': 'mapreduce.pipes.executable.interpretor'},
{'1.0': 'hadoop.pipes.java.mapper',
'2.0': 'mapreduce.pipes.isjavamapper'},
{'1.0': 'hadoop.pipes.java.recordreader',
'2.0': 'mapreduce.pipes.isjavarecordreader'},
{'1.0': 'hadoop.pipes.java.recordwriter',
'2.0': 'mapreduce.pipes.isjavarecordwriter'},
{'1.0': 'hadoop.pipes.java.reducer',
'2.0': 'mapreduce.pipes.isjavareducer'},
{'1.0': 'hadoop.pipes.partitioner',
'2.0': 'mapreduce.pipes.partitioner'},
{'1.0': 'heartbeat.recheck.interval',
'2.0': 'dfs.namenode.heartbeat.recheck-interval'},
{'1.0': 'io.bytes.per.checksum',
'2.0': 'dfs.bytes-per-checksum'},
{'1.0': 'io.sort.factor',
'2.0': 'mapreduce.task.io.sort.factor'},
{'1.0': 'io.sort.mb',
'2.0': 'mapreduce.task.io.sort.mb'},
{'1.0': 'io.sort.spill.percent',
'2.0': 'mapreduce.map.sort.spill.percent'},
{'1.0': 'job.end.notification.url',
'2.0': 'mapreduce.job.end-notification.url'},
{'1.0': 'job.end.retry.attempts',
'2.0': 'mapreduce.job.end-notification.retry.attempts'},
{'1.0': 'job.end.retry.interval',
'2.0': 'mapreduce.job.end-notification.retry.interval'},
{'1.0': 'job.local.dir',
'2.0': 'mapreduce.job.local.dir'},
{'1.0': 'jobclient.completion.poll.interval',
'2.0': 'mapreduce.client.completion.pollinterval'},
{'1.0': 'jobclient.output.filter',
'2.0': 'mapreduce.client.output.filter'},
{'1.0': 'jobclient.progress.monitor.poll.interval',
'2.0': 'mapreduce.client.progressmonitor.pollinterval'},
{'1.0': 'keep.failed.task.files',
'2.0': 'mapreduce.task.files.preserve.failedtasks'},
{'1.0': 'keep.task.files.pattern',
'2.0': 'mapreduce.task.files.preserve.filepattern'},
{'1.0': 'key.value.separator.in.input.line',
'2.0': 'mapreduce.input.keyvaluelinerecordreader.key.value.separator'},
{'1.0': 'local.cache.size',
'2.0': 'mapreduce.tasktracker.cache.local.size'},
{'1.0': 'map.input.file',
'2.0': 'mapreduce.map.input.file'},
{'1.0': 'map.input.length',
'2.0': 'mapreduce.map.input.length'},
{'1.0': 'map.input.start',
'2.0': 'mapreduce.map.input.start'},
{'1.0': 'map.output.key.field.separator',
'2.0': 'mapreduce.map.output.key.field.separator'},
{'1.0': 'map.output.key.value.fields.spec',
'2.0': 'mapreduce.fieldsel.map.output.key.value.fields.spec'},
{'1.0': 'mapred.acls.enabled',
'2.0': 'mapreduce.cluster.acls.enabled'},
{'1.0': 'mapred.binary.partitioner.left.offset',
'2.0': 'mapreduce.partition.binarypartitioner.left.offset'},
{'1.0': 'mapred.binary.partitioner.right.offset',
'2.0': 'mapreduce.partition.binarypartitioner.right.offset'},
{'1.0': 'mapred.cache.archives',
'2.0': 'mapreduce.job.cache.archives'},
{'1.0': 'mapred.cache.archives.timestamps',
'2.0': 'mapreduce.job.cache.archives.timestamps'},
{'1.0': 'mapred.cache.files',
'2.0': 'mapreduce.job.cache.files'},
{'1.0': 'mapred.cache.files.timestamps',
'2.0': 'mapreduce.job.cache.files.timestamps'},
{'1.0': 'mapred.cache.localArchives',
'2.0': 'mapreduce.job.cache.local.archives'},
{'1.0': 'mapred.cache.localFiles',
'2.0': 'mapreduce.job.cache.local.files'},
{'1.0': 'mapred.child.tmp',
'2.0': 'mapreduce.task.tmp.dir'},
{'1.0': 'mapred.cluster.average.blacklist.threshold',
'2.0': 'mapreduce.jobtracker.blacklist.average.threshold'},
{'1.0': 'mapred.cluster.map.memory.mb',
'2.0': 'mapreduce.cluster.mapmemory.mb'},
{'1.0': 'mapred.cluster.max.map.memory.mb',
'2.0': 'mapreduce.jobtracker.maxmapmemory.mb'},
{'1.0': 'mapred.cluster.max.reduce.memory.mb',
'2.0': 'mapreduce.jobtracker.maxreducememory.mb'},
{'1.0': 'mapred.cluster.reduce.memory.mb',
'2.0': 'mapreduce.cluster.reducememory.mb'},
{'1.0': 'mapred.committer.job.setup.cleanup.needed',
'2.0': 'mapreduce.job.committer.setup.cleanup.needed'},
{'1.0': 'mapred.compress.map.output',
'2.0': 'mapreduce.map.output.compress'},
{'1.0': 'mapred.create.symlink',
'2.0': 'mapreduce.job.cache.symlink.create'},
{'1.0': 'mapred.data.field.separator',
'2.0': 'mapreduce.fieldsel.data.field.separator'},
{'1.0': 'mapred.debug.out.lines',
'2.0': 'mapreduce.task.debugout.lines'},
{'1.0': 'mapred.healthChecker.interval',
'2.0': 'mapreduce.tasktracker.healthchecker.interval'},
{'1.0': 'mapred.healthChecker.script.args',
'2.0': 'mapreduce.tasktracker.healthchecker.script.args'},
{'1.0': 'mapred.healthChecker.script.path',
'2.0': 'mapreduce.tasktracker.healthchecker.script.path'},
{'1.0': 'mapred.healthChecker.script.timeout',
'2.0': 'mapreduce.tasktracker.healthchecker.script.timeout'},
{'1.0': 'mapred.heartbeats.in.second',
'2.0': 'mapreduce.jobtracker.heartbeats.in.second'},
{'1.0': 'mapred.hosts',
'2.0': 'mapreduce.jobtracker.hosts.filename'},
{'1.0': 'mapred.hosts.exclude',
'2.0': 'mapreduce.jobtracker.hosts.exclude.filename'},
{'1.0': 'mapred.inmem.merge.threshold',
'2.0': 'mapreduce.reduce.merge.inmem.threshold'},
{'1.0': 'mapred.input.dir',
'2.0': 'mapreduce.input.fileinputformat.inputdir'},
{'1.0': 'mapred.input.dir.formats',
'2.0': 'mapreduce.input.multipleinputs.dir.formats'},
{'1.0': 'mapred.input.dir.mappers',
'2.0': 'mapreduce.input.multipleinputs.dir.mappers'},
{'1.0': 'mapred.input.pathFilter.class',
'2.0': 'mapreduce.input.pathFilter.class'},
{'1.0': 'mapred.jar',
'2.0': 'mapreduce.job.jar'},
{'1.0': 'mapred.job.classpath.archives',
'2.0': 'mapreduce.job.classpath.archives'},
{'1.0': 'mapred.job.classpath.files',
'2.0': 'mapreduce.job.classpath.files'},
{'1.0': 'mapred.job.id',
'2.0': 'mapreduce.job.id'},
{'1.0': 'mapred.job.map.memory.mb',
'2.0': 'mapreduce.map.memory.mb'},
{'1.0': 'mapred.job.name',
'2.0': 'mapreduce.job.name'},
{'1.0': 'mapred.job.priority',
'2.0': 'mapreduce.job.priority'},
{'1.0': 'mapred.job.queue.name',
'2.0': 'mapreduce.job.queuename'},
{'1.0': 'mapred.job.reduce.input.buffer.percent',
'2.0': 'mapreduce.reduce.input.buffer.percent'},
{'1.0': 'mapred.job.reduce.markreset.buffer.percent',
'2.0': 'mapreduce.reduce.markreset.buffer.percent'},
{'1.0': 'mapred.job.reduce.memory.mb',
'2.0': 'mapreduce.reduce.memory.mb'},
{'1.0': 'mapred.job.reduce.total.mem.bytes',
'2.0': 'mapreduce.reduce.memory.totalbytes'},
{'1.0': 'mapred.job.reuse.jvm.num.tasks',
'2.0': 'mapreduce.job.jvm.numtasks'},
{'1.0': 'mapred.job.shuffle.input.buffer.percent',
'2.0': 'mapreduce.reduce.shuffle.input.buffer.percent'},
{'1.0': 'mapred.job.shuffle.merge.percent',
'2.0': 'mapreduce.reduce.shuffle.merge.percent'},
{'1.0': 'mapred.job.tracker',
'2.0': 'mapreduce.jobtracker.address'},
{'1.0': 'mapred.job.tracker.handler.count',
'2.0': 'mapreduce.jobtracker.handler.count'},
{'1.0': 'mapred.job.tracker.history.completed.location',
'2.0': 'mapreduce.jobtracker.jobhistory.completed.location'},
{'1.0': 'mapred.job.tracker.http.address',
'2.0': 'mapreduce.jobtracker.http.address'},
{'1.0': 'mapred.job.tracker.jobhistory.lru.cache.size',
'2.0': 'mapreduce.jobtracker.jobhistory.lru.cache.size'},
{'1.0': 'mapred.job.tracker.persist.jobstatus.active',
'2.0': 'mapreduce.jobtracker.persist.jobstatus.active'},
{'1.0': 'mapred.job.tracker.persist.jobstatus.dir',
'2.0': 'mapreduce.jobtracker.persist.jobstatus.dir'},
{'1.0': 'mapred.job.tracker.persist.jobstatus.hours',
'2.0': 'mapreduce.jobtracker.persist.jobstatus.hours'},
{'1.0': 'mapred.job.tracker.retire.jobs',
'2.0': 'mapreduce.jobtracker.retirejobs'},
{'1.0': 'mapred.job.tracker.retiredjobs.cache.size',
'2.0': 'mapreduce.jobtracker.retiredjobs.cache.size'},
{'1.0': 'mapred.jobinit.threads',
'2.0': 'mapreduce.jobtracker.jobinit.threads'},
{'1.0': 'mapred.jobtracker.instrumentation',
'2.0': 'mapreduce.jobtracker.instrumentation'},
{'1.0': 'mapred.jobtracker.job.history.block.size',
'2.0': 'mapreduce.jobtracker.jobhistory.block.size'},
{'1.0': 'mapred.jobtracker.maxtasks.per.job',
'2.0': 'mapreduce.jobtracker.maxtasks.perjob'},
{'1.0': 'mapred.jobtracker.restart.recover',
'2.0': 'mapreduce.jobtracker.restart.recover'},
{'1.0': 'mapred.jobtracker.taskScheduler',
'2.0': 'mapreduce.jobtracker.taskscheduler'},
{'1.0': 'mapred.jobtracker.taskScheduler.maxRunningTasksPerJob',
'2.0': 'mapreduce.jobtracker.taskscheduler.maxrunningtasks.perjob'},
{'1.0': 'mapred.jobtracker.taskalloc.capacitypad',
'2.0': 'mapreduce.jobtracker.taskscheduler.taskalloc.capacitypad'},
{'1.0': 'mapred.join.expr',
'2.0': 'mapreduce.join.expr'},
{'1.0': 'mapred.join.keycomparator',
'2.0': 'mapreduce.join.keycomparator'},
{'1.0': 'mapred.lazy.output.format',
'2.0': 'mapreduce.output.lazyoutputformat.outputformat'},
{'1.0': 'mapred.line.input.format.linespermap',
'2.0': 'mapreduce.input.lineinputformat.linespermap'},
{'1.0': 'mapred.linerecordreader.maxlength',
'2.0': 'mapreduce.input.linerecordreader.line.maxlength'},
{'1.0': 'mapred.local.dir',
'2.0': 'mapreduce.cluster.local.dir'},
{'1.0': 'mapred.local.dir.minspacekill',
'2.0': 'mapreduce.tasktracker.local.dir.minspacekill'},
{'1.0': 'mapred.local.dir.minspacestart',
'2.0': 'mapreduce.tasktracker.local.dir.minspacestart'},
{'1.0': 'mapred.map.child.env',
'2.0': 'mapreduce.map.env'},
{'1.0': 'mapred.map.child.java.opts',
'2.0': 'mapreduce.map.java.opts'},
{'1.0': 'mapred.map.child.log.level',
'2.0': 'mapreduce.map.log.level'},
{'1.0': 'mapred.map.max.attempts',
'2.0': 'mapreduce.map.maxattempts'},
{'1.0': 'mapred.map.output.compression.codec',
'2.0': 'mapreduce.map.output.compress.codec'},
{'1.0': 'mapred.map.task.debug.script',
'2.0': 'mapreduce.map.debug.script'},
{'1.0': 'mapred.map.tasks',
'2.0': 'mapreduce.job.maps'},
{'1.0': 'mapred.map.tasks.speculative.execution',
'2.0': 'mapreduce.map.speculative'},
{'1.0': 'mapred.mapoutput.key.class',
'2.0': 'mapreduce.map.output.key.class'},
{'1.0': 'mapred.mapoutput.value.class',
'2.0': 'mapreduce.map.output.value.class'},
{'1.0': 'mapred.mapper.regex',
'2.0': 'mapreduce.mapper.regex'},
{'1.0': 'mapred.mapper.regex.group',
'2.0': 'mapreduce.mapper.regexmapper..group'},
{'1.0': 'mapred.max.map.failures.percent',
'2.0': 'mapreduce.map.failures.maxpercent'},
{'1.0': 'mapred.max.reduce.failures.percent',
'2.0': 'mapreduce.reduce.failures.maxpercent'},
{'1.0': 'mapred.max.split.size',
'2.0': 'mapreduce.input.fileinputformat.split.maxsize'},
{'1.0': 'mapred.max.tracker.blacklists',
'2.0': 'mapreduce.jobtracker.tasktracker.maxblacklists'},
{'1.0': 'mapred.max.tracker.failures',
'2.0': 'mapreduce.job.maxtaskfailures.per.tracker'},
{'1.0': 'mapred.merge.recordsBeforeProgress',
'2.0': 'mapreduce.task.merge.progress.records'},
{'1.0': 'mapred.min.split.size',
'2.0': 'mapreduce.input.fileinputformat.split.minsize'},
{'1.0': 'mapred.min.split.size.per.node',
'2.0': 'mapreduce.input.fileinputformat.split.minsize.per.node'},
{'1.0': 'mapred.min.split.size.per.rack',
'2.0': 'mapreduce.input.fileinputformat.split.minsize.per.rack'},
{'1.0': 'mapred.output.compress',
'2.0': 'mapreduce.output.fileoutputformat.compress'},
{'1.0': 'mapred.output.compression.codec',
'2.0': 'mapreduce.output.fileoutputformat.compress.codec'},
{'1.0': 'mapred.output.compression.type',
'2.0': 'mapreduce.output.fileoutputformat.compress.type'},
{'1.0': 'mapred.output.dir',
'2.0': 'mapreduce.output.fileoutputformat.outputdir'},
{'1.0': 'mapred.output.key.class',
'2.0': 'mapreduce.job.output.key.class'},
{'1.0': 'mapred.output.key.comparator.class',
'2.0': 'mapreduce.job.output.key.comparator.class'},
{'1.0': 'mapred.output.value.class',
'2.0': 'mapreduce.job.output.value.class'},
{'1.0': 'mapred.output.value.groupfn.class',
'2.0': 'mapreduce.job.output.group.comparator.class'},
{'1.0': 'mapred.permissions.supergroup',
'2.0': 'mapreduce.cluster.permissions.supergroup'},
{'1.0': 'mapred.pipes.user.inputformat',
'2.0': 'mapreduce.pipes.inputformat'},
{'1.0': 'mapred.reduce.child.env',
'2.0': 'mapreduce.reduce.env'},
{'1.0': 'mapred.reduce.child.java.opts',
'2.0': 'mapreduce.reduce.java.opts'},
{'1.0': 'mapred.reduce.child.log.level',
'2.0': 'mapreduce.reduce.log.level'},
{'1.0': 'mapred.reduce.max.attempts',
'2.0': 'mapreduce.reduce.maxattempts'},
{'1.0': 'mapred.reduce.parallel.copies',
'2.0': 'mapreduce.reduce.shuffle.parallelcopies'},
{'1.0': 'mapred.reduce.slowstart.completed.maps',
'2.0': 'mapreduce.job.reduce.slowstart.completedmaps'},
{'1.0': 'mapred.reduce.task.debug.script',
'2.0': 'mapreduce.reduce.debug.script'},
{'1.0': 'mapred.reduce.tasks',
'2.0': 'mapreduce.job.reduces'},
{'1.0': 'mapred.reduce.tasks.speculative.execution',
'2.0': 'mapreduce.reduce.speculative'},
{'1.0': 'mapred.seqbinary.output.key.class',
'2.0': 'mapreduce.output.seqbinaryoutputformat.key.class'},
{'1.0': 'mapred.seqbinary.output.value.class',
'2.0': 'mapreduce.output.seqbinaryoutputformat.value.class'},
{'1.0': 'mapred.shuffle.connect.timeout',
'2.0': 'mapreduce.reduce.shuffle.connect.timeout'},
{'1.0': 'mapred.shuffle.read.timeout',
'2.0': 'mapreduce.reduce.shuffle.read.timeout'},
{'1.0': 'mapred.skip.attempts.to.start.skipping',
'2.0': 'mapreduce.task.skip.start.attempts'},
{'1.0': 'mapred.skip.map.auto.incr.proc.count',
'2.0': 'mapreduce.map.skip.proc-count.auto-incr'},
{'1.0': 'mapred.skip.map.max.skip.records',
'2.0': 'mapreduce.map.skip.maxrecords'},
{'1.0': 'mapred.skip.on',
'2.0': 'mapreduce.job.skiprecords'},
{'1.0': 'mapred.skip.out.dir',
'2.0': 'mapreduce.job.skip.outdir'},
{'1.0': 'mapred.skip.reduce.auto.incr.proc.count',
'2.0': 'mapreduce.reduce.skip.proc-count.auto-incr'},
{'1.0': 'mapred.skip.reduce.max.skip.groups',
'2.0': 'mapreduce.reduce.skip.maxgroups'},
{'1.0': 'mapred.speculative.execution.slowNodeThreshold',
'2.0': 'mapreduce.job.speculative.slownodethreshold'},
{'1.0': 'mapred.speculative.execution.slowTaskThreshold',
'2.0': 'mapreduce.job.speculative.slowtaskthreshold'},
{'1.0': 'mapred.speculative.execution.speculativeCap',
'2.0': 'mapreduce.job.speculative.speculativecap'},
{'1.0': 'mapred.submit.replication',
'2.0': 'mapreduce.client.submit.file.replication'},
{'1.0': 'mapred.system.dir',
'2.0': 'mapreduce.jobtracker.system.dir'},
{'1.0': 'mapred.task.cache.levels',
'2.0': 'mapreduce.jobtracker.taskcache.levels'},
{'1.0': 'mapred.task.id',
'2.0': 'mapreduce.task.attempt.id'},
{'1.0': 'mapred.task.is.map',
'2.0': 'mapreduce.task.ismap'},
{'1.0': 'mapred.task.partition',
'2.0': 'mapreduce.task.partition'},
{'1.0': 'mapred.task.profile',
'2.0': 'mapreduce.task.profile'},
{'1.0': 'mapred.task.profile.maps',
'2.0': 'mapreduce.task.profile.maps'},
{'1.0': 'mapred.task.profile.params',
'2.0': 'mapreduce.task.profile.params'},
{'1.0': 'mapred.task.profile.reduces',
'2.0': 'mapreduce.task.profile.reduces'},
{'1.0': 'mapred.task.timeout',
'2.0': 'mapreduce.task.timeout'},
{'1.0': 'mapred.task.tracker.http.address',
'2.0': 'mapreduce.tasktracker.http.address'},
{'1.0': 'mapred.task.tracker.report.address',
'2.0': 'mapreduce.tasktracker.report.address'},
{'1.0': 'mapred.task.tracker.task-controller',
'2.0': 'mapreduce.tasktracker.taskcontroller'},
{'1.0': 'mapred.tasktracker.dns.interface',
'2.0': 'mapreduce.tasktracker.dns.interface'},
{'1.0': 'mapred.tasktracker.dns.nameserver',
'2.0': 'mapreduce.tasktracker.dns.nameserver'},
{'1.0': 'mapred.tasktracker.events.batchsize',
'2.0': 'mapreduce.tasktracker.events.batchsize'},
{'1.0': 'mapred.tasktracker.expiry.interval',
'2.0': 'mapreduce.jobtracker.expire.trackers.interval'},
{'1.0': 'mapred.tasktracker.indexcache.mb',
'2.0': 'mapreduce.tasktracker.indexcache.mb'},
{'1.0': 'mapred.tasktracker.instrumentation',
'2.0': 'mapreduce.tasktracker.instrumentation'},
{'1.0': 'mapred.tasktracker.map.tasks.maximum',
'2.0': 'mapreduce.tasktracker.map.tasks.maximum'},
{'1.0': 'mapred.tasktracker.memory_calculator_plugin',
'2.0': 'mapreduce.tasktracker.resourcecalculatorplugin'},
{'1.0': 'mapred.tasktracker.memorycalculatorplugin',
'2.0': 'mapreduce.tasktracker.resourcecalculatorplugin'},
{'1.0': 'mapred.tasktracker.reduce.tasks.maximum',
'2.0': 'mapreduce.tasktracker.reduce.tasks.maximum'},
{'1.0': 'mapred.tasktracker.taskmemorymanager.monitoring-interval',
'2.0': 'mapreduce.tasktracker.taskmemorymanager.monitoringinterval'},
{'1.0': 'mapred.tasktracker.tasks.sleeptime-before-sigkill',
'2.0': 'mapreduce.tasktracker.tasks.sleeptimebeforesigkill'},
{'1.0': 'mapred.temp.dir',
'2.0': 'mapreduce.cluster.temp.dir'},
{'1.0': 'mapred.text.key.comparator.options',
'2.0': 'mapreduce.partition.keycomparator.options'},
{'1.0': 'mapred.text.key.partitioner.options',
'2.0': 'mapreduce.partition.keypartitioner.options'},
{'1.0': 'mapred.textoutputformat.separator',
'2.0': 'mapreduce.output.textoutputformat.separator'},
{'1.0': 'mapred.tip.id',
'2.0': 'mapreduce.task.id'},
{'1.0': 'mapred.used.genericoptionsparser',
'2.0': 'mapreduce.client.genericoptionsparser.used'},
{'1.0': 'mapred.userlog.limit.kb',
'2.0': 'mapreduce.task.userlog.limit.kb'},
{'1.0': 'mapred.userlog.retain.hours',
'2.0': 'mapreduce.job.userlog.retain.hours'},
{'1.0': 'mapred.work.output.dir',
'2.0': 'mapreduce.task.output.dir'},
{'1.0': 'mapred.working.dir',
'2.0': 'mapreduce.job.working.dir'},
{'1.0': 'mapreduce.combine.class',
'2.0': 'mapreduce.job.combine.class'},
{'1.0': 'mapreduce.inputformat.class',
'2.0': 'mapreduce.job.inputformat.class'},
{'1.0': 'mapreduce.jobtracker.permissions.supergroup',
'2.0': 'mapreduce.cluster.permissions.supergroup'},
{'1.0': 'mapreduce.map.class',
'2.0': 'mapreduce.job.map.class'},
{'1.0': 'mapreduce.outputformat.class',
'2.0': 'mapreduce.job.outputformat.class'},
{'1.0': 'mapreduce.partitioner.class',
'2.0': 'mapreduce.job.partitioner.class'},
{'1.0': 'mapreduce.reduce.class',
'2.0': 'mapreduce.job.reduce.class'},
{'1.0': 'min.num.spills.for.combine',
'2.0': 'mapreduce.map.combine.minspills'},
{'1.0': 'reduce.output.key.value.fields.spec',
'2.0': 'mapreduce.fieldsel.reduce.output.key.value.fields.spec'},
{'1.0': 'security.job.submission.protocol.acl',
'2.0': 'security.job.client.protocol.acl'},
{'1.0': 'security.task.umbilical.protocol.acl',
'2.0': 'security.job.task.protocol.acl'},
{'1.0': 'sequencefile.filter.class',
'2.0': 'mapreduce.input.sequencefileinputfilter.class'},
{'1.0': 'sequencefile.filter.frequency',
'2.0': 'mapreduce.input.sequencefileinputfilter.frequency'},
{'1.0': 'sequencefile.filter.regex',
'2.0': 'mapreduce.input.sequencefileinputfilter.regex'},
{'1.0': 'session.id',
'2.0': 'dfs.metrics.session-id'},
{'1.0': 'slave.host.name',
'2.0': 'dfs.datanode.hostname'},
{'1.0': 'slave.host.name',
'2.0': 'mapreduce.tasktracker.host.name'},
{'1.0': 'tasktracker.contention.tracking',
'2.0': 'mapreduce.tasktracker.contention.tracking'},
{'1.0': 'tasktracker.http.threads',
'2.0': 'mapreduce.tasktracker.http.threads'},
{'1.0': 'topology.node.switch.mapping.impl',
'2.0': 'net.topology.node.switch.mapping.impl'},
{'1.0': 'topology.script.file.name',
'2.0': 'net.topology.script.file.name'},
{'1.0': 'topology.script.number.args',
'2.0': 'net.topology.script.number.args'},
{'1.0': 'user.name',
'2.0': 'mapreduce.job.user.name'},
{'1.0': 'webinterface.private.actions',
'2.0': 'mapreduce.jobtracker.webinterface.trusted'},
]

# Handle compatibility for 0.x versions of Hadoop too
for jobconf_dict in _JOBCONF_DICT_LIST:
    jobconf_dict['0.20'] = jobconf_dict['1.0']
    jobconf_dict['0.21'] = jobconf_dict['2.0']


def _dict_list_to_compat_map(dict_list):
    # compat_map = {
    #     ...
    #     a: {'1.0': a, '2.0': b},
    #     ...
    # }
    compat_map = {}

    for version_dict in dict_list:
        for value in version_dict.values():
            compat_map[value] = version_dict

    return compat_map


_JOBCONF_MAP = _dict_list_to_compat_map(_JOBCONF_DICT_LIST)
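
# Each variant of a variable keys the same version dict (including the
# '0.20' and '0.21' aliases added above), so a lookup works no matter which
# Hadoop-era name the caller uses. For example, with values from the table
# above:
#
#   _JOBCONF_MAP['fs.default.name'] is _JOBCONF_MAP['fs.defaultFS']  # True
#   # both -> {'1.0': 'fs.default.name', '2.0': 'fs.defaultFS',
#   #          '0.20': 'fs.default.name', '0.21': 'fs.defaultFS'}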


def jobconf_from_env(variable, default=None):
    """Get the value of a jobconf variable from the runtime environment.

    For example, a :py:class:`~mrjob.job.MRJob` could use
    ``jobconf_from_env('map.input.file')`` to get the name of the file a
    mapper is reading input from.

    If the name of the jobconf variable is different in different versions
    of Hadoop (e.g. in Hadoop 2.0, ``map.input.file`` is
    ``mapreduce.map.input.file``), we'll automatically try all variants
    before giving up.

    Return *default* if that jobconf variable isn't set.
    """
    # try variable verbatim first
    name = variable.replace('.', '_')
    if name in os.environ:
        return os.environ[name]

    # try alternatives (arbitrary order)
    for var in _JOBCONF_MAP.get(variable, {}).values():
        name = var.replace('.', '_')
        if name in os.environ:
            return os.environ[name]

    return default
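
# A minimal usage sketch (assumed values): Hadoop streaming exposes jobconf
# variables to tasks as environment variables with '.' replaced by '_'
# (which is why the lookup above does variable.replace('.', '_')). So a
# mapper running under Hadoop 2 might see:
#
#   os.environ['mapreduce_map_input_file'] = 'hdfs:///data/part-00000'
#   jobconf_from_env('map.input.file')  # tries the 2.0 variant -> that path
#   jobconf_from_env('no.such.var', default='')  # -> ''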


def jobconf_from_dict(jobconf, name, default=None):
    """Get the value of a jobconf variable from the given dictionary.

    :param dict jobconf: jobconf dictionary
    :param string name: name of the jobconf variable (e.g. ``'user.name'``)
    :param default: fallback value

    If the name of the jobconf variable is different in different versions
    of Hadoop (e.g. in Hadoop 2, ``map.input.file`` is
    ``mapreduce.map.input.file``), we'll automatically try all variants
    before giving up.

    Return *default* if that jobconf variable isn't set.
    """
    if name in jobconf:
        return jobconf[name]

    # try alternatives (arbitrary order)
    for alternative in _JOBCONF_MAP.get(name, {}).values():
        if alternative in jobconf:
            return jobconf[alternative]

    return default
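
# A minimal usage sketch (assumed values):
#
#   jobconf = {'mapreduce.job.user.name': 'dave'}
#   jobconf_from_dict(jobconf, 'user.name')  # -> 'dave' (via 2.0 variant)
#   jobconf_from_dict(jobconf, 'foo.bar', default=0)  # -> 0 (unknown var)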


def map_version(version, version_map):
    """Allows you to look up something by version (e.g. which jobconf
    variable to use), specifying only the versions where that value changed.

    *version* is a string.

    *version_map* is a map from the version (as a string) at which a value
    changed to the new value.

    For efficiency, *version_map* can also be a list of tuples of
    ``(LooseVersion(version_as_string), value)``, with oldest versions
    first.

    If *version* is less than any version in *version_map*, use the value
    for the earliest version in *version_map*.
    """
    if version is None:
        raise TypeError

    if not version_map:
        raise ValueError

    if isinstance(version_map, dict):
        version_map = sorted((LooseVersion(k), v)
                             for k, v in version_map.items())

    req_version = LooseVersion(version)

    for min_version, value in reversed(version_map):
        if req_version >= min_version:
            return value
    else:
        # *version* is below every version in *version_map*; fall back to
        # the value for the earliest version
        return version_map[0][1]
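
# A minimal usage sketch (hypothetical values): only the versions at which
# the value changed need to be listed.
#
#   vm = {'1.0': 'old-name', '2.0': 'new-name'}
#   map_version('2.7.3', vm)  # -> 'new-name' (2.7.3 >= 2.0)
#   map_version('1.2', vm)    # -> 'old-name' (1.0 <= 1.2 < 2.0)
#   map_version('0.18', vm)   # -> 'old-name' (below all: earliest value)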


def translate_jobconf(variable, version):
    """Translate *variable* to Hadoop version *version*. If it's not
    a variable we recognize, leave it as-is.
    """
    if version is None:
        raise TypeError

    if variable in _JOBCONF_MAP:
        return map_version(version, _JOBCONF_MAP[variable])
    else:
        return variable
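
# Translation works in both directions, since every variant keys the same
# version dict. For example, with values from the table above:
#
#   translate_jobconf('user.name', '2.7.0')
#   # -> 'mapreduce.job.user.name'
#   translate_jobconf('mapreduce.job.user.name', '1.0.3')
#   # -> 'user.name'
#   translate_jobconf('not.a.real.variable', '2.7.0')
#   # -> 'not.a.real.variable' (unrecognized, left as-is)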


def translate_jobconf_for_all_versions(variable):
    """Get all known variants of the given jobconf variable.

    Unlike :py:func:`translate_jobconf`, returns a list."""
    return sorted(
        set([variable] + list(_JOBCONF_MAP.get(variable, {}).values())))
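
# For example, with values from the table above:
#
#   translate_jobconf_for_all_versions('user.name')
#   # -> ['mapreduce.job.user.name', 'user.name']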


def translate_jobconf_dict(jobconf, hadoop_version=None):
    """Translates configuration property names to match those accepted by
    *hadoop_version*. Logs a warning if any configuration property name
    does not match the name in the given Hadoop version. Combines the
    original jobconf with the translated jobconf.

    :return: a map consisting of the original and translated configuration
             property names and values.
    """
    translated_jobconf = jobconf.copy()
    translation_warnings = {}

    for variable, value in jobconf.items():
        if hadoop_version:
            variants = [translate_jobconf(variable, hadoop_version)]
        else:
            variants = translate_jobconf_for_all_versions(variable)

        for variant in variants:
            if variant in jobconf:
                # this happens if variant == variable or
                # if the variant was in jobconf to start with
                continue

            translated_jobconf[variant] = value
            if hadoop_version:
                translation_warnings[variable] = variant

    if translation_warnings:
        log.warning("Detected hadoop configuration property names that"
                    " do not match hadoop version %s:"
                    "\nThey have been translated as follows\n %s",
                    hadoop_version,
                    '\n'.join([
                        "%s: %s" % (variable, variant)
                        for variable, variant
                        in sorted(translation_warnings.items())]))

    return translated_jobconf
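
# A minimal usage sketch (assumed values): translating a 1.0-era jobconf
# for Hadoop 2 keeps the original key and adds the 2.0 name alongside it
# (and logs a warning that the name was translated):
#
#   translate_jobconf_dict({'mapred.map.tasks': '8'}, '2.7.0')
#   # -> {'mapred.map.tasks': '8', 'mapreduce.job.maps': '8'}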


def uses_yarn(version):
    """Basically, is this Hadoop 2? This also handles versions in the
    zero series (0.23+) where YARN originated."""
    return (version_gte(version, '2') or
            (version_gte(version, '0.23') and not version_gte(version, '1')))
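
# For example:
#
#   uses_yarn('2.7.3')   # -> True
#   uses_yarn('0.23.7')  # -> True (YARN originated in the 0.23 series)
#   uses_yarn('1.0.3')   # -> False
#   uses_yarn('0.20.2')  # -> False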


def version_gte(version, cmp_version_str):
    """Return ``True`` if *version* >= *cmp_version_str*."""
    if not isinstance(version, string_types):
        raise TypeError('%r is not a string' % version)

    if not isinstance(cmp_version_str, string_types):
        raise TypeError('%r is not a string' % cmp_version_str)

    return LooseVersion(version) >= LooseVersion(cmp_version_str)
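
# Note that comparison is numeric per component, not lexicographic:
#
#   version_gte('2.10.0', '2.9')     # -> True ('10' compares as 10)
#   version_gte('0.20.205', '0.20')  # -> True
#   version_gte('1.0', '1.0.1')      # -> False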