GildedRose-Refactoring-Kata/.venv/lib/python3.12/site-packages/mrjob/examples/mr_sparkaboom.py
2025-06-22 13:36:01 +05:30

43 lines
1.3 KiB
Python

# Copyright 2016 Yelp
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from mrjob.job import MRJob
class MRSparKaboom(MRJob):
    """Spark job that fails on purpose.

    Every input line is fed to a function that raises
    ``Exception('KABOOM')``, so the error originates inside a Spark
    transformation rather than in the top-level client code.
    """

    def spark(self, input_path, output_path):
        """Read *input_path*, raise on every line, and try to save the result.

        Args:
            input_path: path passed to ``SparkContext.textFile()``.
            output_path: path passed to ``saveAsTextFile()``.

        Raises:
            Whatever error Spark reports to the driver once ``kaboom``
            raises (the exact exception type depends on PySpark). The
            error is not caught here; it propagates to the caller after
            the SparkContext has been stopped.
        """
        # Spark may not be available where script is launched
        from pyspark import SparkContext

        sc = SparkContext(appName='mrjob Spark wordcount script')

        # This job is expected to fail, so sc.stop() must live in a
        # ``finally`` clause; otherwise the context is never stopped on
        # the failure path, which is the path this script exists to take.
        try:
            lines = sc.textFile(input_path)

            def kaboom(line):
                raise Exception('KABOOM')

            # make sure the exception happens inside Spark, not just at the
            # top-level client
            # strangely, Spark 1.2 thinks this is all good. Probably not
            # something we can fix.
            kaboomed_lines = lines.flatMap(kaboom)

            # flatMap() only describes the transformation; the save below
            # is the action that makes Spark actually call kaboom().
            kaboomed_lines.saveAsTextFile(output_path)
        finally:
            sc.stop()
# Script entry point: when executed directly (not imported), hand control
# to the job class's run() classmethod.
if __name__ == "__main__":
    MRSparKaboom.run()