
Commit f637b18
Author: Pedro Bernardo (committed)

Setting log level to ERROR in scripts that print to the standard output

1 parent 131e3cf · commit f637b18
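
Every file in this commit applies the same one-line pattern: immediately after the SparkContext is created, the log level is raised to ERROR so Spark's INFO/WARN console logging does not drown out the values the script prints. A minimal sketch of the pattern, assuming a local PySpark installation (the app name and sample data below are illustrative, not taken from the repository):

from pyspark import SparkContext

if __name__ == "__main__":
    # Local SparkContext; the second argument is just the application name
    sc = SparkContext("local", "logLevelDemo")
    # From here on only ERROR (and fatal) messages reach the console,
    # so the script's own print output stays readable
    sc.setLogLevel("ERROR")
    numbers = sc.parallelize([2, 3, 5, 7])
    print("count:", numbers.count())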

File tree

8 files changed: +23 −18 lines changed


rdd/WordCount.py

Lines changed: 7 additions & 6 deletions

@@ -2,9 +2,10 @@
 from pyspark import SparkContext
 
 if __name__ == "__main__":
-    sc = SparkContext("local", "word count")
-    lines = sc.textFile("in/word_count.text")
-    words = lines.flatMap(lambda line: line.split(" "))
-    wordCounts = words.countByValue()
-    for word, count in wordCounts.items():
-        print(word, count)
+    sc = SparkContext("local", "word count")
+    sc.setLogLevel("ERROR")
+    lines = sc.textFile("in/word_count.text")
+    words = lines.flatMap(lambda line: line.split(" "))
+    wordCounts = words.countByValue()
+    for word, count in wordCounts.items():
+        print(word, count)

rdd/collect/CollectExample.py

Lines changed: 1 addition & 0 deletions

@@ -2,6 +2,7 @@
 
 if __name__ == "__main__":
     sc = SparkContext("local", "collect")
+    sc.setLogLevel("ERROR")
     inputWords = ["spark", "hadoop", "spark", "hive", "pig", "cassandra", "hadoop"]
     wordRdd = sc.parallelize(inputWords)
     words = wordRdd.collect()

rdd/count/CountExample.py

Lines changed: 2 additions & 1 deletion

@@ -2,10 +2,11 @@
 
 if __name__ == "__main__":
     sc = SparkContext("local", "count")
+    sc.setLogLevel("ERROR")
     inputWords = ["spark", "hadoop", "spark", "hive", "pig", "cassandra", "hadoop"]
     wordRdd = sc.parallelize(inputWords)
     print("Count: {}".format(wordRdd.count()))
     worldCountByValue = wordRdd.countByValue()
     print("CountByValue: ")
     for word, count in worldCountByValue.items():
-        print("{} : {}".format(word, count))
+        print("{} : {}".format(word, count))

rdd/nasaApacheWebLogs/UnionLogSolutions.py

Lines changed: 1 addition & 1 deletion

@@ -1,6 +1,6 @@
 from pyspark import SparkContext
 
-def isNotHeader(line:str):
+def isNotHeader(line: str):
     return not (line.startswith("host") and "bytes" in line)
 
 if __name__ == "__main__":

rdd/reduce/ReduceExample.py

Lines changed: 1 addition & 0 deletions

@@ -2,6 +2,7 @@
 
 if __name__ == "__main__":
     sc = SparkContext("local", "reduce")
+    sc.setLogLevel("ERROR")
     inputIntegers = [1, 2, 3, 4, 5]
     integerRdd = sc.parallelize(inputIntegers)
     product = integerRdd.reduce(lambda x, y: x * y)
rdd/sumOfNumbers/SumOfNumbersProblem.py

Lines changed: 2 additions & 3 deletions

@@ -1,11 +1,10 @@
-
 import sys
 from pyspark import SparkContext
 
 if __name__ == "__main__":
 
-    '''
+    '''
     Create a Spark program to read the first 100 prime numbers from in/prime_nums.text,
     print the sum of those numbers to console.
     Each row of the input file contains 10 prime numbers separated by spaces.
-    '''
+    '''

rdd/sumOfNumbers/SumOfNumbersSolution.py

Lines changed: 2 additions & 1 deletion

@@ -3,9 +3,10 @@
 
 if __name__ == "__main__":
     sc = SparkContext("local", "primeNumbers")
+    sc.setLogLevel("ERROR")
     lines = sc.textFile("in/prime_nums.text")
     numbers = lines.flatMap(lambda line: line.split("\t"))
     validNumbers = numbers.filter(lambda number: number)
     intNumbers = validNumbers.map(lambda number: int(number))
     print("Sum is: ")
-    print(intNumbers.reduce(lambda x, y: x + y))
+    print(intNumbers.reduce(lambda x, y: x + y))

rdd/take/TakeExample.py

Lines changed: 7 additions & 6 deletions

@@ -2,9 +2,10 @@
 from pyspark import SparkContext
 
 if __name__ == "__main__":
-    sc = SparkContext("local", "take")
-    inputWords = ["spark", "hadoop", "spark", "hive", "pig", "cassandra", "hadoop"]
-    wordRdd = sc.parallelize(inputWords)
-    words = wordRdd.take(3)
-    for word in words:
-        print(word)
+    sc = SparkContext("local", "take")
+    sc.setLogLevel("ERROR")
+    inputWords = ["spark", "hadoop", "spark", "hive", "pig", "cassandra", "hadoop"]
+    wordRdd = sc.parallelize(inputWords)
+    words = wordRdd.take(3)
+    for word in words:
+        print(word)
