Skip to content

Commit a6dc078

Browse files
author
Pedro Bernardo
committed
Changed SortedWordCountSolution.py to use the sortBy operation
1 parent f9c885f commit a6dc078

File tree

1 file changed

+8
-10
lines changed

1 file changed

+8
-10
lines changed
Lines changed: 8 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,20 +1,18 @@
1-
from pyspark import SparkContext
1+
from pyspark import SparkContext, SparkConf
22

33
if __name__ == "__main__":
4-
5-
sc = SparkContext("local", "wordCounts")
6-
sc.setLogLevel("ERROR")
4+
conf = SparkConf().setAppName("wordCounts").setMaster("local[*]")
5+
sc = SparkContext(conf = conf)
6+
77
lines = sc.textFile("in/word_count.text")
88
wordRdd = lines.flatMap(lambda line: line.split(" "))
99

1010
wordPairRdd = wordRdd.map(lambda word: (word, 1))
1111
wordToCountPairs = wordPairRdd.reduceByKey(lambda x, y: x + y)
1212

13-
countToWordParis = wordToCountPairs.map(lambda wordToCount: (wordToCount[1], wordToCount[0]))
14-
15-
sortedCountToWordParis = countToWordParis.sortByKey(ascending=False)
13+
sortedWordCountPairs = wordToCountPairs \
14+
.sortBy(lambda wordCount: wordCount[1], ascending=False)
1615

17-
sortedWordToCountPairs = sortedCountToWordParis.map(lambda countToWord: (countToWord[1], countToWord[0]))
18-
19-
for word, count in sortedWordToCountPairs.collect():
16+
for word, count in sortedWordCountPairs.collect():
2017
print("{} : {}".format(word, count))
18+

0 commit comments

Comments
 (0)