Skip to content

Commit 3ec564f

Browse files
authored
Merge pull request jleetutorial#5 from jleetutorial/pedro-changes_for_videos
Pedro changes for videos
2 parents f27836c + 76f5cce commit 3ec564f

20 files changed (+85 −353 lines changed)

build.gradle

Lines changed: 0 additions & 33 deletions
This file was deleted.

gradle/wrapper/gradle-wrapper.jar

-52.4 KB
Binary file not shown.

gradle/wrapper/gradle-wrapper.properties

Lines changed: 0 additions & 6 deletions
This file was deleted.

gradlew

Lines changed: 0 additions & 160 deletions
This file was deleted.

gradlew.bat

Lines changed: 0 additions & 90 deletions
This file was deleted.

pairRdd/aggregation/reducebykey/WordCount.py

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,8 @@
1-
from pyspark import SparkContext
1+
from pyspark import SparkContext, SparkConf
22

33
if __name__ == "__main__":
4-
5-
sc = SparkContext("local", "wordCounts")
6-
sc.setLogLevel("ERROR")
4+
conf = SparkConf().setAppName("wordCounts").setMaster("local[3]")
5+
sc = SparkContext(conf = conf)
76

87
lines = sc.textFile("in/word_count.text")
98
wordRdd = lines.flatMap(lambda line: line.split(" "))

pairRdd/create/PairRddFromRegularRdd.py

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,8 @@
1-
from pyspark import SparkContext
1+
from pyspark import SparkContext, SparkConf
22

33
if __name__ == "__main__":
4-
5-
sc = SparkContext("local", "create")
6-
sc.setLogLevel("ERROR")
4+
conf = SparkConf().setAppName("create").setMaster("local")
5+
sc = SparkContext(conf = conf)
76

87
inputStrings = ["Lily 23", "Jack 29", "Mary 29", "James 8"]
98
regularRDDs = sc.parallelize(inputStrings)

pairRdd/create/PairRddFromTupleList.py

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,8 @@
1-
from pyspark import SparkContext
1+
from pyspark import SparkContext, SparkConf
22

33
if __name__ == "__main__":
4-
5-
sc = SparkContext("local", "create")
6-
sc.setLogLevel("ERROR")
4+
conf = SparkConf().setAppName("create").setMaster("local")
5+
sc = SparkContext(conf = conf)
76

87
tuples = [("Lily", 23), ("Jack", 29), ("Mary", 29), ("James", 8)]
98
pairRDD = sc.parallelize(tuples)
pairRdd/groupbykey/GroupByKeyVsReduceByKey.py (filename missing in extraction — inferred from the appName "GroupByKeyVsReduceByKey" in the hunk below; verify against the original commit)

Lines changed: 9 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,18 +1,21 @@
1-
from pyspark import SparkContext
1+
from pyspark import SparkContext, SparkConf
22

33
if __name__ == "__main__":
4-
5-
sc = SparkContext("local", "GroupByKeyVsReduceByKey")
6-
sc.setLogLevel("ERROR")
4+
conf = SparkConf().setAppName('GroupByKeyVsReduceByKey').setMaster("local[*]")
5+
sc = SparkContext(conf = conf)
76

87
words = ["one", "two", "two", "three", "three", "three"]
98
wordsPairRdd = sc.parallelize(words).map(lambda word: (word, 1))
109

11-
wordCountsWithReduceByKey = wordsPairRdd.reduceByKey(lambda x, y: x + y).collect()
10+
wordCountsWithReduceByKey = wordsPairRdd \
11+
.reduceByKey(lambda x, y: x + y) \
12+
.collect()
1213
print("wordCountsWithReduceByKey: {}".format(list(wordCountsWithReduceByKey)))
1314

1415
wordCountsWithGroupByKey = wordsPairRdd \
1516
.groupByKey() \
16-
.mapValues(lambda intIterable: len(intIterable)) \
17+
.mapValues(len) \
1718
.collect()
1819
print("wordCountsWithGroupByKey: {}".format(list(wordCountsWithGroupByKey)))
20+
21+

pairRdd/join/JoinOperations.py

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,8 @@
1-
from pyspark import SparkContext
1+
from pyspark import SparkContext, SparkConf
22

33
if __name__ == "__main__":
4-
5-
sc = SparkContext("local", "JoinOperations")
6-
sc.setLogLevel("ERROR")
4+
conf = SparkConf().setAppName("JoinOperations").setMaster("local[1]")
5+
sc = SparkContext(conf = conf)
76

87
ages = sc.parallelize([("Tom", 29), ("John", 22)])
98
addresses = sc.parallelize([("James", "USA"), ("John", "UK")])

0 commit comments

Comments (0)