Skip to content

Commit 50dcbc6

Browse files
author
James Lee
committed
adopt
1 parent 8ffb3e5 commit 50dcbc6

File tree

8 files changed

+42
-79
lines changed

8 files changed

+42
-79
lines changed

rdd/WordCount.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
import sys
from pyspark import SparkContext

if __name__ == "__main__":
    # Local-mode driver: count how often each word occurs in a text file
    # and print the tallies to stdout.
    sc = SparkContext("local", "word count")
    try:
        lines = sc.textFile("in/word_count.text")
        # Split each line on single spaces.  NOTE(review): consecutive
        # spaces produce empty-string tokens that get counted as a "word";
        # line.split() would drop them, but that would change output.
        words = lines.flatMap(lambda line: line.split(" "))
        # countByValue() collects the counts back to the driver as a dict.
        wordCounts = words.countByValue()
        for word, count in wordCounts.items():
            print(word, count)
    finally:
        # Fix: the original never stopped the context, leaking the local
        # Spark JVM; always release it, even if the job fails.
        sc.stop()

rdd/WordCount.scala

Lines changed: 0 additions & 22 deletions
This file was deleted.
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@

import sys
from pyspark import SparkContext

if __name__ == "__main__":
    # Exercise stub: the task description below is intentionally the only
    # body; the solution lives in the companion solution file.
    """
    Create a Spark program to read the first 100 prime numbers from in/prime_nums.text,
    print the sum of those numbers to console.
    Each row of the input file contains 10 prime numbers separated by spaces.
    """

rdd/sumOfNumbers/SumOfNumbersProblem.scala

Lines changed: 0 additions & 13 deletions
This file was deleted.
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
import sys
from pyspark import SparkContext

if __name__ == "__main__":
    # Local-mode driver: sum the prime numbers in in/prime_nums.text and
    # print the total.
    sc = SparkContext("local", "primeNumbers")
    try:
        lines = sc.textFile("in/prime_nums.text")
        # Fix: the original split on "\t" only, but the companion problem
        # statement says the numbers are space-separated.  split() with no
        # argument breaks on any whitespace run (tabs or spaces) and never
        # yields empty tokens, so both formats parse correctly.
        numbers = lines.flatMap(lambda line: line.split())
        # Defensive: drop any falsy tokens before int() parsing (harmless
        # no-op after split(), kept for parity with the original pipeline).
        validNumbers = numbers.filter(lambda number: number)
        intNumbers = validNumbers.map(lambda number: int(number))
        print("Sum is: ")
        print(intNumbers.reduce(lambda x, y: x + y))
    finally:
        # Release the SparkContext even on failure (resource cleanup).
        sc.stop()

rdd/sumOfNumbers/SumOfNumbersSolution.scala

Lines changed: 0 additions & 25 deletions
This file was deleted.

rdd/take/TakeExample.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
import sys
from pyspark import SparkContext

if __name__ == "__main__":
    # Local-mode driver demonstrating RDD.take(n): fetch the first n
    # elements of an RDD back to the driver as a plain Python list.
    sc = SparkContext("local", "take")
    try:
        inputWords = ["spark", "hadoop", "spark", "hive", "pig", "cassandra", "hadoop"]
        wordRdd = sc.parallelize(inputWords)
        # take(3) preserves partition order, so this prints the first
        # three words of inputWords.
        words = wordRdd.take(3)
        for word in words:
            print(word)
    finally:
        # Fix: the original never stopped the context, leaking the local
        # Spark JVM; always release it, even if the job fails.
        sc.stop()

rdd/take/TakeExample.scala

Lines changed: 0 additions & 19 deletions
This file was deleted.

0 commit comments

Comments
 (0)