Skip to content

Commit 50dcbc6

Browse files
author
James Lee
committed
adopt
1 parent 8ffb3e5 commit 50dcbc6

File tree

8 files changed

+42
-79
lines changed

8 files changed

+42
-79
lines changed

rdd/WordCount.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
import sys
from pyspark import SparkContext

if __name__ == "__main__":
    # Local-mode driver: count how often each word occurs in a text file
    # and print the tallies to stdout.
    sc = SparkContext("local", "word count")
    try:
        lines = sc.textFile("in/word_count.text")
        # Split each line on single spaces.  NOTE(review): consecutive
        # spaces produce empty-string tokens that get counted as a "word";
        # line.split() would drop them, but that would change output.
        words = lines.flatMap(lambda line: line.split(" "))
        # countByValue() collects the counts back to the driver as a dict.
        wordCounts = words.countByValue()
        for word, count in wordCounts.items():
            print(word, count)
    finally:
        # Fix: the original never stopped the context, leaking the local
        # Spark JVM; always release it, even if the job fails.
        sc.stop()

rdd/WordCount.scala

Lines changed: 0 additions & 22 deletions
This file was deleted.
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@

import sys
from pyspark import SparkContext

if __name__ == "__main__":
    # Exercise stub: the task description below is intentionally the only
    # body; the solution lives in the companion solution file.
    """
    Create a Spark program to read the first 100 prime numbers from in/prime_nums.text,
    print the sum of those numbers to console.
    Each row of the input file contains 10 prime numbers separated by spaces.
    """

rdd/sumOfNumbers/SumOfNumbersProblem.scala

Lines changed: 0 additions & 13 deletions
This file was deleted.
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
import sys
from pyspark import SparkContext

if __name__ == "__main__":
    # Local-mode driver: sum the prime numbers in in/prime_nums.text and
    # print the total.
    sc = SparkContext("local", "primeNumbers")
    try:
        lines = sc.textFile("in/prime_nums.text")
        # Fix: the original split on "\t" only, but the companion problem
        # statement says the numbers are space-separated.  split() with no
        # argument breaks on any whitespace run (tabs or spaces) and never
        # yields empty tokens, so both formats parse correctly.
        numbers = lines.flatMap(lambda line: line.split())
        # Defensive: drop any falsy tokens before int() parsing (harmless
        # no-op after split(), kept for parity with the original pipeline).
        validNumbers = numbers.filter(lambda number: number)
        intNumbers = validNumbers.map(lambda number: int(number))
        print("Sum is: ")
        print(intNumbers.reduce(lambda x, y: x + y))
    finally:
        # Release the SparkContext even on failure (resource cleanup).
        sc.stop()

rdd/sumOfNumbers/SumOfNumbersSolution.scala

Lines changed: 0 additions & 25 deletions
This file was deleted.

rdd/take/TakeExample.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
import sys
from pyspark import SparkContext

if __name__ == "__main__":
    # Local-mode driver demonstrating RDD.take(n): fetch the first n
    # elements of an RDD back to the driver as a plain Python list.
    sc = SparkContext("local", "take")
    try:
        inputWords = ["spark", "hadoop", "spark", "hive", "pig", "cassandra", "hadoop"]
        wordRdd = sc.parallelize(inputWords)
        # take(3) preserves partition order, so this prints the first
        # three words of inputWords.
        words = wordRdd.take(3)
        for word in words:
            print(word)
    finally:
        # Fix: the original never stopped the context, leaking the local
        # Spark JVM; always release it, even if the job fails.
        sc.stop()

rdd/take/TakeExample.scala

Lines changed: 0 additions & 19 deletions
This file was deleted.

0 commit comments

Comments
 (0)