Skip to content

Commit 1b5926d

Browse files
author
James Lee
committed
improve several pair RDD examples
1 parent 87fe8ce commit 1b5926d

File tree

6 files changed

+27
-14
lines changed

6 files changed

+27
-14
lines changed

src/main/java/com/sparkTutorial/pairRdd/filter/AirportsNotInUsaSolution.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package com.sparkTutorial.pairRdd.filter;
22

3+
import com.sparkTutorial.rdd.commons.Utils;
34
import org.apache.spark.SparkConf;
45
import org.apache.spark.api.java.JavaPairRDD;
56
import org.apache.spark.api.java.JavaRDD;
@@ -25,7 +26,7 @@ public static void main(String[] args) throws Exception {
2526
}
2627

2728
private static PairFunction<String, String, String> getAirportNameAndCountryNamePair() {
28-
return (PairFunction<String, String, String>) line -> new Tuple2<>(line.split(",")[1],
29-
line.split(",")[3]);
29+
return (PairFunction<String, String, String>) line -> new Tuple2<>(line.split(Utils.COMMA_DELIMITER)[1],
30+
line.split(Utils.COMMA_DELIMITER)[3]);
3031
}
3132
}

src/main/java/com/sparkTutorial/pairRdd/groupbykey/AirportsProblem.java renamed to src/main/java/com/sparkTutorial/pairRdd/groupbykey/AirportsByCountryProblem.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
11
package com.sparkTutorial.pairRdd.groupbykey;
22

3-
public class AirportsProblem {
3+
public class AirportsByCountryProblem {
44

55
public static void main(String[] args) throws Exception {
66

7-
/* TODO: Create a Spark program to read the airport data from in/airports.text, output the the list of the names of the airports located in each country.
7+
/* Create a Spark program to read the airport data from in/airports.text,
8+
output the the list of the names of the airports located in each country.
89
910
Each row of the input file contains the following columns:
10-
11-
Airport ID, Name of airport, Main city served by airport, Country where airport is located, IATA/FAA code, ICAO Code, Latitude, Longitude, Altitude, Timezone, DST, Timezone in Olson format
11+
Airport ID, Name of airport, Main city served by airport, Country where airport is located, IATA/FAA code,
12+
ICAO Code, Latitude, Longitude, Altitude, Timezone, DST, Timezone in Olson format
1213
1314
Sample output:
1415

src/main/java/com/sparkTutorial/pairRdd/groupbykey/AirportsSolution.java renamed to src/main/java/com/sparkTutorial/pairRdd/groupbykey/AirportsByCountrySolution.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
package com.sparkTutorial.pairRdd.groupbykey;
22

3+
import com.sparkTutorial.rdd.commons.Utils;
34
import org.apache.spark.SparkConf;
45
import org.apache.spark.api.java.JavaPairRDD;
56
import org.apache.spark.api.java.JavaRDD;
67
import org.apache.spark.api.java.JavaSparkContext;
78
import org.apache.spark.api.java.function.PairFunction;
89
import scala.Tuple2;
910

10-
public class AirportsSolution {
11+
public class AirportsByCountrySolution {
1112

1213
public static void main(String[] args) throws Exception {
1314

@@ -17,7 +18,10 @@ public static void main(String[] args) throws Exception {
1718

1819
JavaRDD<String> lines = sc.textFile("in/airports.text");
1920

20-
JavaPairRDD<String, String> CountryAndAirportNameAndPair = lines.mapToPair((PairFunction<String, String, String>) airport -> new Tuple2<>(airport.split(",")[3], airport.split(",")[1]));
21+
JavaPairRDD<String, String> CountryAndAirportNameAndPair =
22+
lines.mapToPair((PairFunction<String, String, String>) airport ->
23+
new Tuple2<>(airport.split(Utils.COMMA_DELIMITER)[3],
24+
airport.split(Utils.COMMA_DELIMITER)[1]));
2125

2226
JavaPairRDD<String, Iterable<String>> AirportsByCountry = CountryAndAirportNameAndPair.groupByKey();
2327

src/main/java/com/sparkTutorial/pairRdd/mapValues/AirportsUppercaseSolution.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package com.sparkTutorial.pairRdd.mapValues;
22

3+
import com.sparkTutorial.rdd.commons.Utils;
34
import org.apache.spark.SparkConf;
45
import org.apache.spark.api.java.JavaPairRDD;
56
import org.apache.spark.api.java.JavaRDD;
@@ -25,7 +26,7 @@ public static void main(String[] args) throws Exception {
2526
}
2627

2728
private static PairFunction<String, String, String> getAirportNameAndCountryNamePair() {
28-
return (PairFunction<String, String, String>) line -> new Tuple2<>(line.split(",")[1],
29-
line.split(",")[3]);
29+
return (PairFunction<String, String, String>) line -> new Tuple2<>(line.split(Utils.COMMA_DELIMITER)[1],
30+
line.split(Utils.COMMA_DELIMITER)[3]);
3031
}
3132
}

src/main/java/com/sparkTutorial/pairRdd/sort/sortbykey/SortedWorldCountProblem.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33

44
public class SortedWorldCountProblem {
55

6-
/* TODO: Create a Spark program to read the an article from in/word_count.text, output the number of occurrence of each word in descending order.
6+
/* Create a Spark program to read the an article from in/word_count.text,
7+
output the number of occurrence of each word in descending order.
78
89
Sample output:
910

src/main/java/com/sparkTutorial/pairRdd/sort/sortbykey/SortedWorldCountSolution.java

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,15 +24,20 @@ public static void main(String[] args) throws Exception {
2424
JavaRDD<String> lines = sc.textFile("in/word_count.text");
2525
JavaRDD<String> wordRdd = lines.flatMap(line -> Arrays.asList(line.split(" ")).iterator());
2626

27-
JavaPairRDD<String, Integer> wordPairRdd = wordRdd.mapToPair((PairFunction<String, String, Integer>) word -> new Tuple2<>(word, 1));
27+
JavaPairRDD<String, Integer> wordPairRdd = wordRdd.mapToPair(
28+
(PairFunction<String, String, Integer>) word -> new Tuple2<>(word, 1));
2829

2930
JavaPairRDD<String, Integer> wordToCountPairs = wordPairRdd.reduceByKey((Function2<Integer, Integer, Integer>) (x, y) -> x + y);
3031

31-
JavaPairRDD<Integer, String> countToWordParis = wordToCountPairs.mapToPair((PairFunction<Tuple2<String, Integer>, Integer, String>) wordToCount -> new Tuple2<>(wordToCount._2(), wordToCount._1()));
32+
JavaPairRDD<Integer, String> countToWordParis = wordToCountPairs.mapToPair(
33+
(PairFunction<Tuple2<String, Integer>, Integer, String>) wordToCount -> new Tuple2<>(wordToCount._2(),
34+
wordToCount._1()));
3235

3336
JavaPairRDD<Integer, String> sortedCountToWordParis = countToWordParis.sortByKey(false);
3437

35-
JavaPairRDD<String, Integer> sortedWordToCountPairs = sortedCountToWordParis.mapToPair((PairFunction<Tuple2<Integer, String>, String, Integer>) countToWord -> new Tuple2<>(countToWord._2(), countToWord._1()));
38+
JavaPairRDD<String, Integer> sortedWordToCountPairs = sortedCountToWordParis
39+
.mapToPair((PairFunction<Tuple2<Integer, String>, String, Integer>) countToWord -> new Tuple2<>(countToWord._2(),
40+
countToWord._1()));
3641

3742
for (Tuple2<String, Integer> wordToCount : sortedWordToCountPairs.collect()) {
3843
System.out.println(wordToCount._1() + " : " + wordToCount._2());

0 commit comments

Comments
 (0)