Skip to content

Commit 88c0a6b

Browse files
author
James Lee
committed
improve filter and mapValues examples
1 parent 2402739 commit 88c0a6b

File tree

6 files changed

+51
-46
lines changed

6 files changed

+51
-46
lines changed
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
package com.sparkTutorial.pairRdd.filter;
2+
3+
public class AirportsNotInUsaProblem {
4+
5+
public static void main(String[] args) throws Exception {
6+
7+
/* Create a Spark program to read the airport data from in/airports.text;
8+
generate a pair RDD with airport name being the key and country name being the value.
9+
Then remove all the airports which are NOT located in United States and output the pair RDD to out/airports_not_in_usa_pair_rdd.text
10+
11+
Each row of the input file contains the following columns:
12+
Airport ID, Name of airport, Main city served by airport, Country where airport is located,
13+
IATA/FAA code, ICAO Code, Latitude, Longitude, Altitude, Timezone, DST, Timezone in Olson format
14+
15+
Sample output:
16+
17+
("Kamloops", "Canada")
18+
("Wewak Intl", "Papua New Guinea")
19+
...
20+
*/
21+
}
22+
}

src/main/java/com/sparkTutorial/pairRdd/filter/AirportsSolution.java renamed to src/main/java/com/sparkTutorial/pairRdd/filter/AirportsNotInUsaSolution.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,11 @@
77
import org.apache.spark.api.java.function.PairFunction;
88
import scala.Tuple2;
99

10-
public class AirportsSolution {
10+
public class AirportsNotInUsaSolution {
1111

1212
public static void main(String[] args) throws Exception {
1313

14-
SparkConf conf = new SparkConf().setAppName("airports").setMaster("local[*]");
14+
SparkConf conf = new SparkConf().setAppName("airports").setMaster("local");
1515

1616
JavaSparkContext sc = new JavaSparkContext(conf);
1717

@@ -21,7 +21,7 @@ public static void main(String[] args) throws Exception {
2121

2222
JavaPairRDD<String, String> airportsNotInUSA = airportPairRDD.filter(keyValue -> !keyValue._2().equals("\"United States\""));
2323

24-
airportsNotInUSA.saveAsTextFile("out/airports_pair_rdd.text");
24+
airportsNotInUSA.saveAsTextFile("out/airports_not_in_usa_pair_rdd.text");
2525
}
2626

2727
private static PairFunction<String, String, String> getAirportNameAndCountryNamePair() {

src/main/java/com/sparkTutorial/pairRdd/filter/AirportsProblem.java

Lines changed: 0 additions & 20 deletions
This file was deleted.

src/main/java/com/sparkTutorial/pairRdd/mapValues/AirportsProblem.java

Lines changed: 0 additions & 20 deletions
This file was deleted.
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
package com.sparkTutorial.pairRdd.mapValues;
2+
3+
public class AirportsUppercaseProblem {
4+
5+
public static void main(String[] args) throws Exception {
6+
7+
/* Create a Spark program to read the airport data from in/airports.text, generate a pair RDD with airport name
8+
being the key and country name being the value. Then convert the country name to uppercase and
9+
output the pair RDD to out/airports_uppercase.text
10+
11+
Each row of the input file contains the following columns:
12+
13+
Airport ID, Name of airport, Main city served by airport, Country where airport is located, IATA/FAA code,
14+
ICAO Code, Latitude, Longitude, Altitude, Timezone, DST, Timezone in Olson format
15+
16+
Sample output:
17+
18+
("Kamloops", "CANADA")
19+
("Wewak Intl", "PAPUA NEW GUINEA")
20+
...
21+
*/
22+
}
23+
}

src/main/java/com/sparkTutorial/pairRdd/mapValues/AirportsSolution.java renamed to src/main/java/com/sparkTutorial/pairRdd/mapValues/AirportsUppercaseSolution.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,11 @@
77
import org.apache.spark.api.java.function.PairFunction;
88
import scala.Tuple2;
99

10-
public class AirportsSolution {
10+
public class AirportsUppercaseSolution {
1111

1212
public static void main(String[] args) throws Exception {
1313

14-
SparkConf conf = new SparkConf().setAppName("airports").setMaster("local[*]");
14+
SparkConf conf = new SparkConf().setAppName("airports").setMaster("local");
1515

1616
JavaSparkContext sc = new JavaSparkContext(conf);
1717

@@ -21,7 +21,7 @@ public static void main(String[] args) throws Exception {
2121

2222
JavaPairRDD<String, String> upperCase = airportPairRDD.mapValues(countryName -> countryName.toUpperCase());
2323

24-
upperCase.saveAsTextFile("out/airports_pair_rdd_value_uppercase.text");
24+
upperCase.saveAsTextFile("out/airports_uppercase.text");
2525
}
2626

2727
private static PairFunction<String, String, String> getAirportNameAndCountryNamePair() {

0 commit comments

Comments
 (0)