Skip to content

Commit 51a643e

Browse files
author
James Lee
committed
reformat TypedDataset
1 parent 3be72e8 commit 51a643e

File tree

1 file changed

+8
-4
lines changed

1 file changed

+8
-4
lines changed

src/main/java/com/sparkTutorial/sparkSql/TypedDataset.java

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -17,13 +17,17 @@ public class TypedDataset {
1717
public static void main(String[] args) throws Exception {
1818

1919
Logger.getLogger("org").setLevel(Level.ERROR);
20-
SparkSession session = SparkSession.builder().appName("StackOverFlowSurvey").master("local[1]").getOrCreate();
20+
SparkSession session = SparkSession.builder().appName("StackOverFlowSurvey").master("local[*]").getOrCreate();
2121

2222
DataFrameReader dataFrameReader = session.read();
2323

2424
Dataset<Row> responses = dataFrameReader.option("header","true").csv("in/2016-stack-overflow-survey-responses.csv");
2525

26-
Dataset<Row> responseWithSelectedColumns = responses.select(col("country"), col("age_midpoint").as("ageMidPoint").cast("integer"), col("occupation"), col("salary_midpoint").as("salaryMidPoint").cast("integer"));
26+
Dataset<Row> responseWithSelectedColumns = responses.select(
27+
col("country"),
28+
col("age_midpoint").as("ageMidPoint").cast("integer"),
29+
col("occupation"),
30+
col("salary_midpoint").as("salaryMidPoint").cast("integer"));
2731

2832
Dataset<Response> typedDataset = responseWithSelectedColumns.as(Encoders.bean(Response.class));
2933

@@ -33,13 +37,13 @@ public static void main(String[] args) throws Exception {
3337
System.out.println("=== Print 20 records of responses table ===");
3438
typedDataset.show(20);
3539

36-
System.out.println("=== Print records where the response is from Afghanistan ===");
40+
System.out.println("=== Print the responses from Afghanistan ===");
3741
typedDataset.filter(response -> response.getCountry().equals("Afghanistan")).show();
3842

3943
System.out.println("=== Print the count of occupations ===");
4044
typedDataset.groupBy(typedDataset.col("occupation")).count().show();
4145

42-
System.out.println("=== Print records with average mid age less than 20 ===");
46+
System.out.println("=== Print responses with average mid age less than 20 ===");
4347
typedDataset.filter(response -> response.getAgeMidPoint() !=null && response.getAgeMidPoint() < 20).show();
4448

4549
System.out.println("=== Print the result with salary middle point in descending order ===");

0 commit comments

Comments (0)