@@ -17,13 +17,17 @@ public class TypedDataset {
17
17
public static void main (String [] args ) throws Exception {
18
18
19
19
Logger .getLogger ("org" ).setLevel (Level .ERROR );
20
- SparkSession session = SparkSession .builder ().appName ("StackOverFlowSurvey" ).master ("local[1 ]" ).getOrCreate ();
20
+ SparkSession session = SparkSession .builder ().appName ("StackOverFlowSurvey" ).master ("local[* ]" ).getOrCreate ();
21
21
22
22
DataFrameReader dataFrameReader = session .read ();
23
23
24
24
Dataset <Row > responses = dataFrameReader .option ("header" ,"true" ).csv ("in/2016-stack-overflow-survey-responses.csv" );
25
25
26
- Dataset <Row > responseWithSelectedColumns = responses .select (col ("country" ), col ("age_midpoint" ).as ("ageMidPoint" ).cast ("integer" ), col ("occupation" ), col ("salary_midpoint" ).as ("salaryMidPoint" ).cast ("integer" ));
26
+ Dataset <Row > responseWithSelectedColumns = responses .select (
27
+ col ("country" ),
28
+ col ("age_midpoint" ).as ("ageMidPoint" ).cast ("integer" ),
29
+ col ("occupation" ),
30
+ col ("salary_midpoint" ).as ("salaryMidPoint" ).cast ("integer" ));
27
31
28
32
Dataset <Response > typedDataset = responseWithSelectedColumns .as (Encoders .bean (Response .class ));
29
33
@@ -33,13 +37,13 @@ public static void main(String[] args) throws Exception {
33
37
System .out .println ("=== Print 20 records of responses table ===" );
34
38
typedDataset .show (20 );
35
39
36
- System .out .println ("=== Print records where the response is from Afghanistan ===" );
40
+ System .out .println ("=== Print the responses from Afghanistan ===" );
37
41
typedDataset .filter (response -> response .getCountry ().equals ("Afghanistan" )).show ();
38
42
39
43
System .out .println ("=== Print the count of occupations ===" );
40
44
typedDataset .groupBy (typedDataset .col ("occupation" )).count ().show ();
41
45
42
- System .out .println ("=== Print records with average mid age less than 20 ===" );
46
+ System .out .println ("=== Print responses with average mid age less than 20 ===" );
43
47
typedDataset .filter (response -> response .getAgeMidPoint () !=null && response .getAgeMidPoint () < 20 ).show ();
44
48
45
49
System .out .println ("=== Print the result with salary middle point in descending order ===" );
0 commit comments