@@ -12,7 +12,6 @@ object TypedDataset {
12
12
def main (args : Array [String ]) {
13
13
Logger .getLogger(" org" ).setLevel(Level .ERROR )
14
14
val session = SparkSession .builder().appName(" StackOverFlowSurvey" ).master(" local[*]" ).getOrCreate()
15
-
16
15
val dataFrameReader = session.read
17
16
18
17
val responses = dataFrameReader
@@ -22,14 +21,8 @@ object TypedDataset {
22
21
23
22
val responseWithSelectedColumns = responses.select(" country" , " age_midpoint" , " occupation" , " salary_midpoint" )
24
23
25
- val responseWithRenamedColumns = responseWithSelectedColumns
26
- .withColumn(" country" , responses.col(" country" ))
27
- .withColumn(AGE_MIDPOINT , responses.col(AGE_MIDPOINT ).cast(" integer" ))
28
- .withColumn(" occupation" , responses.col(" occupation" ))
29
- .withColumn(SALARY_MIDPOINT , responses.col(SALARY_MIDPOINT ).cast(" integer" ))
30
-
31
24
import session .implicits ._
32
- val typedDataset = responseWithRenamedColumns .as[Response ]
25
+ val typedDataset = responseWithSelectedColumns .as[Response ]
33
26
34
27
System .out.println(" === Print out schema ===" )
35
28
typedDataset.printSchema()
@@ -44,7 +37,7 @@ object TypedDataset {
44
37
typedDataset.groupBy(typedDataset.col(" occupation" )).count().show()
45
38
46
39
System .out.println(" === Print responses with average mid age less than 20 ===" )
47
- typedDataset.filter(response => response.age_midpoint.isDefined && response.age_midpoint.get < 20 ).show()
40
+ typedDataset.filter(response => response.age_midpoint.isDefined && response.age_midpoint.get < 20.0 ).show()
48
41
49
42
System .out.println(" === Print the result by salary middle point in descending order ===" )
50
43
typedDataset.orderBy(typedDataset.col(SALARY_MIDPOINT ).desc).show()
0 commit comments