@@ -12,7 +12,6 @@ object TypedDataset {
12
12
def main (args : Array [String ]) {
13
13
Logger .getLogger(" org" ).setLevel(Level .ERROR )
14
14
val session = SparkSession .builder().appName(" StackOverFlowSurvey" ).master(" local[*]" ).getOrCreate()
15
- import session .implicits ._
16
15
17
16
val dataFrameReader = session.read
18
17
@@ -21,12 +20,16 @@ object TypedDataset {
21
20
.option(" inferSchema" , value = true )
22
21
.csv(" in/2016-stack-overflow-survey-responses.csv" )
23
22
24
- val responseWithSelectedColumns = responses.withColumn(" country" , responses.col(" country" ))
25
- .withColumn(" ageMidPoint" , responses.col(" age_midpoint" ).cast(" integer" ))
23
+ val responseWithSelectedColumns = responses.select(" country" , " age_midpoint" , " occupation" , " salary_midpoint" )
24
+
25
+ val responseWithRenamedColumns = responseWithSelectedColumns
26
+ .withColumn(" country" , responses.col(" country" ))
27
+ .withColumn(AGE_MIDPOINT , responses.col(" age_midpoint" ).cast(" integer" ))
26
28
.withColumn(" occupation" , responses.col(" occupation" ))
27
- .withColumn(" salaryMidPoint " , responses.col(" salary_midpoint" ).cast(" integer" ))
29
+ .withColumn(SALARY_MIDPOINT , responses.col(" salary_midpoint" ).cast(" integer" ))
28
30
29
- val typedDataset = responseWithSelectedColumns.as[Response ]
31
+ import session .implicits ._
32
+ val typedDataset = responseWithRenamedColumns.as[Response ]
30
33
31
34
System .out.println(" === Print out schema ===" )
32
35
typedDataset.printSchema()
0 commit comments