
Commit 91074f5

Author: James Lee
Commit message: improve spark SQL examples
1 parent aa75d6b commit 91074f5

File tree: 4 files changed, +7 -13 lines

src/main/java/com/sparkTutorial/sparkSql/HousePriceProblem.java

Lines changed: 4 additions & 4 deletions
@@ -3,11 +3,11 @@

 public class HousePriceProblem {

-    /* TODO: Create a Spark program to read the house data from in/RealEstate.csv, group by location, aggregate the average price per SQ Ft and max price, and sort by average price per SQ Ft.
+    /* Create a Spark program to read the house data from in/RealEstate.csv,
+       group by location, aggregate the average price per SQ Ft and max price, and sort by average price per SQ Ft.

-       The HOUSES dataset contains a collection of recent real estate listings in San Luis Obispo county and
-       around it. The dataset is provided in two formats: as a CSV file and as a Microsoft Excel (1997-2003)
-       spreadsheet.
+       The houses dataset contains a collection of recent real estate listings in San Luis Obispo county and
+       around it.

        The dataset contains the following fields:
        1. MLS: Multiple listing service number for the house (unique ID).
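As a starting point for the exercise described in the comment above, a minimal sketch of reading in/RealEstate.csv and inspecting its columns might look like the following; the class name and the session setup (app name, local master) are assumptions that mirror the other examples in this package, not code from the commit.

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class HousePriceProblemStarter {

    public static void main(String[] args) {
        // Hypothetical session setup, modeled on the other sparkSql examples.
        SparkSession session = SparkSession.builder()
                .appName("HousePriceProblem")
                .master("local[*]")
                .getOrCreate();

        // Read the CSV with a header row; every column is read as a string by default.
        Dataset<Row> realEstate = session.read().option("header", "true").csv("in/RealEstate.csv");

        // Inspect the fields listed in the comment (MLS, Location, Price, ...).
        realEstate.printSchema();
        realEstate.show(5);
    }
}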

src/main/java/com/sparkTutorial/sparkSql/HousePriceSolution.java

Lines changed: 2 additions & 1 deletion
@@ -23,7 +23,8 @@ public static void main(String[] args) throws Exception {

        Dataset<Row> realEstate = session.read().option("header", "true").csv("in/RealEstate.csv");

-       Dataset<Row> castedRealEstate = realEstate.withColumn(PRICE, col(PRICE).cast("long")).withColumn(PRICE_SQ_FT, col(PRICE_SQ_FT).cast("long"));
+       Dataset<Row> castedRealEstate = realEstate.withColumn(PRICE, col(PRICE).cast("long"))
+                                                 .withColumn(PRICE_SQ_FT, col(PRICE_SQ_FT).cast("long"));

        castedRealEstate.groupBy("Location")
                .agg(avg(PRICE_SQ_FT), max(PRICE))
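For context beyond the fragment in this hunk, here is a self-contained sketch of how the whole solution plausibly fits together. The PRICE and PRICE_SQ_FT constant values, the session setup, and the sort column name "avg(Price SQ Ft)" are assumptions inferred from the diff, not taken verbatim from the file.

import static org.apache.spark.sql.functions.avg;
import static org.apache.spark.sql.functions.col;
import static org.apache.spark.sql.functions.max;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class HousePriceSolutionSketch {

    // Assumed column names; the real constants live in HousePriceSolution.java.
    private static final String PRICE = "Price";
    private static final String PRICE_SQ_FT = "Price SQ Ft";

    public static void main(String[] args) {
        SparkSession session = SparkSession.builder()
                .appName("HousePriceSolution")
                .master("local[*]")
                .getOrCreate();

        Dataset<Row> realEstate = session.read().option("header", "true").csv("in/RealEstate.csv");

        // Cast the string columns to numbers, as shown in the diff above.
        Dataset<Row> castedRealEstate = realEstate.withColumn(PRICE, col(PRICE).cast("long"))
                                                  .withColumn(PRICE_SQ_FT, col(PRICE_SQ_FT).cast("long"));

        // Aggregate per location and sort by the average price per square foot.
        // Spark names the aggregate column "avg(Price SQ Ft)" by default.
        castedRealEstate.groupBy("Location")
                .agg(avg(PRICE_SQ_FT), max(PRICE))
                .orderBy(col("avg(" + PRICE_SQ_FT + ")"))
                .show();
    }
}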

src/main/java/com/sparkTutorial/sparkSql/RddDatasetConversion.java

Lines changed: 0 additions & 5 deletions
@@ -14,7 +14,6 @@ public class RddDatasetConversion {
     private static final String COMMA_DELIMITER = ",(?=([^\"]*\"[^\"]*\")*[^\"]*$)";

     public static void main(String[] args) throws Exception {
-
         Logger.getLogger("org").setLevel(Level.ERROR);
         SparkConf conf = new SparkConf().setAppName("StackOverFlowSurvey").setMaster("local[1]");

@@ -39,15 +38,11 @@ public static void main(String[] args) throws Exception {
         responseDataset.show(20);

         JavaRDD<Response> responseJavaRDD = responseDataset.toJavaRDD();
-
         for (Response response : responseJavaRDD.collect()) {
             System.out.println(response);
         }
-
     }
-
     private static Integer toInt(String split) {
         return split.isEmpty() ? null : Math.round(Float.valueOf(split));
     }
-
 }
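The hunks above show only fragments of RddDatasetConversion. A minimal sketch of the overall RDD-to-Dataset-and-back flow is shown below; the original file presumably parses the Stack Overflow survey CSV into Response objects, but this sketch builds the RDD from an in-memory list to stay self-contained, and the class name and sample values are illustrative only.

import java.util.Arrays;

import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;

public class RddDatasetConversionSketch {

    public static void main(String[] args) throws Exception {
        Logger.getLogger("org").setLevel(Level.ERROR);
        SparkConf conf = new SparkConf().setAppName("StackOverFlowSurvey").setMaster("local[1]");
        JavaSparkContext sc = new JavaSparkContext(conf);

        // getOrCreate() reuses the SparkContext created above.
        SparkSession session = SparkSession.builder()
                .appName("StackOverFlowSurvey")
                .master("local[1]")
                .getOrCreate();

        // RDD side: a handful of hand-built Response beans stand in for the parsed survey CSV.
        JavaRDD<Response> responseRDD = sc.parallelize(Arrays.asList(
                new Response("Canada", 30, "Developer", 75000),
                new Response("Germany", 25, "Data scientist", 60000)));

        // RDD -> Dataset: a bean encoder turns Response objects into typed rows.
        Dataset<Response> responseDataset = session.createDataset(responseRDD.rdd(), Encoders.bean(Response.class));
        responseDataset.show(20);

        // Dataset -> RDD: toJavaRDD() goes back to plain Response objects, as in the diff above.
        JavaRDD<Response> responseJavaRDD = responseDataset.toJavaRDD();
        for (Response response : responseJavaRDD.collect()) {
            System.out.println(response);
        }
    }
}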

src/main/java/com/sparkTutorial/sparkSql/Response.java

Lines changed: 1 addition & 3 deletions
@@ -15,8 +15,7 @@ public Response(String country, Integer ageMidPoint, String occupation, Integer
         this.salaryMidPoint = salaryMidPoint;
     }

-    public Response() {
-    }
+    public Response() {}

     public String getCountry() {
         return country;

@@ -50,7 +49,6 @@ public void setSalaryMidPoint(Integer salaryMidPoint) {
         this.salaryMidPoint = salaryMidPoint;
     }

-
     @Override
     public String toString() {
         return "Response{" +
