
Commit 91074f5

Author: James Lee
Commit message: improve spark SQL examples
1 parent aa75d6b commit 91074f5

File tree: 4 files changed, +7 -13 lines

src/main/java/com/sparkTutorial/sparkSql/HousePriceProblem.java

Lines changed: 4 additions & 4 deletions
@@ -3,11 +3,11 @@

 public class HousePriceProblem {

-    /* TODO: Create a Spark program to read the house data from in/RealEstate.csv, group by location, aggregate the average price per SQ Ft and max price, and sort by average price per SQ Ft.
+    /* Create a Spark program to read the house data from in/RealEstate.csv,
+       group by location, aggregate the average price per SQ Ft and max price, and sort by average price per SQ Ft.

-       The HOUSES dataset contains a collection of recent real estate listings in San Luis Obispo county and
-       around it. The dataset is provided in two formats: as a CSV file and as a Microsoft Excel (1997-2003)
-       spreadsheet.
+       The houses dataset contains a collection of recent real estate listings in San Luis Obispo county and
+       around it.

        The dataset contains the following fields:
        1. MLS: Multiple listing service number for the house (unique ID).
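As a starting point for the exercise described in the comment above, a minimal sketch of reading in/RealEstate.csv and inspecting its columns might look like the following; the class name and the session setup (app name, local master) are assumptions that mirror the other examples in this package, not code from the commit.

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class HousePriceProblemStarter {

    public static void main(String[] args) {
        // Hypothetical session setup, modeled on the other sparkSql examples.
        SparkSession session = SparkSession.builder()
                .appName("HousePriceProblem")
                .master("local[*]")
                .getOrCreate();

        // Read the CSV with a header row; every column is read as a string by default.
        Dataset<Row> realEstate = session.read().option("header", "true").csv("in/RealEstate.csv");

        // Inspect the fields listed in the comment (MLS, Location, Price, ...).
        realEstate.printSchema();
        realEstate.show(5);
    }
}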

src/main/java/com/sparkTutorial/sparkSql/HousePriceSolution.java

Lines changed: 2 additions & 1 deletion
@@ -23,7 +23,8 @@ public static void main(String[] args) throws Exception {

        Dataset<Row> realEstate = session.read().option("header", "true").csv("in/RealEstate.csv");

-       Dataset<Row> castedRealEstate = realEstate.withColumn(PRICE, col(PRICE).cast("long")).withColumn(PRICE_SQ_FT, col(PRICE_SQ_FT).cast("long"));
+       Dataset<Row> castedRealEstate = realEstate.withColumn(PRICE, col(PRICE).cast("long"))
+                                                 .withColumn(PRICE_SQ_FT, col(PRICE_SQ_FT).cast("long"));

        castedRealEstate.groupBy("Location")
                .agg(avg(PRICE_SQ_FT), max(PRICE))
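For context beyond the fragment in this hunk, here is a self-contained sketch of how the whole solution plausibly fits together. The PRICE and PRICE_SQ_FT constant values, the session setup, and the sort column name "avg(Price SQ Ft)" are assumptions inferred from the diff, not taken verbatim from the file.

import static org.apache.spark.sql.functions.avg;
import static org.apache.spark.sql.functions.col;
import static org.apache.spark.sql.functions.max;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class HousePriceSolutionSketch {

    // Assumed column names; the real constants live in HousePriceSolution.java.
    private static final String PRICE = "Price";
    private static final String PRICE_SQ_FT = "Price SQ Ft";

    public static void main(String[] args) {
        SparkSession session = SparkSession.builder()
                .appName("HousePriceSolution")
                .master("local[*]")
                .getOrCreate();

        Dataset<Row> realEstate = session.read().option("header", "true").csv("in/RealEstate.csv");

        // Cast the string columns to numbers, as shown in the diff above.
        Dataset<Row> castedRealEstate = realEstate.withColumn(PRICE, col(PRICE).cast("long"))
                                                  .withColumn(PRICE_SQ_FT, col(PRICE_SQ_FT).cast("long"));

        // Aggregate per location and sort by the average price per square foot.
        // Spark names the aggregate column "avg(Price SQ Ft)" by default.
        castedRealEstate.groupBy("Location")
                .agg(avg(PRICE_SQ_FT), max(PRICE))
                .orderBy(col("avg(" + PRICE_SQ_FT + ")"))
                .show();
    }
}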

src/main/java/com/sparkTutorial/sparkSql/RddDatasetConversion.java

Lines changed: 0 additions & 5 deletions
@@ -14,7 +14,6 @@ public class RddDatasetConversion {
     private static final String COMMA_DELIMITER = ",(?=([^\"]*\"[^\"]*\")*[^\"]*$)";

     public static void main(String[] args) throws Exception {
-
         Logger.getLogger("org").setLevel(Level.ERROR);
         SparkConf conf = new SparkConf().setAppName("StackOverFlowSurvey").setMaster("local[1]");

@@ -39,15 +38,11 @@ public static void main(String[] args) throws Exception {
         responseDataset.show(20);

         JavaRDD<Response> responseJavaRDD = responseDataset.toJavaRDD();
-
         for (Response response : responseJavaRDD.collect()) {
             System.out.println(response);
         }
-
     }
-
     private static Integer toInt(String split) {
         return split.isEmpty() ? null : Math.round(Float.valueOf(split));
     }
-
 }
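The hunks above show only fragments of RddDatasetConversion. A minimal sketch of the overall RDD-to-Dataset-and-back flow is shown below; the original file presumably parses the Stack Overflow survey CSV into Response objects, but this sketch builds the RDD from an in-memory list to stay self-contained, and the class name and sample values are illustrative only.

import java.util.Arrays;

import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;

public class RddDatasetConversionSketch {

    public static void main(String[] args) throws Exception {
        Logger.getLogger("org").setLevel(Level.ERROR);
        SparkConf conf = new SparkConf().setAppName("StackOverFlowSurvey").setMaster("local[1]");
        JavaSparkContext sc = new JavaSparkContext(conf);

        // getOrCreate() reuses the SparkContext created above.
        SparkSession session = SparkSession.builder()
                .appName("StackOverFlowSurvey")
                .master("local[1]")
                .getOrCreate();

        // RDD side: a handful of hand-built Response beans stand in for the parsed survey CSV.
        JavaRDD<Response> responseRDD = sc.parallelize(Arrays.asList(
                new Response("Canada", 30, "Developer", 75000),
                new Response("Germany", 25, "Data scientist", 60000)));

        // RDD -> Dataset: a bean encoder turns Response objects into typed rows.
        Dataset<Response> responseDataset = session.createDataset(responseRDD.rdd(), Encoders.bean(Response.class));
        responseDataset.show(20);

        // Dataset -> RDD: toJavaRDD() goes back to plain Response objects, as in the diff above.
        JavaRDD<Response> responseJavaRDD = responseDataset.toJavaRDD();
        for (Response response : responseJavaRDD.collect()) {
            System.out.println(response);
        }
    }
}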

src/main/java/com/sparkTutorial/sparkSql/Response.java

Lines changed: 1 addition & 3 deletions
@@ -15,8 +15,7 @@ public Response(String country, Integer ageMidPoint, String occupation, Integer
         this.salaryMidPoint = salaryMidPoint;
     }

-    public Response() {
-    }
+    public Response() {}

     public String getCountry() {
         return country;

@@ -50,7 +49,6 @@ public void setSalaryMidPoint(Integer salaryMidPoint) {
         this.salaryMidPoint = salaryMidPoint;
     }

-
     @Override
     public String toString() {
         return "Response{" +
