Skip to content

Commit 62d6afd

Browse files
author
Pedro Bernardo
committed
Added sparkSql/join/UkMakerSpaces.py
1 parent 807e4d2 commit 62d6afd

File tree

1 file changed

+27
-0
lines changed

1 file changed

+27
-0
lines changed

sparkSql/join/UkMakerSpaces.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
from pyspark.sql import SparkSession, functions as fs
2+
3+
if __name__ == "__main__":
    # Count UK makerspaces per region: load the makerspace and postcode CSV
    # files, join them on a postcode-prefix match, and print the number of
    # joined rows for each value of the "Region" column.
    session = SparkSession.builder.appName("UkMakerSpaces").master("local").getOrCreate()
    try:
        # Keep Spark's own logging quiet so the printed tables stay readable.
        session.sparkContext.setLogLevel("ERROR")

        makerspaces = session.read \
            .option("header", "true") \
            .csv("in/uk-makerspaces-identifiable-data.csv")

        # Append a single space to every postcode-table value before it is
        # used as a prefix in the join below.
        # NOTE(review): presumably this stops a short prefix such as "W1"
        # from also matching "W10 ..."; confirm against the data files.
        postcodes = session.read \
            .option("header", "true") \
            .csv("in/uk-postcode.csv") \
            .withColumn("PostCode", fs.concat_ws("", fs.col("PostCode"), fs.lit(" ")))

        # show() with no arguments prints at most 20 rows.
        print("=== Print 20 records of makerspace table ===")
        makerspaces.select("Name of makerspace", "Postcode").show()

        print("=== Print 20 records of postcode table ===")
        postcodes.show()

        # Left outer join: makerspaces with no matching postcode prefix are
        # kept and end up with a null "Region".
        # NOTE(review): the lookup uses "Postcode" while the column above is
        # written as "PostCode"; this appears to rely on Spark's default
        # case-insensitive column resolution - confirm.
        prefix_matches = makerspaces["Postcode"].startswith(postcodes["Postcode"])
        joined = makerspaces.join(postcodes, prefix_matches, "left_outer")

        print("=== Group by Region ===")
        joined.groupBy("Region").count().show(200)
    finally:
        # Always release the local Spark context, even if a read or the join
        # raised, so the JVM-side resources are not leaked.
        session.stop()

0 commit comments

Comments
 (0)