File tree Expand file tree Collapse file tree 2 files changed +55
-0
lines changed Expand file tree Collapse file tree 2 files changed +55
-0
lines changed Original file line number Diff line number Diff line change
1
+ from pyspark import SparkContext
2
+ from commons .Utils import Utils
3
+
4
def getPostPrefix(line: str):
    """Return the outward part (first space-separated token) of the postcode
    in CSV column 4, or None when that field is empty."""
    fields = Utils.COMMA_DELIMITER.split(line)
    postcode = fields[4]
    if not postcode:
        return None
    return postcode.split(" ")[0]
8
+
9
def loadPostCodeMap():
    """Read in/uk-postcode.csv and return a dict mapping postcode prefix
    (column 0) to region name (column 7).

    Uses a context manager so the file handle is closed deterministically;
    the original ``open(...).read()`` leaked the handle.
    """
    with open("in/uk-postcode.csv", "r") as f:
        lines = f.read().split("\n")
    splitsForLines = [Utils.COMMA_DELIMITER.split(line) for line in lines if line != ""]
    return {splits[0]: splits[7] for splits in splitsForLines}
13
+
14
if __name__ == "__main__":
    sc = SparkContext("local", "UkMakerSpaces")
    sc.setLogLevel("ERROR")

    # Broadcast the (small) prefix -> region lookup so each executor
    # receives a single read-only copy instead of one per task.
    postCodeMap = sc.broadcast(loadPostCodeMap())

    makerSpaceRdd = sc.textFile("in/uk-makerspaces-identifiable-data.csv")

    regions = makerSpaceRdd \
        .filter(lambda line: Utils.COMMA_DELIMITER.split(line)[0] != "Timestamp") \
        .filter(lambda line: getPostPrefix(line) is not None) \
        .map(lambda line: postCodeMap.value.get(getPostPrefix(line), "Unknown"))
        # dict.get replaces the membership-test-then-index pattern, which
        # called getPostPrefix twice per record; "Unknow" typo fixed.

    for region, count in regions.countByValue().items():
        print("{} : {}".format(region, count))
Original file line number Diff line number Diff line change
1
+ from pyspark import SparkContext
2
+ from commons .Utils import Utils
3
+
4
def getPostPrefixes(line: str):
    """Return every leading substring of the whitespace-stripped postcode
    (CSV column 4), shortest first, e.g. "AB1 2CD" -> ["", "A", "AB", ...].

    Bug fix: the original ``postcode.replace("\\s+", "")`` was a no-op —
    ``str.replace`` is a literal substring replace, not a regex — so
    whitespace was never removed and spaced prefixes could never match
    the lookup keys. ``"".join(postcode.split())`` strips all whitespace.
    """
    postcode = Utils.COMMA_DELIMITER.split(line)[4]
    cleanedPostCode = "".join(postcode.split())
    return [cleanedPostCode[0:i] for i in range(0, len(cleanedPostCode) + 1)]
8
+
9
def loadPostCodeMap():
    """Read in/uk-postcode.csv and return a dict mapping postcode prefix
    (column 0) to region name (column 7).

    Uses a context manager so the file handle is closed deterministically;
    the original ``open(...).read()`` leaked the handle.
    """
    with open("in/uk-postcode.csv", "r") as f:
        lines = f.read().split("\n")
    splitsForLines = [Utils.COMMA_DELIMITER.split(line) for line in lines if line != ""]
    return {splits[0]: splits[7] for splits in splitsForLines}
13
+
14
if __name__ == "__main__":
    sc = SparkContext("local", "UkMakerSpaces")
    sc.setLogLevel("ERROR")

    # Broadcast the lookup table instead of closing over a plain dict, so
    # Spark ships one read-only copy per executor rather than serializing
    # it with every task (and stays consistent with the sibling script).
    postCodeMap = sc.broadcast(loadPostCodeMap())
    makerSpaceRdd = sc.textFile("in/uk-makerspaces-identifiable-data.csv")

    regions = makerSpaceRdd \
        .filter(lambda line: Utils.COMMA_DELIMITER.split(line)[0] != "Timestamp") \
        .map(lambda line: next((postCodeMap.value[prefix]
                                for prefix in getPostPrefixes(line)
                                if prefix in postCodeMap.value), "Unknown"))
        # "Unknow" typo fixed in the fallback label.

    for region, count in regions.countByValue().items():
        print("{} : {}".format(region, count))
You can’t perform that action at this time.
0 commit comments