
Commit afc939b

Author: Pedro Bernardo
Commit message: Added rdd/airports/*.py
Parent: 08b146a

File tree: 4 files changed, +66 -0 lines changed
Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
from pyspark import SparkContext

if __name__ == "__main__":

    '''
    Create a Spark program to read the airport data from in/airports.text and
    find all the airports whose latitude is greater than 40.
    Then output the airport's name and latitude to out/airports_by_latitude.text.

    Each row of the input file contains the following columns:
    Airport ID, Name of airport, Main city served by airport, Country where airport is located,
    IATA/FAA code, ICAO code, Latitude, Longitude, Altitude, Timezone, DST, Timezone in Olson format

    Sample output:
    "St Anthony", 51.391944
    "Tofino", 49.082222
    ...
    '''
Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
from pyspark import SparkContext
from commons.Utils import Utils

def splitComma(line: str):
    # Split on commas outside quoted fields, then keep the airport
    # name (column 1) and its latitude (column 6).
    splits = Utils.COMMA_DELIMITER.split(line)
    return "{}, {}".format(splits[1], splits[6])

if __name__ == "__main__":
    sc = SparkContext("local", "airports")

    airports = sc.textFile("in/airports.text")

    # Keep only the airports whose latitude is greater than 40.
    airportsByLatitude = airports.filter(lambda line: float(Utils.COMMA_DELIMITER.split(line)[6]) > 40)

    airportsNameAndLatitude = airportsByLatitude.map(splitComma)

    airportsNameAndLatitude.saveAsTextFile("out/airports_by_latitude.text")
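The commons.Utils module that both solutions import is not part of this commit. Below is a minimal sketch of what it likely provides, assuming COMMA_DELIMITER is a pre-compiled regular expression that splits on commas only when they fall outside double-quoted fields:

# commons/Utils.py -- hypothetical sketch, not the committed module
import re

class Utils:
    # Assumption: split on a comma only when it is followed by an even number
    # of double quotes, i.e. when the comma sits outside any quoted field.
    COMMA_DELIMITER = re.compile(r',(?=(?:[^"]*"[^"]*")*[^"]*$)')

A plain line.split(",") would break any row whose quoted fields themselves contain commas, which is why a quote-aware pattern is needed here.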

rdd/airports/AirportsInUsaProblem.py

Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
from pyspark import SparkContext

if __name__ == "__main__":

    '''
    Create a Spark program to read the airport data from in/airports.text,
    find all the airports that are located in the United States,
    and output the airport's name and the city's name to out/airports_in_usa.text.

    Each row of the input file contains the following columns:
    Airport ID, Name of airport, Main city served by airport, Country where airport is located,
    IATA/FAA code, ICAO code, Latitude, Longitude, Altitude, Timezone, DST, Timezone in Olson format

    Sample output:
    "Putnam County Airport", "Greencastle"
    "Dowagiac Municipal Airport", "Dowagiac"
    ...
    '''

rdd/airports/AirportsInUsaSolution.py

Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
from pyspark import SparkContext
from commons.Utils import Utils

def splitComma(line: str):
    # Split on commas outside quoted fields, then keep the airport
    # name (column 1) and the main city it serves (column 2).
    splits = Utils.COMMA_DELIMITER.split(line)
    return "{}, {}".format(splits[1], splits[2])

if __name__ == "__main__":
    sc = SparkContext("local", "airports_in_usa")

    airports = sc.textFile("in/airports.text")
    # The country field (column 3) keeps its surrounding double quotes in the
    # raw text, so it is compared against the quoted literal "United States".
    airportsInUSA = airports.filter(lambda line: Utils.COMMA_DELIMITER.split(line)[3] == "\"United States\"")

    airportsNameAndCityNames = airportsInUSA.map(splitComma)
    airportsNameAndCityNames.saveAsTextFile("out/airports_in_usa.text")
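As a quick sanity check of the comma-aware split outside of Spark, something like the following can be run from the repository root. The sample row is invented purely to exercise a comma inside a quoted field and only mirrors the column layout described in the problem file:

# Hypothetical check; assumes the commons.Utils module sketched above is importable.
from commons.Utils import Utils

sample = '42,"Example Intl, Terminal A","Sample City","United States","EXA","KEXA",41.5,-87.9,600,-6,"A","America/Chicago"'
fields = Utils.COMMA_DELIMITER.split(sample)
print(fields[1], fields[3])  # "Example Intl, Terminal A" "United States"

The quoted name survives with its internal comma, and the country comes back as the quoted literal "United States" that the filter in the solution above compares against.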
