
Commit afc939b

Author: Pedro Bernardo
Commit message: Added rdd/airports/*.py
Parent: 08b146a

File tree: 4 files changed, +66 -0 lines changed
Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
from pyspark import SparkContext

if __name__ == "__main__":

    '''
    Create a Spark program to read the airport data from in/airports.text and
    find all the airports whose latitude is greater than 40.
    Then output the airport's name and latitude to out/airports_by_latitude.text.

    Each row of the input file contains the following columns:
    Airport ID, Name of airport, Main city served by airport, Country where airport is located,
    IATA/FAA code, ICAO code, Latitude, Longitude, Altitude, Timezone, DST, Timezone in Olson format

    Sample output:
    "St Anthony", 51.391944
    "Tofino", 49.082222
    ...
    '''
Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
from pyspark import SparkContext
from commons.Utils import Utils

def splitComma(line: str):
    # Split on commas outside quoted fields, then keep the airport
    # name (column 1) and its latitude (column 6).
    splits = Utils.COMMA_DELIMITER.split(line)
    return "{}, {}".format(splits[1], splits[6])

if __name__ == "__main__":
    sc = SparkContext("local", "airports")

    airports = sc.textFile("in/airports.text")

    # Keep only the airports whose latitude is greater than 40.
    airportsByLatitude = airports.filter(lambda line: float(Utils.COMMA_DELIMITER.split(line)[6]) > 40)

    airportsNameAndLatitude = airportsByLatitude.map(splitComma)

    airportsNameAndLatitude.saveAsTextFile("out/airports_by_latitude.text")
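The commons.Utils module that both solutions import is not part of this commit. Below is a minimal sketch of what it likely provides, assuming COMMA_DELIMITER is a pre-compiled regular expression that splits on commas only when they fall outside double-quoted fields:

# commons/Utils.py -- hypothetical sketch, not the committed module
import re

class Utils:
    # Assumption: split on a comma only when it is followed by an even number
    # of double quotes, i.e. when the comma sits outside any quoted field.
    COMMA_DELIMITER = re.compile(r',(?=(?:[^"]*"[^"]*")*[^"]*$)')

A plain line.split(",") would break any row whose quoted fields themselves contain commas, which is why a quote-aware pattern is needed here.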

rdd/airports/AirportsInUsaProblem.py

Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
from pyspark import SparkContext

if __name__ == "__main__":

    '''
    Create a Spark program to read the airport data from in/airports.text,
    find all the airports that are located in the United States,
    and output the airport's name and the city's name to out/airports_in_usa.text.

    Each row of the input file contains the following columns:
    Airport ID, Name of airport, Main city served by airport, Country where airport is located,
    IATA/FAA code, ICAO code, Latitude, Longitude, Altitude, Timezone, DST, Timezone in Olson format

    Sample output:
    "Putnam County Airport", "Greencastle"
    "Dowagiac Municipal Airport", "Dowagiac"
    ...
    '''

rdd/airports/AirportsInUsaSolution.py

Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
from pyspark import SparkContext
from commons.Utils import Utils

def splitComma(line: str):
    # Split on commas outside quoted fields, then keep the airport
    # name (column 1) and the main city it serves (column 2).
    splits = Utils.COMMA_DELIMITER.split(line)
    return "{}, {}".format(splits[1], splits[2])

if __name__ == "__main__":
    sc = SparkContext("local", "airports_in_usa")

    airports = sc.textFile("in/airports.text")
    # The country field (column 3) keeps its surrounding double quotes in the
    # raw text, so it is compared against the quoted literal "United States".
    airportsInUSA = airports.filter(lambda line: Utils.COMMA_DELIMITER.split(line)[3] == "\"United States\"")

    airportsNameAndCityNames = airportsInUSA.map(splitComma)
    airportsNameAndCityNames.saveAsTextFile("out/airports_in_usa.text")
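As a quick sanity check of the comma-aware split outside of Spark, something like the following can be run from the repository root. The sample row is invented purely to exercise a comma inside a quoted field and only mirrors the column layout described in the problem file:

# Hypothetical check; assumes the commons.Utils module sketched above is importable.
from commons.Utils import Utils

sample = '42,"Example Intl, Terminal A","Sample City","United States","EXA","KEXA",41.5,-87.9,600,-6,"A","America/Chicago"'
fields = Utils.COMMA_DELIMITER.split(sample)
print(fields[1], fields[3])  # "Example Intl, Terminal A" "United States"

The quoted name survives with its internal comma, and the country comes back as the quoted literal "United States" that the filter in the solution above compares against.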
