Skip to content

Commit c095cf0

Browse files
authored
Add files via upload
1 parent 2ac26c7 commit c095cf0

File tree

1 file changed

+123
-0
lines changed

1 file changed

+123
-0
lines changed

Pandas/pandas_weather.py

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
import numpy as np
2+
import pandas as pd
3+
4+
def header(msg):
    """Print a section banner: a 50-dash rule followed by ``[ msg ]``.

    Args:
        msg: Text to show between the square brackets.

    Returns:
        None. The banner is written to standard output.
    """
    print('-' * 50)
    print(f'[ {msg} ]')
7+
8+
# 1. load hard-coded data into a dataframe
header("1. load hard-coded data into a df")
df = pd.DataFrame(
    [
        ['Jan', 58, 42, 74, 22, 2.95],
        ['Feb', 61, 45, 78, 26, 3.02],
        ['Mar', 65, 48, 84, 25, 2.34],
        ['Apr', 67, 50, 92, 28, 1.02],
        ['May', 71, 53, 98, 35, 0.48],
        ['Jun', 75, 56, 107, 41, 0.11],
        ['Jul', 77, 58, 105, 44, 0.0],
        ['Aug', 77, 59, 102, 43, 0.03],
        ['Sep', 77, 57, 103, 40, 0.17],
        ['Oct', 73, 54, 96, 34, 0.81],
        ['Nov', 64, 48, 84, 30, 1.7],
        ['Dec', 58, 42, 73, 21, 2.56],
    ],
    index=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11],
    columns=['month', 'avg_high', 'avg_low', 'record_high', 'record_low', 'avg_precipitation'],
)
print(df)

# 2. read text file into a dataframe
# This rebinds `df`, so every later section works on the file's contents,
# not on the hard-coded table above.
# NOTE(review): later sections use the columns month, avg_high, avg_low,
# record_high and avg_precipitation, and section 10 assigns exactly seven
# column names after one column has been added, so the file is presumably
# laid out like the six-column table above -- confirm.
header("2. read text file into a df")
filename = 'Fremont_weather.txt'
df = pd.read_csv(filename)
print(df)

# 3. print first 5 or last 3 rows of df
header("3. df.head()")
print(df.head())
header("3. df.tail(3)")
print(df.tail(3))

# 4. get data types, index, columns, values
header("4. df.dtypes")
print(df.dtypes)

header("4. df.index")
print(df.index)

header("4. df.columns")
print(df.columns)

header("4. df.values")
print(df.values)

# 5. statistical summary of each column
header("5. df.describe()")
print(df.describe())

# 6. sort records by any column
header("6. df.sort_values('record_high', ascending=False)")
print(df.sort_values('record_high', ascending=False))

# 7. slicing records
header("7. slicing -- df.avg_low")
print(df.avg_low)  # index with single column (attribute access)

header("7. slicing -- df['avg_low']")
print(df['avg_low'])  # index with single column (key access)

header("7. slicing -- df[2:4]")  # a bare slice selects rows, not columns
print(df[2:4])  # rows 2 to 3

header("7. slicing -- df[['avg_low','avg_high']]")
print(df[['avg_low', 'avg_high']])

header("7. slicing -- df.loc[:,['avg_low','avg_high']]")
# multiple columns: df.loc[from_row:to_row,['column1','column2']]
print(df.loc[:, ['avg_low', 'avg_high']])

header("7. slicing scalar value -- df.loc[9,['avg_precipitation']]")
# The column selector is a one-element list, so this prints a one-element
# Series; df.loc[9, 'avg_precipitation'] would give the bare scalar.
print(df.loc[9, ['avg_precipitation']])

header("7. df.iloc[3:5,[0,3]]")  # index location can receive range or list of indices
print(df.iloc[3:5, [0, 3]])

# 8. filtering
header("8. df[df.avg_precipitation > 1.0]")  # filter on column values
print(df[df.avg_precipitation > 1.0])

# The banner previously showed `.isin[...]`; it now matches the call below.
header("8. df[df['month'].isin(['Jun','Jul','Aug'])]")
print(df[df['month'].isin(['Jun', 'Jul', 'Aug'])])

# 9. assignment -- very similar to slicing
header("9. df.loc[9,['avg_precipitation']] = 101.3")
df.loc[9, ['avg_precipitation']] = 101.3
print(df.iloc[9:11])

header("9. df.loc[9,['avg_precipitation']] = np.nan")
df.loc[9, ['avg_precipitation']] = np.nan
print(df.iloc[9:11])

header("9. df.loc[:,'avg_low'] = np.array([5] * len(df))")
df.loc[:, 'avg_low'] = np.array([5] * len(df))  # overwrite the whole column with 5
print(df.head())

header("9. df['avg_day'] = (df.avg_low + df.avg_high) / 2")
df['avg_day'] = (df.avg_low + df.avg_high) / 2  # assigning to a new key adds a column
print(df.head())

# 10. renaming columns
header("10. df.rename(columns = {'avg_precipitation':'avg_rain'}, inplace=True)")
df.rename(columns={'avg_precipitation': 'avg_rain'}, inplace=True)  # rename 1 column
print(df.head())

header("10. df.columns = ['month','av_hi','av_lo','rec_hi','rec_lo','av_rain','av_day']")
df.columns = ['month', 'av_hi', 'av_lo', 'rec_hi', 'rec_lo', 'av_rain', 'av_day']  # rename all columns
print(df.head())

# 11. iterate a df
header("11. iterate rows of df with a for loop")
for index, row in df.iterrows():
    # Section 10 renamed every column, so the high-temperature column is now
    # 'av_hi'; looking up the old 'avg_high' label raises KeyError.
    print(index, row["month"], row["av_hi"])

# 12. write to csv file
df.to_csv('foo.csv')
122+
123+

0 commit comments

Comments
 (0)