# Standard library
from io import open  # no-op on Python 3; retained for Python 2 compatibility

# Third-party
import pandas
from sklearn import linear_model
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
1819dataset = pandas .DataFrame (imp .transform (dataset ))
1920
2021# percentage of input csv to be used
21- numberOfTrain = int (len (dataset )* 80 / 100 )
22- numberOfTest = int (len (dataset )* 20 / 100 )
22+ numberOfTrain = int (len (dataset )* 50 / 100 )
23+ numberOfTest = int (len (dataset )* 10 / 100 )
2324
2425"""
2526# make train and test data
3233powerY_train = dataset .iloc [:numberOfTrain ,3 ]
3334powerY_test = dataset .iloc [numberOfTrain :(numberOfTrain + numberOfTest ),3 ]
3435
35- # linear regression
36- # Create linear regression object
37- regr = linear_model .LinearRegression ()
38-
39- # Create lasso model object
40- #regr = linear_model.Lasso(alpha=0.003)
41-
42- # Train the model using the training sets
43- regr .fit (powerX_train , powerY_train )
44-
45- # Make predictions using the testing set
46- y_pred = regr .predict (powerX_test )
47-
48- # The coefficients
49- print ('Coefficients: \n ' , regr .coef_ )
50- # The mean squared error
51- print ("Mean squared error: %.2f"
52- % mean_squared_error (powerY_test , y_pred ))
53- # Explained variance score: 1 is perfect prediction
54- print ('Variance score: %.2f' % r2_score (powerY_test , y_pred ))
55-
56- #plt.xlabel("test")
57- #plt.ylabel("predict")
58- #plt.plot(powerX_test, y_pred)
59- #plt.show()
60-
61- """
62- # polinomial function
63- degrees = [1, 2, 3, 4]
64- score = []
65-
66- for i in range(len(degrees)):
67- print(i+1)
68- polynomial_features = PolynomialFeatures(degree=degrees[i],
69- include_bias=False)
70-
71- regr = linear_model.LinearRegression() # Ridge, Lasso, LinearRegression
72-
73- pipeline = Pipeline([("polynomial_features", polynomial_features),
74- ("linear_regression", regr)])
75-
76- # train
77- pipeline.fit(powerX_train, powerY_train)
78-
79- # predict
80- y_pred = pipeline.predict(powerX_test)
81- r2 = r2_score(powerY_test, y_pred)
82- score.append(r2)
83-
36+ def linearRegression (type ):
37+ # Create linear regression object
38+ if (type == "lr" ):
39+ regr = linear_model .LinearRegression ()
40+ elif (type == "ridge" ):
41+ regr = linear_model .Ridge (alpha = 0.001 )
42+ elif (type == "lasso" ):
43+ regr = linear_model .Lasso (alpha = 0.003 )
44+ else :
45+ print ("wrong parameter" )
46+ return
47+
48+ # Train the model using the training sets
49+ regr .fit (powerX_train , powerY_train )
50+
51+ # Make predictions using the testing set
52+ y_pred = regr .predict (powerX_test )
53+
54+ # The coefficients
55+ print ('Coefficients: \n ' , regr .coef_ )
8456 # The mean squared error
8557 print ("Mean squared error: %.2f"
8658 % mean_squared_error (powerY_test , y_pred ))
8759 # Explained variance score: 1 is perfect prediction
8860 print ('Variance score: %.2f' % r2_score (powerY_test , y_pred ))
8961
90- plt.xlabel("degrees")
91- plt.ylabel("score")
92- plt.plot(degrees, score)
93- plt.show()
94- """
62+ #plt.xlabel("test")
63+ #plt.ylabel("predict")
64+ #plt.plot(powerX_test, y_pred)
65+ #plt.show()
66+
67+ def polinomialFunction ():
68+ degrees = [1 , 2 , 3 , 4 ]
69+ score = []
70+
71+ for i in range (len (degrees )):
72+ print (i + 1 )
73+ polynomial_features = PolynomialFeatures (degree = degrees [i ],
74+ include_bias = False )
75+
76+ regr = linear_model .LinearRegression () # Ridge, Lasso, LinearRegression
77+
78+ pipeline = Pipeline ([("polynomial_features" , polynomial_features ),
79+ ("linear_regression" , regr )])
80+
81+ # train
82+ pipeline .fit (powerX_train , powerY_train )
83+
84+ # predict
85+ y_pred = pipeline .predict (powerX_test )
86+ r2 = r2_score (powerY_test , y_pred )
87+ score .append (r2 )
88+
89+ # The mean squared error
90+ print ("Mean squared error: %.2f"
91+ % mean_squared_error (powerY_test , y_pred ))
92+ # Explained variance score: 1 is perfect prediction
93+ print ('Variance score: %.2f' % r2 )
94+
95+ plt .xlabel ("degrees" )
96+ plt .ylabel ("score" )
97+ plt .plot (degrees , score )
98+ plt .show ()
99+
100+ # finds best alphas for ridge and lasso in function of variance
101+ # rigde = 0.003, lasso = 0.003, lassoCV = 0.00001
102+ def alphaViz ():
103+ alphas = [0.00001 , 0.001 , 0.003 , 0.01 , 0.03 , 0.1 , 0.3 , 0.9 ]
104+ optimizationFunctions = ['ridge' , 'lasso' , 'lassolars' ]
105+ score = []
106+
107+ f , axarr = plt .subplots (len (optimizationFunctions ))
108+ funcIndex = 0
109+ for func in optimizationFunctions :
110+ for i in alphas :
111+ print (func ,i )
112+
113+ # no switch case in python :(
114+ if func == 'ridge' :
115+ regr = linear_model .Ridge (alpha = i )
116+ elif func == 'lasso' :
117+ regr = linear_model .Lasso (alpha = i )
118+ elif func == 'lassolars' :
119+ regr = linear_model .LassoLars (alpha = i )
120+
121+ # Train the model using the training sets
122+ regr .fit (powerX_train , powerY_train )
123+
124+ # Make predictions using the testing set
125+ y_pred = regr .predict (powerX_test )
126+ r2 = r2_score (powerY_test , y_pred )
127+ score .append (r2 )
128+
129+ # The mean squared error
130+ print ("Mean squared error: %.2f"
131+ % mean_squared_error (powerY_test , y_pred ))
132+ # Explained variance score: 1 is perfect prediction
133+ print ('Variance score: %.2f' % r2_score (powerY_test , y_pred ))
134+
135+ axarr [funcIndex ].plot (alphas , score )
136+ axarr [funcIndex ].set_title (func )
137+ funcIndex = funcIndex + 1
138+ score = []
139+
140+ plt .tight_layout ()
141+ plt .show ()
142+
143+
144+ def compareOptFunctionViz ():
145+ optimizationFunctions = ['linearRegression' , 'ridge' , 'lasso' , 'lassolars' ]
146+ score = []
147+
148+ for func in optimizationFunctions :
149+ if func == 'linearRegression' :
150+ regr = linear_model .LinearRegression ()
151+ elif func == 'ridge' :
152+ regr = linear_model .Ridge (0.9 )
153+ elif func == 'lasso' :
154+ regr = linear_model .Lasso (0.003 )
155+ elif func == 'lassolars' :
156+ regr = linear_model .LassoLars (0.00001 )
157+
158+ # Train the model using the training sets
159+ regr .fit (powerX_train , powerY_train )
160+
161+ # Make predictions using the testing set
162+ y_pred = regr .predict (powerX_test )
163+ r2 = r2_score (powerY_test , y_pred )
164+ score .append (r2 )
165+
166+ plt .plot (optimizationFunctions , score )
167+ plt .tight_layout ()
168+ plt .show ()
169+
170+ #############################
171+
172+ alphaViz ()
173+ #compareOptFunctionViz()
0 commit comments