11import numpy as np
22import matplotlib .pyplot as plt
3+ import statsmodels .api as sm
34
45
56class Metrics :
@@ -14,35 +15,53 @@ class Metrics:
1415 """
1516
def sse(self):
    """Returns the sum of squared errors (model vs actual).

    Reads ``self.resid_`` and caches the result on ``self.sq_error_``.
    Prints a notice and returns ``None`` when ``self.is_fitted`` is falsy.
    """
    if self.is_fitted:
        # Square every residual, add them up, and keep the total on the instance.
        self.sq_error_ = np.sum(self.resid_ ** 2)
        return self.sq_error_
    print("Model not fitted yet!")
    return None
2125
def sst(self):
    """Returns the total sum of squares (actual vs avg(actual)).

    Reads ``self.target_`` and caches the result on ``self.sst_``.
    Prints a notice and returns ``None`` when ``self.is_fitted`` is falsy.
    """
    if self.is_fitted:
        # Deviation of each target value from the target mean.
        centered = self.target_ - np.mean(self.target_)
        self.sst_ = np.sum(centered ** 2)
        return self.sst_
    print("Model not fitted yet!")
    return None
2835
def r_squared(self):
    """Returns the calculated value of r^2.

    Computed as ``1 - sse() / sst()`` and cached on ``self.r_sq_``.
    Prints a notice and returns ``None`` when ``self.is_fitted`` is falsy.
    """
    if self.is_fitted:
        unexplained = self.sse()
        total = self.sst()
        self.r_sq_ = 1 - unexplained / total
        return self.r_sq_
    print("Model not fitted yet!")
    return None
3343
def adj_r_squared(self):
    """Returns the calculated value of adjusted r^2.

    Computed as ``1 - (sse() / dfe_) / (sst() / dft_)`` and cached on
    ``self.adj_r_sq_``. Prints a notice and returns ``None`` when
    ``self.is_fitted`` is falsy.
    """
    if self.is_fitted:
        # Each sum of squares is scaled by its own degrees of freedom.
        error_term = self.sse() / self.dfe_
        total_term = self.sst() / self.dft_
        self.adj_r_sq_ = 1 - error_term / total_term
        return self.adj_r_sq_
    print("Model not fitted yet!")
    return None
3851
def mse(self):
    """Returns the calculated value of mse.

    Takes the mean of the squared differences between
    ``self.predict(self.features_)`` and ``self.target_``, and caches it on
    ``self.mse_``. Prints a notice and returns ``None`` when
    ``self.is_fitted`` is falsy.
    """
    if self.is_fitted:
        errors = self.predict(self.features_) - self.target_
        self.mse_ = np.mean(errors ** 2)
        return self.mse_
    print("Model not fitted yet!")
    return None
4359
4460 def pretty_print_stats (self ):
45- """returns report of statistics for a given model object"""
61+ """Returns report of statistics for a given model object"""
62+ if not self .is_fitted :
63+ print ("Model not fitted yet!" )
64+ return None
4665 items = (
4766 ("sse:" , self .sse ()),
4867 ("sst:" , self .sst ()),
@@ -54,10 +73,30 @@ def pretty_print_stats(self):
5473 print ("{0:8} {1:.4f}" .format (item [0 ], item [1 ]))
5574
5675
class Inference:
    """
    Inferential statistics: standard error, p-values, etc.

    Used as a base class of the regression model; its methods read
    ``is_fitted``, ``features_`` and ``target_`` from the instance.
    """

    def __init__(self):
        # The signature previously lacked `self`, so instantiating this
        # class directly raised a TypeError.
        pass

    def pvalues(self):
        """
        Returns p-values of the features

        An ordinary-least-squares model is fitted with statsmodels on
        ``self.target_`` against ``self.features_`` with a constant column
        added unconditionally, and the ``pvalues`` of that fit are returned.
        NOTE(review): the result presumably also carries an entry for the
        added constant, not only the features - confirm against statsmodels.

        Prints a notice and returns ``None`` when the model is not fitted.
        """
        # A missing `is_fitted` attribute is treated the same as "not fitted"
        # instead of surfacing as an AttributeError.
        if not getattr(self, "is_fitted", False):
            print("Model not fitted yet!")
            return None
        design = sm.add_constant(self.features_)
        lm = sm.OLS(self.target_, design).fit()
        return lm.pvalues
93+
94+
5795class Diagnostics_plots :
5896 """
5997 Diagnostics plots and methods
6098
99+ Arguments:
61100 fitted_vs_residual: Plots fitted values vs. residuals
62101 fitted_vs_features: Plots residuals vs all feature variables in a grid
63102 histogram_resid: Plots a histogram of the residuals (can be normalized)
@@ -70,6 +109,9 @@ def __init__():
70109
71110 def fitted_vs_residual (self ):
72111 """Plots fitted values vs. residuals"""
112+ if not self .is_fitted :
113+ print ("Model not fitted yet!" )
114+ return None
73115 plt .title ("Fitted vs. residuals plot" , fontsize = 14 )
74116 plt .scatter (self .fitted_ , self .resid_ , edgecolor = "k" )
75117 plt .hlines (
@@ -85,6 +127,9 @@ def fitted_vs_residual(self):
85127
86128 def fitted_vs_features (self ):
87129 """Plots residuals vs all feature variables in a grid"""
130+ if not self .is_fitted :
131+ print ("Model not fitted yet!" )
132+ return None
88133 num_plots = self .features_ .shape [1 ]
89134 if num_plots % 3 == 0 :
90135 nrows = int (num_plots / 3 )
@@ -117,6 +162,9 @@ def fitted_vs_features(self):
117162
118163 def histogram_resid (self , normalized = True ):
119164 """Plots a histogram of the residuals (can be normalized)"""
165+ if not self .is_fitted :
166+ print ("Model not fitted yet!" )
167+ return None
120168 if normalized :
121169 norm_r = self .resid_ / np .linalg .norm (self .resid_ )
122170 else :
@@ -130,6 +178,9 @@ def histogram_resid(self, normalized=True):
130178
131179 def shapiro_test (self , normalized = True ):
132180 """Performs Shapiro-Wilk normality test on the residuals"""
181+ if not self .is_fitted :
182+ print ("Model not fitted yet!" )
183+ return None
133184 from scipy .stats import shapiro
134185
135186 if normalized :
@@ -146,6 +197,9 @@ def shapiro_test(self, normalized=True):
146197
147198 def qqplot_resid (self , normalized = True ):
148199 """Creates a quantile-quantile plot for residuals comparing with a normal distribution"""
200+ if not self .is_fitted :
201+ print ("Model not fitted yet!" )
202+ return None
149203 from scipy .stats import probplot
150204
151205 if normalized :
@@ -172,6 +226,9 @@ def __init__():
172226
173227 def pairplot (self ):
174228 """Creates pairplot of all variables and the target using the Seaborn library"""
229+ if not self .is_fitted :
230+ print ("Model not fitted yet!" )
231+ return None
175232
176233 print ("This may take a little time. Have patience..." )
177234 from seaborn import pairplot
@@ -188,6 +245,9 @@ def plot_fitted(self, reference_line=False):
188245 Arguments:
189246 reference_line: A Boolean switch to draw a 45-degree reference line on the plot
190247 """
248+ if not self .is_fitted :
249+ print ("Model not fitted yet!" )
250+ return None
191251 plt .title ("True vs. fitted values" , fontsize = 14 )
192252 plt .scatter (y , self .fitted_ , s = 100 , alpha = 0.75 , color = "red" , edgecolor = "k" )
193253 if reference_line :
@@ -212,6 +272,9 @@ def __init__():
212272
213273 def cook_distance (self ):
214274 """Computes and plots Cook\' s distance"""
275+ if not self .is_fitted :
276+ print ("Model not fitted yet!" )
277+ return None
215278 import statsmodels .api as sm
216279 from statsmodels .stats .outliers_influence import OLSInfluence as influence
217280
@@ -226,6 +289,9 @@ def cook_distance(self):
226289
227290 def influence_plot (self ):
228291 """Creates the influence plot"""
292+ if not self .is_fitted :
293+ print ("Model not fitted yet!" )
294+ return None
229295 import statsmodels .api as sm
230296
231297 lm = sm .OLS (self .target_ , sm .add_constant (self .features_ )).fit ()
@@ -235,6 +301,9 @@ def influence_plot(self):
235301
236302 def leverage_resid_plot (self ):
237303 """Plots leverage vs normalized residuals' square"""
304+ if not self .is_fitted :
305+ print ("Model not fitted yet!" )
306+ return None
238307 import statsmodels .api as sm
239308
240309 lm = sm .OLS (self .target_ , sm .add_constant (self .features_ )).fit ()
@@ -255,6 +324,9 @@ def __init__():
255324
256325 def vif (self ):
257326 """Computes variance influence factors for each feature variable"""
327+ if not self .is_fitted :
328+ print ("Model not fitted yet!" )
329+ return None
258330 import statsmodels .api as sm
259331 from statsmodels .stats .outliers_influence import (
260332 variance_inflation_factor as vif ,
@@ -267,24 +339,27 @@ def vif(self):
267339
268340
269341class MyLinearRegression (
270- Metrics , Diagnostics_plots , Data_plots , Outliers , Multicollinearity
342+ Metrics , Diagnostics_plots , Data_plots , Outliers , Multicollinearity , Inference
271343):
def __init__(self, fit_intercept=True):
    """Creates an unfitted model.

    Arguments:
        fit_intercept: stored on ``self._fit_intercept``
    """
    self._fit_intercept = fit_intercept
    # No data has been ingested and nothing has been fitted yet.
    self.is_fitted = False
    self.features_ = None
    self.target_ = None
    self.coef_ = None
    self.intercept_ = None
276351
def __repr__(self):
    """Returns the fixed description string used as the model's repr."""
    description = "I am a Linear Regression model!"
    return description
279354
280- def fit (self , X , y ):
355+ def ingest_data (self , X , y ):
281356 """
282- Fit model coefficients.
357+ Ingests the given data
358+
283359 Arguments:
284360 X: 1D or 2D numpy array
285361 y: 1D numpy array
286362 """
287-
288363 # check if X is 1D or 2D array
289364 if len (X .shape ) == 1 :
290365 X = X .reshape (- 1 , 1 )
@@ -293,16 +368,33 @@ def fit(self, X, y):
293368 self .features_ = X
294369 self .target_ = y
295370
371+ def fit (self , X = None , y = None , _fit_intercept = True ):
372+ """
373+ Fit model coefficients.
374+ Arguments:
375+ X: 1D or 2D numpy array
376+ y: 1D numpy array
377+ """
378+
379+ if X != None :
380+ if len (X .shape ) == 1 :
381+ X = X .reshape (- 1 , 1 )
382+ self .features_ = X
383+ if y != None :
384+ self .target_ = y
385+
296386 # degrees of freedom of population dependent variable variance
297- self .dft_ = X .shape [0 ] - 1
387+ self .dft_ = self . features_ .shape [0 ] - 1
298388 # degrees of freedom of population error variance
299- self .dfe_ = X . shape [0 ] - X .shape [1 ] - 1
389+ self .dfe_ = self . features_ . shape [0 ] - self . features_ .shape [1 ] - 1
300390
301391 # add bias if fit_intercept is True
302392 if self ._fit_intercept :
303- X_biased = np .c_ [np .ones (X . shape [0 ]), X ]
393+ X_biased = np .c_ [np .ones (self . features_ . shape [0 ]), self . features_ ]
304394 else :
305- X_biased = X
395+ X_biased = self .features_
396+ # Assign target_ to a local variable y
397+ y = self .target_
306398
307399 # closed form solution
308400 xTx = np .dot (X_biased .T , X_biased )
@@ -319,12 +411,15 @@ def fit(self, X, y):
319411 self .coef_ = coef
320412
321413 # Predicted/fitted y
322- self .fitted_ = np .dot (X , self .coef_ ) + self .intercept_
414+ self .fitted_ = np .dot (self . features_ , self .coef_ ) + self .intercept_
323415
324416 # Residuals
325417 residuals = self .target_ - self .fitted_
326418 self .resid_ = residuals
327419
420+ # Set is_fitted to True
421+ self .is_fitted = True
422+
328423 def predict (self , X ):
329424 """Output model prediction.
330425 Arguments:
0 commit comments