1+ import pandas as pd
2+ import pickle
3+ import numpy as np
4+ from sklearn .linear_model import LinearRegression
5+ from sklearn .datasets import make_regression
6+
7+ NUM_FEATURES = 10
8+ NUM_SAMPLES = 1000
9+
10+ # Make data
def make_data():
    """Build a synthetic regression data set as a float16 DataFrame.

    Draws ``NUM_SAMPLES`` rows with ``NUM_FEATURES`` features (all of them
    informative, noise 0.5) from ``make_regression``.  Feature columns are
    named ``X1`` .. ``X<NUM_FEATURES>`` and the target goes in column ``y``.

    Returns:
        A ``pandas.DataFrame`` holding the feature columns followed by ``y``,
        with both features and target cast to ``numpy.float16``.
    """
    features, target = make_regression(
        n_samples=NUM_SAMPLES,
        n_features=NUM_FEATURES,
        n_informative=NUM_FEATURES,
        noise=0.5,
    )
    # Column labels are 1-based: X1, X2, ...
    column_names = [f"X{idx}" for idx in range(1, NUM_FEATURES + 1)]
    frame = pd.DataFrame(features, columns=column_names, dtype=np.float16)
    frame['y'] = np.array(target, dtype=np.float16)
    return frame
17+
18+ # Test/Train
def test_train(data):
    """Split *data* into a training half and a testing half.

    The first ``len(data) // 2`` rows become the training set and the
    remaining rows the test set; rows are not shuffled.  The ``y`` column is
    separated out as the target in both halves.

    Args:
        data: ``pandas.DataFrame`` that contains a ``y`` column.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test)`` where the ``X_*`` items
        are DataFrames without the ``y`` column and the ``y_*`` items are the
        matching ``y`` Series.
    """
    # Derive the split point from the frame itself rather than from a module
    # constant, so frames of any length are split in half correctly.
    split_at = len(data) // 2
    train_rows = data.iloc[:split_at]
    test_rows = data.iloc[split_at:]
    X_train, y_train = train_rows.drop(['y'], axis=1), train_rows['y']
    X_test, y_test = test_rows.drop(['y'], axis=1), test_rows['y']
    return (X_train, y_train, X_test, y_test)


# The name starts with "test_", so pytest would try to collect this helper as
# a test whenever it is imported into a test module; opt out explicitly.
test_train.__test__ = False
23+
24+ # Fitting
def fitting(X_train, y_train):
    """Fit a single-job ``LinearRegression`` on the training data.

    Args:
        X_train: training features, passed straight to ``LinearRegression.fit``.
        y_train: training targets, passed straight to ``LinearRegression.fit``.

    Returns:
        The fitted ``LinearRegression`` instance.

    Note:
        No ``del`` of the arguments is attempted here: unbinding the local
        parameter names would not release data the caller still references.
    """
    lm = LinearRegression(n_jobs=1)
    lm.fit(X_train, y_train)
    return lm
31+
32+ # Saving model
def save(lm, path='LinearModel.sav'):
    """Pickle *lm* to a file.

    Args:
        lm: the object to serialise (here, the fitted model).
        path: destination file name; defaults to ``'LinearModel.sav'``,
            resolved against the current working directory.  An existing
            file at that path is overwritten.
    """
    with open(path, mode='wb') as f:
        pickle.dump(lm, f)
36+
def model_run(model, testfile):
    """Load a pickled model and run its ``predict`` on the rows of a CSV file.

    Args:
        model: path to a pickle file holding an object with a ``predict``
            method.
        testfile: path to a CSV file; it is read with ``pandas.read_csv`` and
            the resulting DataFrame is handed to ``predict``.

    Returns:
        None.  The predictions are computed and then discarded.
    """
    # NOTE(security): unpickling can execute arbitrary code contained in the
    # file, so only point this at model files from a trusted source.
    # The context manager closes the file handle as soon as loading finishes.
    with open(model, 'rb') as model_file:
        lm = pickle.load(model_file)
    X_test = pd.read_csv(testfile)
    lm.predict(X_test)
    return None
45+
if __name__ == '__main__':
    # End-to-end run: generate data, split it, fit, persist the model, then
    # reload the model from disk and predict on the held-out features.
    data = make_data()
    X_train, y_train, X_test, y_test = test_train(data)
    # model_run() reads its features from Test.csv, so the held-out features
    # must be written first; without this a clean run fails because the file
    # does not exist.
    X_test.to_csv("Test.csv", index=False)
    lm = fitting(X_train, y_train)
    save(lm)
    model_run('LinearModel.sav', 'Test.csv')
0 commit comments