| 
 | 1 | +# Import the libraries  | 
 | 2 | +import numpy as np  | 
 | 3 | +import matplotlib.pyplot as plt  | 
 | 4 | +import pandas as pd  | 
 | 5 | + | 
 | 6 | +# Import the dataset   | 
 | 7 | +dataset = pd.read_csv('data.csv')  | 
 | 8 | +X = dataset.iloc[:, :-1].values  | 
 | 9 | +y = dataset.iloc[:, 3].values  | 
 | 10 | + | 
 | 11 | +# Taking care of missing data  | 
 | 12 | +from sklearn.preprocessing import Imputer  | 
 | 13 | +imputer=Imputer(missing_values='NaN', strategy='mean', axis=0)  | 
 | 14 | +imputer=imputer.fit(X[:, 1:3])  | 
 | 15 | +X[:, 1:3] = imputer.transform(X[:, 1:3])  | 
 | 16 | + | 
 | 17 | +# Encoding categorical data  | 
 | 18 | +from sklearn.preprocessing import LabelEncoder, OneHotEncoder  | 
 | 19 | +labelencoder_X = LabelEncoder()  | 
 | 20 | +X[:,0] = labelencoder_X.fit_transform(X[:,0])  | 
 | 21 | +onehotencoder = OneHotEncoder(categorical_features = [0])  | 
 | 22 | +X = onehotencoder.fit_transform(X).toarray()  | 
 | 23 | +labelencoder_y = LabelEncoder()  | 
 | 24 | +y = labelencoder_y.fit_transform(y)  | 
 | 25 | + | 
 | 26 | +# Splitting the dataset into the training set and test set  | 
 | 27 | +from sklearn.cross_validation import train_test_split  | 
 | 28 | +X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,random_state=42)  | 
 | 29 | + | 
 | 30 | +# Feature scaling  | 
 | 31 | +from sklearn.preprocessing import StandardScaler  | 
 | 32 | +sc_X = StandardScaler()  | 
 | 33 | +X_train = sc_X.fit_transform(X_train)  | 
 | 34 | +X_test = sc_X.transform(X_test)  | 
 | 35 | + | 
0 commit comments