
Commit f954b37

authored
Merge pull request TheAlgorithms#92 from yashLadha/master
Added Linear regression
2 parents efb77ad + 1727d79 commit f954b37

File tree

1 file changed: +108 −0 lines changed


machine_learning/linear_regression.py

@@ -0,0 +1,108 @@
"""
Linear regression is the most basic type of regression, commonly used for
predictive analysis. The idea is pretty simple: we have a dataset and features
associated with it. The features should be chosen very cautiously, as they
determine how well our model will be able to make future predictions. We tune
these feature weights over many iterations so that the model best fits our
dataset. This particular code uses a CSGO dataset (ADR vs Rating); we fit a
line through the dataset and estimate its parameters.
"""

import requests
import numpy as np
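
# With a bias column of ones prepended to X (see main()), the hypothesis is
# y_hat = X . theta^T, i.e. a straight line y_hat = theta_0 + theta_1 * ADR.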


def collect_dataset():
    """ Collect dataset of CSGO
    The dataset contains ADR vs Rating of a Player
    :return : dataset obtained from the link, as matrix
    """
    response = requests.get('https://raw.githubusercontent.com/yashLadha/' +
                            'The_Math_of_Intelligence/master/Week1/ADRvs' +
                            'Rating.csv')
    lines = response.text.splitlines()
    data = []
    for item in lines:
        item = item.split(',')
        data.append(item)
    data.pop(0)  # Remove the header row with the column labels
    dataset = np.matrix(data)
    return dataset


def run_steep_gradient_descent(data_x, data_y,
                               len_data, alpha, theta):
    """ Run one step of steepest-descent gradient update on the feature vector
    :param data_x   : contains the dataset
    :param data_y   : contains the output associated with each data entry
    :param len_data : length of the data
    :param alpha    : learning rate of the model
    :param theta    : feature vector (weights for our model)
    :return         : updated features, using
                      curr_features - alpha * gradient (w.r.t. features)
    """
    n = len_data

    prod = np.dot(theta, data_x.transpose())
    prod -= data_y.transpose()
    sum_grad = np.dot(prod, data_x)
    theta = theta - (alpha / n) * sum_grad
    return theta
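
# The update above is batch gradient descent on the squared-error cost:
#     theta <- theta - (alpha / n) * (theta . X^T - y^T) . X
# where the parenthesised term is the row vector of residuals over all n samples.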


def sum_of_square_error(data_x, data_y, len_data, theta):
    """ Return sum of square error for error calculation
    :param data_x   : contains our dataset
    :param data_y   : contains the output (result vector)
    :param len_data : length of the dataset
    :param theta    : contains the feature vector
    :return         : sum of square error computed from the given features
    """
    prod = np.dot(theta, data_x.transpose())
    prod -= data_y.transpose()
    sum_elem = np.sum(np.square(prod))
    error = sum_elem / (2 * len_data)
    return error
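
# The value returned above is J(theta) = (1 / (2n)) * sum((theta . x_i - y_i)^2);
# the factor of 1/2 makes its gradient match the update in run_steep_gradient_descent.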


def run_linear_regression(data_x, data_y):
    """ Implement linear regression over the dataset
    :param data_x : contains our dataset
    :param data_y : contains the output (result vector)
    :return       : feature vector for the line of best fit
    """
    iterations = 100000
    alpha = 0.0001550

    no_features = data_x.shape[1]
    len_data = data_x.shape[0]

    theta = np.zeros((1, no_features))

    for i in range(0, iterations):
        theta = run_steep_gradient_descent(data_x, data_y,
                                           len_data, alpha, theta)
        error = sum_of_square_error(data_x, data_y, len_data, theta)
        print('At Iteration %d - Error is %.5f ' % (i + 1, error))

    return theta


def main():
    """ Driver function """
    data = collect_dataset()

    len_data = data.shape[0]
    # Prepend a column of ones so that theta[0] acts as the intercept term
    data_x = np.c_[np.ones(len_data), data[:, :-1]].astype(float)
    data_y = data[:, -1].astype(float)

    theta = run_linear_regression(data_x, data_y)
    len_result = theta.shape[1]
    print('Resultant Feature vector : ')
    for i in range(0, len_result):
        print('%.5f' % (theta[0, i]))


if __name__ == '__main__':
    main()
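
A quick offline sanity check, in case the CSV link is unreachable: the sketch
below is not part of the commit, and the synthetic dataset and expected output
are illustrative assumptions. It fits the module's run_linear_regression to
noise-free points on y = 2 + 3x and should recover approximately that
intercept and slope. Note that it prints the error at each of the 100000
iterations.

    import numpy as np

    # 50 noise-free samples of y = 2 + 3x, with a bias column of ones.
    x = np.linspace(0, 10, 50)
    data_x = np.c_[np.ones(len(x)), x]
    data_y = (2.0 + 3.0 * x).reshape(-1, 1)

    theta = run_linear_regression(data_x, data_y)
    print(theta)  # expected to land near [[2. 3.]]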

0 commit comments
