Skip to content

added ridge regression #12250

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 12 commits into from
Prev Previous commit
Next Next commit
resolved errors
  • Loading branch information
ankana2113 committed Oct 23, 2024
commit 1713cbe7c20864f6d3eaa2d1b521ef1a1da4828d
Empty file.
55 changes: 29 additions & 26 deletions machine_learning/ridge_regression/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,54 +3,57 @@


class RidgeRegression:
    """Linear regression with an L2 (ridge) penalty, trained by gradient descent.

    Features are standardised (zero mean, unit variance per column) before
    training.  The column statistics computed during ``fit`` are stored and
    re-applied by ``predict`` and ``compute_cost`` so that new data is mapped
    into the same feature space the weights were learned in.

    Notes
    -----
    * No intercept is learned.  A constant column (e.g. a column of ones) is
      centred to all zeros by the scaling step and therefore contributes
      nothing; centre ``y`` beforehand if an intercept is needed.
    * The minimised cost is
      ``(1 / 2m) * sum((x_scaled @ theta - y) ** 2)
      + (regularization_param / 2m) * sum(theta ** 2)``.

    Parameters
    ----------
    alpha
        Learning rate of the gradient-descent update.
    regularization_param
        Strength of the L2 penalty applied to the weights.
    num_iterations
        Number of full-batch gradient-descent steps performed by ``fit``.
    """

    def __init__(
        self,
        alpha: float = 0.001,
        regularization_param: float = 0.1,
        num_iterations: int = 1000,
    ) -> None:
        self.alpha: float = alpha
        self.regularization_param: float = regularization_param
        self.num_iterations: int = num_iterations
        # Learned weights; stays None until fit() has been called.
        self.theta: np.ndarray | None = None
        # Per-column training statistics captured by fit() and reused when
        # scaling data passed to predict() / compute_cost().
        self._mean: np.ndarray | None = None
        self._std: np.ndarray | None = None

    def feature_scaling(
        self, x: np.ndarray
    ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
        """Standardise each column of ``x`` using its own mean and std.

        Parameters
        ----------
        x
            Feature matrix with one row per sample.

        Returns
        -------
        tuple
            ``(x_scaled, mean, std)`` where ``mean`` and ``std`` are the
            per-column statistics that were used.  Columns with zero
            standard deviation report ``std == 1`` and scale to all zeros.
        """
        x = np.asarray(x, dtype=float)
        mean = np.mean(x, axis=0)
        std = np.std(x, axis=0)

        # Constant columns have std == 0; dividing by 1 instead avoids NaN.
        std[std == 0] = 1

        x_scaled = (x - mean) / std
        return x_scaled, mean, std

    def _scale_like_training(self, x: np.ndarray) -> np.ndarray:
        """Scale ``x`` with the statistics stored by ``fit``."""
        if self.theta is None:
            raise ValueError("Model is not fitted yet; call fit() first.")
        if self._mean is None or self._std is None:
            # theta was assigned by hand without fit(): no training statistics
            # exist, so fall back to scaling by the input's own statistics.
            return self.feature_scaling(x)[0]
        return (np.asarray(x, dtype=float) - self._mean) / self._std

    def fit(self, x: np.ndarray, y: np.ndarray) -> None:
        """Learn the weights from training data by batch gradient descent.

        Parameters
        ----------
        x
            Training feature matrix of shape ``(m, n)``.
        y
            Training targets; any shape holding ``m`` values (it is flattened).

        Raises
        ------
        ValueError
            If ``x`` is not two-dimensional or ``y`` does not contain one
            target per row of ``x``.
        """
        x_scaled, mean, std = self.feature_scaling(x)
        if x_scaled.ndim != 2:
            raise ValueError("x must be a 2-D array of shape (m, n).")
        m, n = x_scaled.shape

        # Flatten so a column-vector y cannot broadcast into an (m, m) error.
        targets = np.asarray(y, dtype=float).ravel()
        if targets.shape[0] != m:
            raise ValueError(
                f"x has {m} samples but y has {targets.shape[0]} targets."
            )

        # Remember the training statistics for later predict()/compute_cost().
        self._mean, self._std = mean, std
        self.theta = np.zeros(n)  # initializing weights to zeros

        for _ in range(self.num_iterations):
            error = x_scaled.dot(self.theta) - targets

            # computing gradient with L2 regularization
            gradient = (
                x_scaled.T.dot(error) + self.regularization_param * self.theta
            ) / m
            self.theta -= self.alpha * gradient  # updating weights

    def predict(self, x: np.ndarray) -> np.ndarray:
        """Return predictions for ``x`` using the fitted weights.

        ``x`` is scaled with the mean/std captured during ``fit`` (not with
        its own statistics), so single samples and differently distributed
        batches are handled correctly.

        Raises
        ------
        ValueError
            If the model has not been fitted.
        """
        return self._scale_like_training(x).dot(self.theta)

    def compute_cost(self, x: np.ndarray, y: np.ndarray) -> float:
        """Return the ridge cost of the fitted weights on ``(x, y)``.

        The cost is half the mean squared error plus the L2 penalty
        ``regularization_param / (2m) * sum(theta ** 2)``.

        Raises
        ------
        ValueError
            If the model has not been fitted.
        """
        x_scaled = self._scale_like_training(x)
        targets = np.asarray(y, dtype=float).ravel()
        m = len(targets)

        residuals = x_scaled.dot(self.theta) - targets
        data_term = np.sum(residuals**2) / (2 * m)
        penalty = (self.regularization_param / (2 * m)) * np.sum(self.theta**2)
        return float(data_term + penalty)


Expand All @@ -61,21 +64,21 @@ def mean_absolute_error(self, y_true:np.ndarray, y_pred:np.ndarray) -> float:
# Example usage
if __name__ == "__main__":
    # Demo: regress the standardised "ADR" column on the "Rating" column of
    # ADRvsRating.csv and report the learned weights and error metrics.
    df = pd.read_csv("ADRvsRating.csv")
    x = df[["Rating"]].values
    y = df["ADR"].values
    # Standardise the target so it is on the same scale as the scaled features.
    y = (y - np.mean(y)) / np.std(y)

    # added bias term to the feature matrix
    # NOTE: RidgeRegression.feature_scaling centres every column, so this
    # constant column becomes all zeros and its weight stays at 0.
    x = np.c_[np.ones(x.shape[0]), x]

    # initialize and train the ridge regression model
    model = RidgeRegression(
        alpha=0.01, regularization_param=0.1, num_iterations=1000
    )
    model.fit(x, y)

    # predictions
    predictions = model.predict(x)

    # results
    print("Optimized Weights:", model.theta)
    print("Cost:", model.compute_cost(x, y))
    print("Mean Absolute Error:", model.mean_absolute_error(y, predictions))