Skip to content

Fixes: #12108: Add Ridge regression implementation to machine_learning #12251

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 22 commits into from
Closed
Changes from 1 commit
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
resolved conflicts
  • Loading branch information
ankana2113 committed Oct 23, 2024
commit 544a38b016d2d596b66294c7268623822d58e17c
38 changes: 19 additions & 19 deletions machine_learning/ridge_regression/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,21 @@


class RidgeRegression:
def __init__(
self,
alpha: float = 0.001,
regularization_param: float = 0.1,
num_iterations: int = 1000,
) -> None:
def __init__(self,
alpha: float = 0.001,
regularization_param: float = 0.1,
num_iterations: int = 1000,
) -> None:
self.alpha: float = alpha
self.regularization_param: float = regularization_param
self.num_iterations: int = num_iterations
self.theta: np.ndarray = None

def feature_scaling(

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As there is no test file in this pull request, nor any test function or class in the file machine_learning/ridge_regression/model.py, please provide a doctest for the function feature_scaling

self, X: np.ndarray
self, x: np.ndarray

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please provide a descriptive name for the parameter: x

) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
mean = np.mean(X, axis=0)
std = np.std(X, axis=0)
mean = np.mean(x, axis=0)
std = np.std(x, axis=0)

# avoid division by zero for constant features (std = 0)
std[std == 0] = 1 # set std=1 for constant features to avoid NaN
Expand All @@ -31,7 +30,7 @@ def fit(self, x: np.ndarray, y: np.ndarray) -> None:
m, n = x_scaled.shape
self.theta = np.zeros(n) # initializing weights to zeros

for i in range(self.num_iterations):
for _ in range(self.num_iterations):
predictions = x_scaled.dot(self.theta)
error = predictions - y

Expand All @@ -41,18 +40,19 @@ def fit(self, x: np.ndarray, y: np.ndarray) -> None:
) / m
self.theta -= self.alpha * gradient # updating weights

def predict(self, X: np.ndarray) -> np.ndarray:
X_scaled, _, _ = self.feature_scaling(X)
return X_scaled.dot(self.theta)
def predict(self, x: np.ndarray) -> np.ndarray:

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As there is no test file in this pull request, nor any test function or class in the file machine_learning/ridge_regression/model.py, please provide a doctest for the function predict

Please provide a descriptive name for the parameter: x

x_scaled, _, _ = self.feature_scaling(x)
return x_scaled.dot(self.theta)

def compute_cost(self, x: np.ndarray, y: np.ndarray) -> float:
x_scaled, _, _ = self.feature_scaling(x)
m = len(y)

predictions = x_scaled.dot(self.theta)
cost = (1 / (2 * m)) * np.sum((predictions - y) ** 2) + (
self.regularization_param / (2 * m)
) * np.sum(self.theta**2)
cost = (
1 / (2 * m)) * np.sum((predictions - y) ** 2) + (
self.regularization_param / (2 * m)
) * np.sum(self.theta**2)
return cost

def mean_absolute_error(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As there is no test file in this pull request, nor any test function or class in the file machine_learning/ridge_regression/model.py, please provide a doctest for the function mean_absolute_error

Expand All @@ -61,9 +61,9 @@ def mean_absolute_error(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:

# Example usage
if __name__ == "__main__":
df = pd.read_csv("ADRvsRating.csv")
x = df[["Rating"]].values
y = df["ADR"].values
data = pd.read_csv("ADRvsRating.csv")
x = data[["Rating"]].to_numpy()
y = data["ADR"].to_numpy()
y = (y - np.mean(y)) / np.std(y)

# added bias term to the feature matrix
Expand Down