pashby
diff --git a/‎README.md‎
Lines changed: 1 addition & 0 deletions b/‎README.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎machine-learning/dimensionality-reduction-feature-extraction/DimentionalityReductionUsingFeatureExtraction_PythonCodeTutorial.ipynb‎
Lines changed: 341 additions & 0 deletions b/‎machine-learning/dimensionality-reduction-feature-extraction/DimentionalityReductionUsingFeatureExtraction_PythonCodeTutorial.ipynb‎
Lines changed: 341 additions & 0 deletions
diff --git a/‎machine-learning/dimensionality-reduction-feature-extraction/README.md‎
Lines changed: 1 addition & 0 deletions b/‎machine-learning/dimensionality-reduction-feature-extraction/README.md‎
Lines changed: 1 addition & 0 deletions
@@ -91,6 +91,7 @@ This is a repository of all the tutorials of [The Python Code](https://www.thepy
     - [Logistic Regression using PyTorch in Python](https://www.thepythoncode.com/article/logistic-regression-using-pytorch). ([code](machine-learning/logistic-regression-in-pytorch))
     - [Dropout Regularization using PyTorch in Python](https://www.thepythoncode.com/article/dropout-regularization-in-pytorch). ([code](machine-learning/dropout-in-pytorch))
     - [K-Fold Cross Validation using Scikit-Learn in Python](https://www.thepythoncode.com/article/kfold-cross-validation-using-sklearn-in-python). ([code](machine-learning/k-fold-cross-validation-sklearn))
+    - [Dimensionality Reduction: Feature Extraction using Scikit-learn in Python](https://www.thepythoncode.com/article/dimensionality-reduction-using-feature-extraction-sklearn). ([code](machine-learning/dimensionality-reduction-feature-extraction))
 
 - ### [General Python Topics](https://www.thepythoncode.com/topic/general-python-topics)
     - [How to Make Facebook Messenger bot in Python](https://www.thepythoncode.com/article/make-bot-fbchat-python). ([code](general/messenger-bot))
 
@@ -0,0 +1,341 @@
+{
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "NNamP65y8eGf"
+      },
+      "outputs": [],
+      "source": [
+        "from sklearn import datasets\n",
+        "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n",
+        "from sklearn.decomposition import PCA, KernelPCA\n",
+        "from sklearn.datasets import make_circles\n",
+        "from sklearn.preprocessing import StandardScaler\n",
+        "from sklearn.decomposition import NMF\n",
+        "from sklearn.decomposition import TruncatedSVD\n",
+        "from scipy.sparse import csr_matrix"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 2,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "fvJfKhFq8hQc",
+        "outputId": "acbc4c59-acbd-4ff4-bacb-e54b55e0312f"
+      },
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "Original number of features: 64\n",
+            "Reduced number of features: 40\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Load the data\n",
+        "digits = datasets.load_digits()\n",
+        "# Feature matrix standardization\n",
+        "features = StandardScaler().fit_transform(digits.data)\n",
+        "# Perform PCA while retaining 95% of the variance\n",
+        "pca = PCA(n_components=0.95, whiten=True)\n",
+        "# perform PCA\n",
+        "pcafeatures = pca.fit_transform(features)\n",
+        "# Display results\n",
+        "print(\"Original number of features:\", features.shape[1])\n",
+        "print(\"Reduced number of features:\", pcafeatures.shape[1])"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 3,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "jyU800Lf8it4",
+        "outputId": "0d4c73bf-7d08-48e6-a44f-a5647a2e0c11"
+      },
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "Original number of features: 2\n",
+            "Reduced number of features: 1\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Create linearly inseparable data\n",
+        "features, _ = make_circles(n_samples=2000, random_state=1, noise=0.1, factor=0.1)\n",
+        "# Apply kernel PCA with a radial basis function (RBF) kernel\n",
+        "k_pca = KernelPCA(kernel=\"rbf\", gamma=16, n_components=1)\n",
+        "k_pcaf = k_pca.fit_transform(features)\n",
+        "print(\"Original number of features:\", features.shape[1])\n",
+        "print(\"Reduced number of features:\", k_pcaf.shape[1])"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 4,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "IfCo5TA28kn6",
+        "outputId": "312956a9-9fb5-4296-d766-a3e642649da1"
+      },
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "number of features(original): 4\n",
+            "number of features that was reduced: 1\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Load the Iris flower dataset:\n",
+        "iris = datasets.load_iris()\n",
+        "features = iris.data\n",
+        "target = iris.target\n",
+        "# Create an LDA model and use it to transform the features\n",
+        "lda = LinearDiscriminantAnalysis(n_components=1)\n",
+        "features_lda = lda.fit(features, target).transform(features)\n",
+        "# Print the number of features\n",
+        "print(\"number of features(original):\", features.shape[1])\n",
+        "print(\"number of features that was reduced:\", features_lda.shape[1])"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 5,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "yjQBlMtM8mQu",
+        "outputId": "800279fb-f44b-43e8-9210-a35b8e190fc7"
+      },
+      "outputs": [
+        {
+          "data": {
+            "text/plain": [
+              "array([0.9912126])"
+            ]
+          },
+          "execution_count": 5,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "lda.explained_variance_ratio_"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 10,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "tHOWTxn18nf7",
+        "outputId": "ae3c857a-0ca8-4508-affc-b5ea4dff6788"
+      },
+      "outputs": [
+        {
+          "data": {
+            "text/plain": [
+              "1"
+            ]
+          },
+          "execution_count": 10,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "# Load Iris flower dataset:\n",
+        "iris123 = datasets.load_iris()\n",
+        "features = iris123.data\n",
+        "target = iris123.target\n",
+        "# Create and run LDA\n",
+        "lda_r = LinearDiscriminantAnalysis(n_components=None)\n",
+        "features_lda = lda_r.fit(features, target)\n",
+        "# array of explained variance ratios\n",
+        "lda_var_r = lda_r.explained_variance_ratio_\n",
+        "# function creation\n",
+        "def select_n_c(v_ratio, g_var: float) -> int:\n",
+        "    # initial variance explained setting\n",
+        "    total_v = 0.0\n",
+        "    # number of features initialisation\n",
+        "    n_components = 0\n",
+        "    # Loop over the explained variance ratio of each component:\n",
+        "    for explained_v in v_ratio:\n",
+        "        # explained variance addition to the total\n",
+        "        total_v += explained_v\n",
+        "        # add one to number of components\n",
+        "        n_components += 1\n",
+        "        # if we have reached our goal level of explained variance\n",
+        "        if total_v >= g_var:\n",
+        "            # end the loop\n",
+        "            break\n",
+        "    # return the number of components\n",
+        "    return n_components\n",
+        "\n",
+        "# run the function\n",
+        "select_n_c(lda_var_r, 0.95)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 7,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "12zwY1Du8o6i",
+        "outputId": "e9178fdf-2195-41cc-f4c3-a1e52c030df5"
+      },
+      "outputs": [
+        {
+          "name": "stderr",
+          "output_type": "stream",
+          "text": [
+            "/usr/local/lib/python3.7/dist-packages/sklearn/decomposition/_nmf.py:294: FutureWarning: The 'init' value, when 'init=None' and n_components is less than n_samples and n_features, will be changed from 'nndsvd' to 'nndsvda' in 1.1 (renaming of 0.26).\n",
+            "  FutureWarning,\n"
+          ]
+        },
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "Original number of features: 64\n",
+            "Reduced number of features: 12\n"
+          ]
+        },
+        {
+          "name": "stderr",
+          "output_type": "stream",
+          "text": [
+            "/usr/local/lib/python3.7/dist-packages/sklearn/decomposition/_nmf.py:1641: ConvergenceWarning: Maximum number of iterations 200 reached. Increase it to improve convergence.\n",
+            "  ConvergenceWarning,\n"
+          ]
+        }
+      ],
+      "source": [
+        "# data loading\n",
+        "digit = datasets.load_digits()\n",
+        "# feature matrix loading\n",
+        "feature_m = digit.data\n",
+        "# Creation, fit and application of NMF\n",
+        "n_mf = NMF(n_components=12, random_state=1)\n",
+        "features_nmf = n_mf.fit_transform(feature_m)\n",
+        "# Show results\n",
+        "print(\"Original number of features:\", feature_m.shape[1])\n",
+        "print(\"Reduced number of features:\", features_nmf.shape[1])"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 8,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "wrEYF9Ql8qtU",
+        "outputId": "c28d28be-4f0b-4bd7-bb56-fde6ead38a45"
+      },
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "Original number of features: 64\n",
+            "Reduced number of features: 12\n"
+          ]
+        }
+      ],
+      "source": [
+        "# data loading\n",
+        "digit123 = datasets.load_digits()\n",
+        "# feature matrix standardization\n",
+        "features_m = StandardScaler().fit_transform(digit123.data)\n",
+        "# sparse matrix creation\n",
+        "f_sparse = csr_matrix(features_m)\n",
+        "# TSVD creation\n",
+        "tsvd = TruncatedSVD(n_components=12)\n",
+        "# sparse matrix TSVD\n",
+        "features_sp_tsvd = tsvd.fit(f_sparse).transform(f_sparse)\n",
+        "# results\n",
+        "print(\"Original number of features:\", f_sparse.shape[1])\n",
+        "print(\"Reduced number of features:\", features_sp_tsvd.shape[1])"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 9,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "xRQ_nUf_8sZA",
+        "outputId": "19b8d99c-b330-406d-e728-407c18d82f20"
+      },
+      "outputs": [
+        {
+          "data": {
+            "text/plain": [
+              "0.3003938539283667"
+            ]
+          },
+          "execution_count": 9,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "# Sum of first three components' explained variance ratios\n",
+        "tsvd.explained_variance_ratio_[0:3].sum()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "zbExVkXp8vpi"
+      },
+      "outputs": [],
+      "source": []
+    }
+  ],
+  "metadata": {
+    "colab": {
+      "name": "DimentionalityReductionUsingFeatureExtraction_PythonCodeTutorial.ipynb",
+      "provenance": []
+    },
+    "interpreter": {
+      "hash": "f89a88aed07bbcd763ac68893150ace71e487877d8c6527a76855322f20001c6"
+    },
+    "kernelspec": {
+      "display_name": "Python 3.9.12 64-bit",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "name": "python",
+      "version": "3.9.12"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}
@@ -0,0 +1 @@
+# [Dimensionality Reduction: Feature Extraction using Scikit-learn in Python](https://www.thepythoncode.com/article/dimensionality-reduction-using-feature-extraction-sklearn)
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+# [Dimensionality Reduction: Feature Extraction using Scikit-learn in Python](https://www.thepythoncode.com/article/dimensionality-reduction-using-feature-extraction-sklearn)`