| 
 | 1 | +{  | 
 | 2 | +  "cells": [  | 
 | 3 | +    {  | 
 | 4 | +      "cell_type": "code",  | 
 | 5 | +      "execution_count": null,  | 
 | 6 | +      "metadata": {  | 
 | 7 | +        "id": "NNamP65y8eGf"  | 
 | 8 | +      },  | 
 | 9 | +      "outputs": [],  | 
 | 10 | +      "source": [  | 
 | 11 | +        "from sklearn import datasets\n",  | 
 | 12 | +        "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n",  | 
 | 13 | +        "from sklearn.decomposition import PCA, KernelPCA\n",  | 
 | 14 | +        "from sklearn.datasets import make_circles\n",  | 
 | 15 | +        "from sklearn.preprocessing import StandardScaler\n",  | 
 | 16 | +        "from sklearn.decomposition import NMF\n",  | 
 | 17 | +        "from sklearn.decomposition import TruncatedSVD\n",  | 
 | 18 | +        "from scipy.sparse import csr_matrix"  | 
 | 19 | +      ]  | 
 | 20 | +    },  | 
 | 21 | +    {  | 
 | 22 | +      "cell_type": "code",  | 
 | 23 | +      "execution_count": 2,  | 
 | 24 | +      "metadata": {  | 
 | 25 | +        "colab": {  | 
 | 26 | +          "base_uri": "https://localhost:8080/"  | 
 | 27 | +        },  | 
 | 28 | +        "id": "fvJfKhFq8hQc",  | 
 | 29 | +        "outputId": "acbc4c59-acbd-4ff4-bacb-e54b55e0312f"  | 
 | 30 | +      },  | 
 | 31 | +      "outputs": [  | 
 | 32 | +        {  | 
 | 33 | +          "name": "stdout",  | 
 | 34 | +          "output_type": "stream",  | 
 | 35 | +          "text": [  | 
 | 36 | +            "Original number of features: 64\n",  | 
 | 37 | +            "Reduced number of features: 40\n"  | 
 | 38 | +          ]  | 
 | 39 | +        }  | 
 | 40 | +      ],  | 
 | 41 | +      "source": [  | 
 | 42 | +        "# Load the data\n",  | 
 | 43 | +        "digits = datasets.load_digits()\n",  | 
 | 44 | +        "# Feature matrix standardization\n",  | 
 | 45 | +        "features = StandardScaler().fit_transform(digits.data)\n",  | 
 | 46 | +        "# Perform PCA while retaining 95% of variance\n",  | 
 | 47 | +        "pca = PCA(n_components=0.95, whiten=True)\n",  | 
 | 48 | +        "# perform PCA\n",  | 
 | 49 | +        "pcafeatures = pca.fit_transform(features)\n",  | 
 | 50 | +        "# Display results\n",  | 
 | 51 | +        "print(\"Original number of features:\", features.shape[1])\n",  | 
 | 52 | +        "print(\"Reduced number of features:\", pcafeatures.shape[1])"  | 
 | 53 | +      ]  | 
 | 54 | +    },  | 
 | 55 | +    {  | 
 | 56 | +      "cell_type": "code",  | 
 | 57 | +      "execution_count": 3,  | 
 | 58 | +      "metadata": {  | 
 | 59 | +        "colab": {  | 
 | 60 | +          "base_uri": "https://localhost:8080/"  | 
 | 61 | +        },  | 
 | 62 | +        "id": "jyU800Lf8it4",  | 
 | 63 | +        "outputId": "0d4c73bf-7d08-48e6-a44f-a5647a2e0c11"  | 
 | 64 | +      },  | 
 | 65 | +      "outputs": [  | 
 | 66 | +        {  | 
 | 67 | +          "name": "stdout",  | 
 | 68 | +          "output_type": "stream",  | 
 | 69 | +          "text": [  | 
 | 70 | +            "Original number of features: 2\n",  | 
 | 71 | +            "Reduced number of features: 1\n"  | 
 | 72 | +          ]  | 
 | 73 | +        }  | 
 | 74 | +      ],  | 
 | 75 | +      "source": [  | 
 | 76 | +        "# Create linearly inseparable data\n",  | 
 | 77 | +        "features, _ = make_circles(n_samples=2000, random_state=1, noise=0.1, factor=0.1)\n",  | 
 | 78 | +        "# Apply kernel PCA with a radial basis function (RBF) kernel\n",  | 
 | 79 | +        "k_pca = KernelPCA(kernel=\"rbf\", gamma=16, n_components=1)\n",  | 
 | 80 | +        "k_pcaf = k_pca.fit_transform(features)\n",  | 
 | 81 | +        "print(\"Original number of features:\", features.shape[1])\n",  | 
 | 82 | +        "print(\"Reduced number of features:\", k_pcaf.shape[1])"  | 
 | 83 | +      ]  | 
 | 84 | +    },  | 
 | 85 | +    {  | 
 | 86 | +      "cell_type": "code",  | 
 | 87 | +      "execution_count": 4,  | 
 | 88 | +      "metadata": {  | 
 | 89 | +        "colab": {  | 
 | 90 | +          "base_uri": "https://localhost:8080/"  | 
 | 91 | +        },  | 
 | 92 | +        "id": "IfCo5TA28kn6",  | 
 | 93 | +        "outputId": "312956a9-9fb5-4296-d766-a3e642649da1"  | 
 | 94 | +      },  | 
 | 95 | +      "outputs": [  | 
 | 96 | +        {  | 
 | 97 | +          "name": "stdout",  | 
 | 98 | +          "output_type": "stream",  | 
 | 99 | +          "text": [  | 
 | 100 | +            "number of features(original): 4\n",  | 
 | 101 | +            "number of features that was reduced: 1\n"  | 
 | 102 | +          ]  | 
 | 103 | +        }  | 
 | 104 | +      ],  | 
 | 105 | +      "source": [  | 
 | 106 | +        "# Load the Iris flower dataset:\n",  | 
 | 107 | +        "iris = datasets.load_iris()\n",  | 
 | 108 | +        "features = iris.data\n",  | 
 | 109 | +        "target = iris.target\n",  | 
 | 110 | +        "# Create the LDA model and use it to transform the features\n",  | 
 | 111 | +        "lda = LinearDiscriminantAnalysis(n_components=1)\n",  | 
 | 112 | +        "features_lda = lda.fit(features, target).transform(features)\n",  | 
 | 113 | +        "# Print the number of features\n",  | 
 | 114 | +        "print(\"number of features(original):\", features.shape[1])\n",  | 
 | 115 | +        "print(\"number of features that was reduced:\", features_lda.shape[1])"  | 
 | 116 | +      ]  | 
 | 117 | +    },  | 
 | 118 | +    {  | 
 | 119 | +      "cell_type": "code",  | 
 | 120 | +      "execution_count": 5,  | 
 | 121 | +      "metadata": {  | 
 | 122 | +        "colab": {  | 
 | 123 | +          "base_uri": "https://localhost:8080/"  | 
 | 124 | +        },  | 
 | 125 | +        "id": "yjQBlMtM8mQu",  | 
 | 126 | +        "outputId": "800279fb-f44b-43e8-9210-a35b8e190fc7"  | 
 | 127 | +      },  | 
 | 128 | +      "outputs": [  | 
 | 129 | +        {  | 
 | 130 | +          "data": {  | 
 | 131 | +            "text/plain": [  | 
 | 132 | +              "array([0.9912126])"  | 
 | 133 | +            ]  | 
 | 134 | +          },  | 
 | 135 | +          "execution_count": 5,  | 
 | 136 | +          "metadata": {},  | 
 | 137 | +          "output_type": "execute_result"  | 
 | 138 | +        }  | 
 | 139 | +      ],  | 
 | 140 | +      "source": [  | 
 | 141 | +        "lda.explained_variance_ratio_"  | 
 | 142 | +      ]  | 
 | 143 | +    },  | 
 | 144 | +    {  | 
 | 145 | +      "cell_type": "code",  | 
 | 146 | +      "execution_count": 10,  | 
 | 147 | +      "metadata": {  | 
 | 148 | +        "colab": {  | 
 | 149 | +          "base_uri": "https://localhost:8080/"  | 
 | 150 | +        },  | 
 | 151 | +        "id": "tHOWTxn18nf7",  | 
 | 152 | +        "outputId": "ae3c857a-0ca8-4508-affc-b5ea4dff6788"  | 
 | 153 | +      },  | 
 | 154 | +      "outputs": [  | 
 | 155 | +        {  | 
 | 156 | +          "data": {  | 
 | 157 | +            "text/plain": [  | 
 | 158 | +              "1"  | 
 | 159 | +            ]  | 
 | 160 | +          },  | 
 | 161 | +          "execution_count": 10,  | 
 | 162 | +          "metadata": {},  | 
 | 163 | +          "output_type": "execute_result"  | 
 | 164 | +        }  | 
 | 165 | +      ],  | 
 | 166 | +      "source": [  | 
 | 167 | +        "# Load Iris flower dataset:\n",  | 
 | 168 | +        "iris123 = datasets.load_iris()\n",  | 
 | 169 | +        "features = iris123.data\n",  | 
 | 170 | +        "target = iris123.target\n",  | 
 | 171 | +        "# Create and run LDA\n",  | 
 | 172 | +        "lda_r = LinearDiscriminantAnalysis(n_components=None)\n",  | 
 | 173 | +        "features_lda = lda_r.fit(features, target)\n",  | 
 | 174 | +        "# array of explained variance ratios\n",  | 
 | 175 | +        "lda_var_r = lda_r.explained_variance_ratio_\n",  | 
 | 176 | +        "# Define a function that selects the number of components\n",  | 
 | 177 | +        "def select_n_c(v_ratio, g_var: float) -> int:\n",  | 
 | 178 | +        "    # initial variance explained setting\n",  | 
 | 179 | +        "    total_v = 0.0\n",  | 
 | 180 | +        "    # number of features initialisation\n",  | 
 | 181 | +        "    n_components = 0\n",  | 
 | 182 | +        "    # For the explained variance ratio of each component:\n",  | 
 | 183 | +        "    for explained_v in v_ratio:\n",  | 
 | 184 | +        "        # explained variance addition to the total\n",  | 
 | 185 | +        "        total_v += explained_v\n",  | 
 | 186 | +        "        # add one to number of components\n",  | 
 | 187 | +        "        n_components += 1\n",  | 
 | 188 | +        "        # if we have reached our goal level of explained variance\n",  | 
 | 189 | +        "        if total_v >= g_var:\n",  | 
 | 190 | +        "            # end the loop\n",  | 
 | 191 | +        "            break\n",  | 
 | 192 | +        "    # return the number of components\n",  | 
 | 193 | +        "    return n_components\n",  | 
 | 194 | +        "\n",  | 
 | 195 | +        "# run the function\n",  | 
 | 196 | +        "select_n_c(lda_var_r, 0.95)"  | 
 | 197 | +      ]  | 
 | 198 | +    },  | 
 | 199 | +    {  | 
 | 200 | +      "cell_type": "code",  | 
 | 201 | +      "execution_count": 7,  | 
 | 202 | +      "metadata": {  | 
 | 203 | +        "colab": {  | 
 | 204 | +          "base_uri": "https://localhost:8080/"  | 
 | 205 | +        },  | 
 | 206 | +        "id": "12zwY1Du8o6i",  | 
 | 207 | +        "outputId": "e9178fdf-2195-41cc-f4c3-a1e52c030df5"  | 
 | 208 | +      },  | 
 | 209 | +      "outputs": [  | 
 | 210 | +        {  | 
 | 211 | +          "name": "stderr",  | 
 | 212 | +          "output_type": "stream",  | 
 | 213 | +          "text": [  | 
 | 214 | +            "/usr/local/lib/python3.7/dist-packages/sklearn/decomposition/_nmf.py:294: FutureWarning: The 'init' value, when 'init=None' and n_components is less than n_samples and n_features, will be changed from 'nndsvd' to 'nndsvda' in 1.1 (renaming of 0.26).\n",  | 
 | 215 | +            "  FutureWarning,\n"  | 
 | 216 | +          ]  | 
 | 217 | +        },  | 
 | 218 | +        {  | 
 | 219 | +          "name": "stdout",  | 
 | 220 | +          "output_type": "stream",  | 
 | 221 | +          "text": [  | 
 | 222 | +            "Original number of features: 64\n",  | 
 | 223 | +            "Reduced number of features: 12\n"  | 
 | 224 | +          ]  | 
 | 225 | +        },  | 
 | 226 | +        {  | 
 | 227 | +          "name": "stderr",  | 
 | 228 | +          "output_type": "stream",  | 
 | 229 | +          "text": [  | 
 | 230 | +            "/usr/local/lib/python3.7/dist-packages/sklearn/decomposition/_nmf.py:1641: ConvergenceWarning: Maximum number of iterations 200 reached. Increase it to improve convergence.\n",  | 
 | 231 | +            "  ConvergenceWarning,\n"  | 
 | 232 | +          ]  | 
 | 233 | +        }  | 
 | 234 | +      ],  | 
 | 235 | +      "source": [  | 
 | 236 | +        "# data loading\n",  | 
 | 237 | +        "digit = datasets.load_digits()\n",  | 
 | 238 | +        "# feature matrix loading\n",  | 
 | 239 | +        "feature_m = digit.data\n",  | 
 | 240 | +        "# Creation, fit and application of NMF\n",  | 
 | 241 | +        "n_mf = NMF(n_components=12, random_state=1)\n",  | 
 | 242 | +        "features_nmf = n_mf.fit_transform(feature_m)\n",  | 
 | 243 | +        "# Show results\n",  | 
 | 244 | +        "print(\"Original number of features:\", feature_m.shape[1])\n",  | 
 | 245 | +        "print(\"Reduced number of features:\", features_nmf.shape[1])"  | 
 | 246 | +      ]  | 
 | 247 | +    },  | 
 | 248 | +    {  | 
 | 249 | +      "cell_type": "code",  | 
 | 250 | +      "execution_count": 8,  | 
 | 251 | +      "metadata": {  | 
 | 252 | +        "colab": {  | 
 | 253 | +          "base_uri": "https://localhost:8080/"  | 
 | 254 | +        },  | 
 | 255 | +        "id": "wrEYF9Ql8qtU",  | 
 | 256 | +        "outputId": "c28d28be-4f0b-4bd7-bb56-fde6ead38a45"  | 
 | 257 | +      },  | 
 | 258 | +      "outputs": [  | 
 | 259 | +        {  | 
 | 260 | +          "name": "stdout",  | 
 | 261 | +          "output_type": "stream",  | 
 | 262 | +          "text": [  | 
 | 263 | +            "Original number of features: 64\n",  | 
 | 264 | +            "Reduced number of features: 12\n"  | 
 | 265 | +          ]  | 
 | 266 | +        }  | 
 | 267 | +      ],  | 
 | 268 | +      "source": [  | 
 | 269 | +        "# data loading\n",  | 
 | 270 | +        "digit123 = datasets.load_digits()\n",  | 
 | 271 | +        "# Standardize the feature matrix\n",  | 
 | 272 | +        "features_m = StandardScaler().fit_transform(digit123.data)\n",  | 
 | 273 | +        "# sparse matrix creation\n",  | 
 | 274 | +        "f_sparse = csr_matrix(features_m)\n",  | 
 | 275 | +        "# TSVD creation\n",  | 
 | 276 | +        "tsvd = TruncatedSVD(n_components=12)\n",  | 
 | 277 | +        "# sparse matrix TSVD\n",  | 
 | 278 | +        "features_sp_tsvd = tsvd.fit(f_sparse).transform(f_sparse)\n",  | 
 | 279 | +        "# results\n",  | 
 | 280 | +        "print(\"Original number of features:\", f_sparse.shape[1])\n",  | 
 | 281 | +        "print(\"Reduced number of features:\", features_sp_tsvd.shape[1])"  | 
 | 282 | +      ]  | 
 | 283 | +    },  | 
 | 284 | +    {  | 
 | 285 | +      "cell_type": "code",  | 
 | 286 | +      "execution_count": 9,  | 
 | 287 | +      "metadata": {  | 
 | 288 | +        "colab": {  | 
 | 289 | +          "base_uri": "https://localhost:8080/"  | 
 | 290 | +        },  | 
 | 291 | +        "id": "xRQ_nUf_8sZA",  | 
 | 292 | +        "outputId": "19b8d99c-b330-406d-e728-407c18d82f20"  | 
 | 293 | +      },  | 
 | 294 | +      "outputs": [  | 
 | 295 | +        {  | 
 | 296 | +          "data": {  | 
 | 297 | +            "text/plain": [  | 
 | 298 | +              "0.3003938539283667"  | 
 | 299 | +            ]  | 
 | 300 | +          },  | 
 | 301 | +          "execution_count": 9,  | 
 | 302 | +          "metadata": {},  | 
 | 303 | +          "output_type": "execute_result"  | 
 | 304 | +        }  | 
 | 305 | +      ],  | 
 | 306 | +      "source": [  | 
 | 307 | +        "# Sum of first three components' explained variance ratios\n",  | 
 | 308 | +        "tsvd.explained_variance_ratio_[0:3].sum()"  | 
 | 309 | +      ]  | 
 | 310 | +    },  | 
 | 311 | +    {  | 
 | 312 | +      "cell_type": "code",  | 
 | 313 | +      "execution_count": null,  | 
 | 314 | +      "metadata": {  | 
 | 315 | +        "id": "zbExVkXp8vpi"  | 
 | 316 | +      },  | 
 | 317 | +      "outputs": [],  | 
 | 318 | +      "source": []  | 
 | 319 | +    }  | 
 | 320 | +  ],  | 
 | 321 | +  "metadata": {  | 
 | 322 | +    "colab": {  | 
 | 323 | +      "name": "DimentionalityReductionUsingFeatureExtraction_PythonCodeTutorial.ipynb",  | 
 | 324 | +      "provenance": []  | 
 | 325 | +    },  | 
 | 326 | +    "interpreter": {  | 
 | 327 | +      "hash": "f89a88aed07bbcd763ac68893150ace71e487877d8c6527a76855322f20001c6"  | 
 | 328 | +    },  | 
 | 329 | +    "kernelspec": {  | 
 | 330 | +      "display_name": "Python 3.9.12 64-bit",  | 
 | 331 | +      "language": "python",  | 
 | 332 | +      "name": "python3"  | 
 | 333 | +    },  | 
 | 334 | +    "language_info": {  | 
 | 335 | +      "name": "python",  | 
 | 336 | +      "version": "3.9.12"  | 
 | 337 | +    }  | 
 | 338 | +  },  | 
 | 339 | +  "nbformat": 4,  | 
 | 340 | +  "nbformat_minor": 0  | 
 | 341 | +}  | 
0 commit comments