|
| 1 | +{ |
| 2 | + "cells": [ |
| 3 | + { |
| 4 | + "cell_type": "code", |
| 5 | + "execution_count": 22, |
| 6 | + "metadata": { |
| 7 | + "collapsed": true |
| 8 | + }, |
| 9 | + "outputs": [], |
| 10 | + "source": [ |
| 11 | + "# 全家桶 使用 Pytorch 必备 具体功能且看下文\n", |
| 12 | + "import torch\n", |
| 13 | + "import torch.nn as nn\n", |
| 14 | + "import torch.nn.functional as F\n", |
| 15 | + "import torch.optim as optim\n", |
| 16 | + "from torch.autograd import Variable" |
| 17 | + ] |
| 18 | + }, |
| 19 | + { |
| 20 | + "cell_type": "code", |
| 21 | + "execution_count": 62, |
| 22 | + "metadata": {}, |
| 23 | + "outputs": [], |
| 24 | + "source": [ |
| 25 | + "class CNN(nn.Module):\n", |
| 26 | + " def __init__(self, output_dimesion, vocab_size, dropout_rate, emb_dim, max_len, n_filters, init_W=None):\n", |
| 27 | + " # number_filters\n", |
| 28 | + " super(CNN, self).__init__()\n", |
| 29 | + "\n", |
| 30 | + " self.max_len = max_len\n", |
| 31 | + " max_features = vocab_size\n", |
| 32 | + " vanila_dimension = 200 #倒数第二层的节点数\n", |
| 33 | + " projection_dimension = output_dimesion #输出层的节点数\n", |
| 34 | + " self.qual_conv_set = {} \n", |
| 35 | + "\n", |
| 36 | + " '''Embedding Layer'''\n", |
| 37 | + " if init_W is None:\n", |
| 38 | + " # 先尝试使用embedding随机赋值\n", |
| 39 | + " self.embedding = nn.Embedding(max_features, emb_dim)\n", |
| 40 | + "\n", |
| 41 | + " self.conv1 = nn.Sequential(\n", |
| 42 | + " # 卷积层的激活函数\n", |
| 43 | + " nn.Conv2d(1, n_filters, kernel_size=(3, emb_dim)),\n", |
| 44 | + " nn.ReLU(),\n", |
| 45 | + " nn.MaxPool2d(kernel_size=(max_len - 3 + 1, 1))\n", |
| 46 | + " )\n", |
| 47 | + " self.conv2 = nn.Sequential(\n", |
| 48 | + " nn.Conv2d(1, n_filters, kernel_size=(4, emb_dim)),\n", |
| 49 | + " nn.ReLU(),\n", |
| 50 | + " nn.MaxPool2d(kernel_size=(max_len - 4 + 1, 1))\n", |
| 51 | + " )\n", |
| 52 | + " self.conv3 = nn.Sequential(\n", |
| 53 | + " nn.Conv2d(1, n_filters, kernel_size=(5, emb_dim)),\n", |
| 54 | + " nn.ReLU(),\n", |
| 55 | + " nn.MaxPool2d(kernel_size=(max_len - 5 + 1, 1))\n", |
| 56 | + " )\n", |
| 57 | + " \n", |
| 58 | + " '''Dropout Layer'''\n", |
| 59 | + " #layer = Dense(vanila_dimension, activation='tanh')(flatten_layer)\n", |
| 60 | + " #layer = Dropout(dropout_rate)(layer)\n", |
| 61 | + " self.layer = nn.Linear(300, vanila_dimension)\n", |
| 62 | + " self.dropout = nn.Dropout(dropout_rate)\n", |
| 63 | + "\n", |
| 64 | + " '''Projection Layer & Output Layer'''\n", |
| 65 | + " #output_layer = Dense(projection_dimension, activation='tanh')(layer)\n", |
| 66 | + " self.output_layer = nn.Linear(vanila_dimension, projection_dimension)\n", |
| 67 | + "\n", |
| 68 | + " \n", |
| 69 | + "\n", |
| 70 | + " def forward(self, input):\n", |
| 71 | + " embeds = self.embedding(input)\n", |
| 72 | + " # concatenate the tensors\n", |
| 73 | + " x = self.conv_1(embeds)\n", |
| 74 | + " y = self.conv_2(embeds)\n", |
| 75 | + " z = self.conv_3(embeds)\n", |
| 76 | + " flatten = torch.cat((x,view(-1), y.view(-1), z.view(-1)))\n", |
| 77 | + " \n", |
| 78 | + " out = F.tanh(self.layer(flatten))\n", |
| 79 | + " out = self.dropout(out)\n", |
| 80 | + " out = F.tanh(self.output_layer(out)) \n", |
| 81 | + " \n", |
| 82 | + "cnn = CNN(50, 8000, 0.5, 50, 150, 100)" |
| 83 | + ] |
| 84 | + }, |
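|  | +  {
|  | +   "cell_type": "code",
|  | +   "execution_count": null,
|  | +   "metadata": {},
|  | +   "outputs": [],
|  | +   "source": [
|  | +    "# Minimal sanity-check sketch (not part of the original notebook): push a dummy\n",
|  | +    "# index sequence of length max_len=150 through the network and confirm the output\n",
|  | +    "# has projection_dimension=50 entries. Assumes the unbatched, fixed-length input\n",
|  | +    "# convention used by forward() above.\n",
|  | +    "dummy_input = Variable(torch.zeros(150).long())\n",
|  | +    "dummy_output = cnn(dummy_input)\n",
|  | +    "print(dummy_output.size())  # expected: torch.Size([50])"
|  | +   ]
|  | +  },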
| 85 | + { |
| 86 | + "cell_type": "code", |
| 87 | + "execution_count": 63, |
| 88 | + "metadata": {}, |
| 89 | + "outputs": [ |
| 90 | + { |
| 91 | + "data": { |
| 92 | + "text/plain": [ |
| 93 | + "CNN(\n", |
| 94 | + " (embedding): Embedding(8000, 50)\n", |
| 95 | + " (conv1): Sequential(\n", |
| 96 | + " (0): Conv2d (1, 100, kernel_size=(3, 50), stride=(1, 1))\n", |
| 97 | + " (1): ReLU()\n", |
| 98 | + " (2): MaxPool2d(kernel_size=(148, 1), stride=(148, 1), dilation=(1, 1))\n", |
| 99 | + " )\n", |
| 100 | + " (conv2): Sequential(\n", |
| 101 | + " (0): Conv2d (1, 100, kernel_size=(4, 50), stride=(1, 1))\n", |
| 102 | + " (1): ReLU()\n", |
| 103 | + " (2): MaxPool2d(kernel_size=(147, 1), stride=(147, 1), dilation=(1, 1))\n", |
| 104 | + " )\n", |
| 105 | + " (conv3): Sequential(\n", |
| 106 | + " (0): Conv2d (1, 100, kernel_size=(5, 50), stride=(1, 1))\n", |
| 107 | + " (1): ReLU()\n", |
| 108 | + " (2): MaxPool2d(kernel_size=(146, 1), stride=(146, 1), dilation=(1, 1))\n", |
| 109 | + " )\n", |
| 110 | + " (layer): Linear(in_features=300, out_features=200)\n", |
| 111 | + " (dropout): Dropout(p=0.5)\n", |
| 112 | + " (output_layer): Linear(in_features=200, out_features=50)\n", |
| 113 | + ")" |
| 114 | + ] |
| 115 | + }, |
| 116 | + "execution_count": 63, |
| 117 | + "metadata": {}, |
| 118 | + "output_type": "execute_result" |
| 119 | + } |
| 120 | + ], |
| 121 | + "source": [ |
| 122 | + "cnn" |
| 123 | + ] |
| 124 | + }, |
| 125 | + { |
| 126 | + "cell_type": "code", |
| 127 | + "execution_count": 13, |
| 128 | + "metadata": {}, |
| 129 | + "outputs": [ |
| 130 | + { |
| 131 | + "name": "stdout", |
| 132 | + "output_type": "stream", |
| 133 | + "text": [ |
| 134 | + "Load preprocessed rating data - ./data/preprocessed/ml-1m//ratings.all\n", |
| 135 | + "Load preprocessed document data - ./data/preprocessed/ml-1m//document.all\n" |
| 136 | + ] |
| 137 | + } |
| 138 | + ], |
| 139 | + "source": [ |
| 140 | + "from data_manager import Data_Factory\n", |
| 141 | + "import pprint\n", |
| 142 | + "data_factory = Data_Factory()\n", |
| 143 | + "\n", |
| 144 | + "R, D_all = data_factory.load(\"./data/preprocessed/ml-1m/\")" |
| 145 | + ] |
| 146 | + }, |
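|  | +  {
|  | +   "cell_type": "code",
|  | +   "execution_count": null,
|  | +   "metadata": {},
|  | +   "outputs": [],
|  | +   "source": [
|  | +    "# Exploratory sketch (not in the original notebook): peek at what Data_Factory.load()\n",
|  | +    "# returned. R holds the preprocessed rating data and D_all the document data; only\n",
|  | +    "# the 'X_sequence' and 'X_vocab' entries are used in the cells below.\n",
|  | +    "print(type(R), type(D_all))\n",
|  | +    "print(sorted(D_all.keys()))"
|  | +   ]
|  | +  },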
| 147 | + { |
| 148 | + "cell_type": "code", |
| 149 | + "execution_count": 59, |
| 150 | + "metadata": {}, |
| 151 | + "outputs": [ |
| 152 | + { |
| 153 | + "name": "stdout", |
| 154 | + "output_type": "stream", |
| 155 | + "text": [ |
| 156 | + "3544\n", |
| 157 | + "95\n", |
| 158 | + "[2497, 7513, 6630, 4814, 1994, 2754, 3900, 5018, 4346, 7235, 2533, 2610, 2633, 4156, 249, 2161, 1127, 146, 6530, 5018, 337, 6530, 6985, 6530, 4157, 3071, 6530, 3900, 1500, 4316, 7833, 5018, 5150, 7102, 6530, 6476, 6530, 1394, 4450, 6751, 1238, 7824, 6530, 740, 3773, 7062, 5917, 2514, 1171, 3782, 5251, 2992, 2353, 1496, 7819, 6530, 2101, 1496, 7446, 5832, 1052, 4109, 1865, 7355, 7769, 1496, 3590, 2271, 7458, 5529, 6087, 475, 6530, 2063, 1908, 2497, 2754, 3379, 4161, 5526, 6474, 2535, 7934, 3782, 6530, 5150, 807, 1354, 172, 4156, 355, 3417, 249, 2168, 1649]\n" |
| 159 | + ] |
| 160 | + }, |
| 161 | + { |
| 162 | + "ename": "AttributeError", |
| 163 | + "evalue": "'list' object has no attribute 'shape'", |
| 164 | + "output_type": "error", |
| 165 | + "traceback": [ |
| 166 | + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", |
| 167 | + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", |
| 168 | + "\u001b[0;32m<ipython-input-59-2917a9b68dc8>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mCNN_X\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mCNN_X\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mCNN_X\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", |
| 169 | + "\u001b[0;31mAttributeError\u001b[0m: 'list' object has no attribute 'shape'" |
| 170 | + ] |
| 171 | + } |
| 172 | + ], |
| 173 | + "source": [ |
| 174 | + "CNN_X = D_all['X_sequence']\n", |
| 175 | + "print(len(CNN_X))\n", |
| 176 | + "print(len(CNN_X[3]))\n", |
| 177 | + "print(CNN_X[3])\n", |
| 178 | + "print(CNN_X)" |
| 179 | + ] |
| 180 | + }, |
| 181 | + { |
| 182 | + "cell_type": "code", |
| 183 | + "execution_count": 9, |
| 184 | + "metadata": {}, |
| 185 | + "outputs": [ |
| 186 | + { |
| 187 | + "data": { |
| 188 | + "text/plain": [ |
| 189 | + "8000" |
| 190 | + ] |
| 191 | + }, |
| 192 | + "execution_count": 9, |
| 193 | + "metadata": {}, |
| 194 | + "output_type": "execute_result" |
| 195 | + } |
| 196 | + ], |
| 197 | + "source": [ |
| 198 | + "len(D_all['X_vocab'])" |
| 199 | + ] |
| 200 | + }, |
| 201 | + { |
| 202 | + "cell_type": "code", |
| 203 | + "execution_count": 48, |
| 204 | + "metadata": {}, |
| 205 | + "outputs": [ |
| 206 | + { |
| 207 | + "name": "stdout", |
| 208 | + "output_type": "stream", |
| 209 | + "text": [ |
| 210 | + "Variable containing:\n", |
| 211 | + " 1.1375 0.6195 0.1585 1.0799 0.1302\n", |
| 212 | + "-0.5405 -0.9589 -1.3669 1.2314 1.9734\n", |
| 213 | + "-0.4789 0.5938 0.1744 -0.0176 -0.0497\n", |
| 214 | + "-0.2310 -1.1388 0.7172 -0.4343 0.7839\n", |
| 215 | + " 0.5238 0.7899 -0.5901 1.0298 0.3844\n", |
| 216 | + "-1.4921 1.8542 -1.1308 0.7227 -1.6314\n", |
| 217 | + "-0.9999 0.4745 0.3701 0.2189 0.4824\n", |
| 218 | + " 0.0339 1.6608 0.5456 -2.0539 0.0004\n", |
| 219 | + " 0.0580 0.9189 1.2705 1.6964 -0.6851\n", |
| 220 | + "-0.4247 -1.4672 0.5220 0.0431 -0.2025\n", |
| 221 | + " 1.0033 -1.0548 1.1176 0.5650 -1.4660\n", |
| 222 | + "-0.8414 1.8125 1.8854 -1.6015 -0.6787\n", |
| 223 | + "-0.8838 0.0412 -0.6423 1.7509 -1.9570\n", |
| 224 | + " 0.5814 -1.5999 0.6436 1.4211 -1.3188\n", |
| 225 | + "-0.4954 -0.6092 -1.6808 -1.0020 0.1801\n", |
| 226 | + "-0.9836 -0.0847 -1.2562 -0.1226 -0.2108\n", |
| 227 | + "-1.3440 -0.1142 -1.2649 0.2782 -1.4181\n", |
| 228 | + "-0.0528 0.0718 -0.6514 1.1687 -1.0889\n", |
| 229 | + " 1.7251 1.5146 -0.6547 -0.2933 -1.5057\n", |
| 230 | + " 0.0339 1.6608 0.5456 -2.0539 0.0004\n", |
| 231 | + "-0.7024 -0.6674 -1.9162 -0.1312 1.1091\n", |
| 232 | + " 1.7251 1.5146 -0.6547 -0.2933 -1.5057\n", |
| 233 | + " 0.2818 0.5606 -0.3546 -0.6588 -0.7651\n", |
| 234 | + " 1.7251 1.5146 -0.6547 -0.2933 -1.5057\n", |
| 235 | + "-0.7891 -1.7500 0.1098 0.8820 0.5139\n", |
| 236 | + " 1.2017 0.5298 -0.7179 -1.1478 -1.6993\n", |
| 237 | + " 1.7251 1.5146 -0.6547 -0.2933 -1.5057\n", |
| 238 | + "-0.9999 0.4745 0.3701 0.2189 0.4824\n", |
| 239 | + " 0.0888 -0.0128 1.5520 1.2025 0.6651\n", |
| 240 | + " 0.6077 0.5434 -1.5032 1.5325 1.8256\n", |
| 241 | + "-0.4112 -1.2229 -0.2878 0.6258 1.1456\n", |
| 242 | + " 0.0339 1.6608 0.5456 -2.0539 0.0004\n", |
| 243 | + " 0.1364 -0.6930 -2.3371 1.6786 0.5617\n", |
| 244 | + " 1.0285 -1.7050 -0.4896 -1.0000 0.9725\n", |
| 245 | + " 1.7251 1.5146 -0.6547 -0.2933 -1.5057\n", |
| 246 | + "-0.3655 0.4535 -0.4016 0.2056 -1.3832\n", |
| 247 | + " 1.7251 1.5146 -0.6547 -0.2933 -1.5057\n", |
| 248 | + " 0.1841 -0.2055 1.9259 1.5805 0.2368\n", |
| 249 | + "-0.4701 -0.7426 -1.1546 -1.3005 -1.7871\n", |
| 250 | + " 0.2266 -0.5332 -1.2338 0.4280 2.4386\n", |
| 251 | + "-0.6303 0.5834 0.5205 -0.8387 0.4257\n", |
| 252 | + " 0.5106 -0.4741 0.8534 -0.0879 0.0737\n", |
| 253 | + " 1.7251 1.5146 -0.6547 -0.2933 -1.5057\n", |
| 254 | + "-0.6402 1.6770 1.0849 0.3854 -1.0779\n", |
| 255 | + " 0.7510 -2.0220 -0.0449 -1.5944 -1.0741\n", |
| 256 | + " 0.0605 0.4658 -0.6328 -0.2047 0.2944\n", |
| 257 | + "-0.4521 0.4285 0.3141 0.3153 0.7379\n", |
| 258 | + "-0.2910 0.7501 1.2844 0.8987 -1.4570\n", |
| 259 | + " 0.2266 0.4233 -0.7622 0.6053 0.9736\n", |
| 260 | + "-0.5485 -0.0073 0.7028 0.4528 -1.2437\n", |
| 261 | + " 0.3651 -0.7326 1.1882 0.6137 -1.1131\n", |
| 262 | + "-0.2044 -1.9507 -0.3135 -1.3187 0.6094\n", |
| 263 | + "-1.1596 1.4216 -0.5054 -0.3568 -0.5185\n", |
| 264 | + "-0.4572 0.0472 -1.4310 0.5741 -1.2894\n", |
| 265 | + "-0.7071 1.8620 1.1305 -1.1232 1.5237\n", |
| 266 | + " 1.7251 1.5146 -0.6547 -0.2933 -1.5057\n", |
| 267 | + "-2.5396 1.3397 1.0959 -0.7480 0.0679\n", |
| 268 | + "-0.4572 0.0472 -1.4310 0.5741 -1.2894\n", |
| 269 | + " 1.6656 1.1903 -0.3698 0.2036 0.0240\n", |
| 270 | + " 0.7796 -1.7166 0.5709 0.0085 -1.1771\n", |
| 271 | + " 1.8073 0.3372 -0.1976 0.8187 0.1685\n", |
| 272 | + " 0.8279 -0.1674 -0.9651 -0.1265 0.0651\n", |
| 273 | + "-0.2603 -1.1816 0.3361 0.2628 0.8348\n", |
| 274 | + " 0.7354 0.1170 -0.9391 -2.4669 -1.3682\n", |
| 275 | + " 0.1860 -0.7448 -1.6378 -0.0045 1.5380\n", |
| 276 | + "-0.4572 0.0472 -1.4310 0.5741 -1.2894\n", |
| 277 | + " 2.1237 1.0455 -0.5948 0.0934 -1.6559\n", |
| 278 | + "-0.1634 -0.5910 0.2927 -0.0937 0.7996\n", |
| 279 | + " 2.6495 0.5423 -1.1649 -2.0393 0.2268\n", |
| 280 | + "-0.4307 -1.1426 -0.9575 -0.3125 -0.0436\n", |
| 281 | + " 0.2849 0.1704 -0.2270 0.0564 0.3925\n", |
| 282 | + " 2.3563 -0.5101 1.8536 0.4569 0.2821\n", |
| 283 | + " 1.7251 1.5146 -0.6547 -0.2933 -1.5057\n", |
| 284 | + " 1.6585 0.6344 -0.0001 -0.8202 0.1913\n", |
| 285 | + "-0.4837 -0.7519 -0.7759 -0.4802 -0.6648\n", |
| 286 | + " 1.1375 0.6195 0.1585 1.0799 0.1302\n", |
| 287 | + "-1.4921 1.8542 -1.1308 0.7227 -1.6314\n", |
| 288 | + "-1.1122 1.3342 -0.7807 -0.3339 -1.1619\n", |
| 289 | + " 0.1296 0.1896 1.2773 0.1513 -0.0704\n", |
| 290 | + " 0.9736 0.8593 0.3178 -2.2234 0.3245\n", |
| 291 | + "-0.7505 -0.9183 -1.8172 -0.0884 1.0104\n", |
| 292 | + "-0.8394 0.9989 -0.3466 -0.7640 -0.3779\n", |
| 293 | + "-0.6200 0.5447 -0.6092 -0.0782 -1.1962\n", |
| 294 | + "-0.5485 -0.0073 0.7028 0.4528 -1.2437\n", |
| 295 | + " 1.7251 1.5146 -0.6547 -0.2933 -1.5057\n", |
| 296 | + " 0.1364 -0.6930 -2.3371 1.6786 0.5617\n", |
| 297 | + "-1.1328 1.9744 -0.6251 0.9932 0.2207\n", |
| 298 | + "-1.6040 -0.5013 0.0782 1.1310 -0.4072\n", |
| 299 | + " 0.0398 -0.3110 0.3703 0.6808 -0.5264\n", |
| 300 | + " 0.5814 -1.5999 0.6436 1.4211 -1.3188\n", |
| 301 | + "-0.1824 -0.4074 -0.1582 -0.4725 1.2616\n", |
| 302 | + "-0.3176 -1.0342 0.9127 1.4634 -0.1190\n", |
| 303 | + "-0.4954 -0.6092 -1.6808 -1.0020 0.1801\n", |
| 304 | + " 0.4479 -0.5058 -2.0886 -2.4117 0.6307\n", |
| 305 | + " 1.4570 0.4706 -1.3763 -0.6453 0.4371\n", |
| 306 | + "[torch.FloatTensor of size 95x5]\n", |
| 307 | + "\n" |
| 308 | + ] |
| 309 | + } |
| 310 | + ], |
| 311 | + "source": [ |
| 312 | + "embeds = nn.Embedding(8000, 5)\n", |
| 313 | + "test = CNN_X[3]\n", |
| 314 | + "tensor = torch.LongTensor(list(map(int, test)))\n", |
| 315 | + "me_embed = embeds(Variable(tensor))\n", |
| 316 | + "print(me_embed)" |
| 317 | + ] |
| 318 | + }, |
| 319 | + { |
| 320 | + "cell_type": "code", |
| 321 | + "execution_count": 49, |
| 322 | + "metadata": {}, |
| 323 | + "outputs": [ |
| 324 | + { |
| 325 | + "data": { |
| 326 | + "text/plain": [ |
| 327 | + "<map at 0x7fd13d822ac8>" |
| 328 | + ] |
| 329 | + }, |
| 330 | + "execution_count": 49, |
| 331 | + "metadata": {}, |
| 332 | + "output_type": "execute_result" |
| 333 | + } |
| 334 | + ], |
| 335 | + "source": [ |
| 336 | + "map(int, test)" |
| 337 | + ] |
| 338 | + }, |
| 339 | + { |
| 340 | + "cell_type": "code", |
| 341 | + "execution_count": null, |
| 342 | + "metadata": { |
| 343 | + "collapsed": true |
| 344 | + }, |
| 345 | + "outputs": [], |
| 346 | + "source": [] |
| 347 | + } |
| 348 | + ], |
| 349 | + "metadata": { |
| 350 | + "kernelspec": { |
| 351 | + "display_name": "Python 3", |
| 352 | + "language": "python", |
| 353 | + "name": "python3" |
| 354 | + }, |
| 355 | + "language_info": { |
| 356 | + "codemirror_mode": { |
| 357 | + "name": "ipython", |
| 358 | + "version": 3 |
| 359 | + }, |
| 360 | + "file_extension": ".py", |
| 361 | + "mimetype": "text/x-python", |
| 362 | + "name": "python", |
| 363 | + "nbconvert_exporter": "python", |
| 364 | + "pygments_lexer": "ipython3", |
| 365 | + "version": "3.6.3" |
| 366 | + } |
| 367 | + }, |
| 368 | + "nbformat": 4, |
| 369 | + "nbformat_minor": 2 |
| 370 | +} |