| 
18 | 18 |    "outputs": [],  | 
19 | 19 |    "source": [  | 
20 | 20 |     "from text import *\n",  | 
21 |  | -    "from utils import open_data"  | 
 | 21 | +    "from utils import open_data\n",  | 
 | 22 | +    "from notebook import psource"  | 
22 | 23 |    ]  | 
23 | 24 |   },  | 
24 | 25 |   {  | 
 | 
55 | 56 |   },  | 
56 | 57 |   {  | 
57 | 58 |    "cell_type": "code",  | 
58 |  | -   "execution_count": 2,  | 
59 |  | -   "metadata": {  | 
60 |  | -    "collapsed": true  | 
61 |  | -   },  | 
62 |  | -   "outputs": [],  | 
63 |  | -   "source": [  | 
64 |  | -    "%psource UnigramWordModel"  | 
65 |  | -   ]  | 
66 |  | -  },  | 
67 |  | -  {  | 
68 |  | -   "cell_type": "code",  | 
69 |  | -   "execution_count": 3,  | 
70 |  | -   "metadata": {  | 
71 |  | -    "collapsed": true  | 
72 |  | -   },  | 
73 |  | -   "outputs": [],  | 
74 |  | -   "source": [  | 
75 |  | -    "%psource NgramWordModel"  | 
76 |  | -   ]  | 
77 |  | -  },  | 
78 |  | -  {  | 
79 |  | -   "cell_type": "code",  | 
80 |  | -   "execution_count": 4,  | 
81 |  | -   "metadata": {  | 
82 |  | -    "collapsed": true  | 
83 |  | -   },  | 
84 |  | -   "outputs": [],  | 
85 |  | -   "source": [  | 
86 |  | -    "%psource UnigramCharModel"  | 
87 |  | -   ]  | 
88 |  | -  },  | 
89 |  | -  {  | 
90 |  | -   "cell_type": "code",  | 
91 |  | -   "execution_count": 6,  | 
92 |  | -   "metadata": {  | 
93 |  | -    "collapsed": true  | 
94 |  | -   },  | 
 | 59 | +   "execution_count": null,  | 
 | 60 | +   "metadata": {},  | 
95 | 61 |    "outputs": [],  | 
96 | 62 |    "source": [  | 
97 |  | -    "%psource NgramCharModel"  | 
 | 63 | +    "psource(UnigramWordModel, NgramWordModel, UnigramCharModel, NgramCharModel)"  | 
98 | 64 |    ]  | 
99 | 65 |   },  | 
100 | 66 |   {  | 
 | 
117 | 83 |   },  | 
118 | 84 |   {  | 
119 | 85 |    "cell_type": "code",  | 
120 |  | -   "execution_count": 8,  | 
 | 86 | +   "execution_count": 2,  | 
121 | 87 |    "metadata": {},  | 
122 | 88 |    "outputs": [  | 
123 | 89 |     {  | 
 | 
156 | 122 |   },  | 
157 | 123 |   {  | 
158 | 124 |    "cell_type": "code",  | 
159 |  | -   "execution_count": 12,  | 
 | 125 | +   "execution_count": 3,  | 
160 | 126 |    "metadata": {},  | 
161 | 127 |    "outputs": [  | 
162 | 128 |     {  | 
163 | 129 |      "name": "stdout",  | 
164 | 130 |      "output_type": "stream",  | 
165 | 131 |      "text": [  | 
166 |  | -      "Conditional Probabilities Table: {'myself': 1, 'to': 2, 'at': 2, 'pleased': 1, 'considered': 1, 'will': 1, 'intoxicated': 1, 'glad': 1, 'certain': 2, 'in': 2, 'now': 2, 'sitting': 1, 'unusually': 1, 'approaching': 1, 'by': 1, 'covered': 1, 'standing': 1, 'allowed': 1, 'surprised': 1, 'keenly': 1, 'afraid': 1, 'once': 2, 'crushed': 1, 'not': 4, 'rapt': 1, 'simulating': 1, 'rapidly': 1, 'quite': 1, 'describing': 1, 'wearied': 1} \n",  | 
 | 132 | +      "Conditional Probabilities Table: {'now': 2, 'glad': 1, 'keenly': 1, 'considered': 1, 'once': 2, 'not': 4, 'in': 2, 'by': 1, 'simulating': 1, 'intoxicated': 1, 'wearied': 1, 'quite': 1, 'certain': 2, 'sitting': 1, 'to': 2, 'rapidly': 1, 'will': 1, 'describing': 1, 'allowed': 1, 'at': 2, 'afraid': 1, 'covered': 1, 'approaching': 1, 'standing': 1, 'myself': 1, 'surprised': 1, 'unusually': 1, 'rapt': 1, 'pleased': 1, 'crushed': 1} \n",  | 
167 | 133 |       "\n",  | 
168 | 134 |       "Conditional Probability of 'once' give 'i was': 0.05128205128205128 \n",  | 
169 | 135 |       "\n",  | 
170 |  | -      "Next word after 'i was': not\n"  | 
 | 136 | +      "Next word after 'i was': wearied\n"  | 
171 | 137 |      ]  | 
172 | 138 |     }  | 
173 | 139 |    ],  | 
 | 
198 | 164 |   },  | 
199 | 165 |   {  | 
200 | 166 |    "cell_type": "code",  | 
201 |  | -   "execution_count": 3,  | 
 | 167 | +   "execution_count": 4,  | 
202 | 168 |    "metadata": {},  | 
203 | 169 |    "outputs": [  | 
204 | 170 |     {  | 
 | 
246 | 212 |   },  | 
247 | 213 |   {  | 
248 | 214 |    "cell_type": "code",  | 
249 |  | -   "execution_count": 4,  | 
 | 215 | +   "execution_count": 5,  | 
250 | 216 |    "metadata": {},  | 
251 | 217 |    "outputs": [  | 
252 | 218 |     {  | 
253 | 219 |      "name": "stdout",  | 
254 | 220 |      "output_type": "stream",  | 
255 | 221 |      "text": [  | 
256 |  | -      "not it of before most regions multitudes the a three\n",  | 
257 |  | -      "the inhabitants of so also refers to the cube with\n",  | 
258 |  | -      "the service of education waxed daily more numerous than the\n"  | 
 | 222 | +      "hearing as inside is confined to conduct by the duties\n",  | 
 | 223 | +      "all and of voice being in a day of the\n",  | 
 | 224 | +      "party they are stirred to mutual warfare and perish by\n"  | 
259 | 225 |      ]  | 
260 | 226 |     }  | 
261 | 227 |    ],  | 
 | 
283 | 249 |   },  | 
284 | 250 |   {  | 
285 | 251 |    "cell_type": "code",  | 
286 |  | -   "execution_count": 19,  | 
 | 252 | +   "execution_count": 6,  | 
287 | 253 |    "metadata": {},  | 
288 | 254 |    "outputs": [  | 
289 | 255 |     {  | 
290 | 256 |      "name": "stdout",  | 
291 | 257 |      "output_type": "stream",  | 
292 | 258 |      "text": [  | 
293 |  | -      "it again stealing away through the ranks of his nephew but he laughed most immoderately\n",  | 
294 |  | -      "exclaiming that he henceforth exchanged them for the artist s pencil how great and glorious\n",  | 
295 |  | -      "compound now for nothing worse but however all that is quite out of the question\n",  | 
296 |  | -      "accordance with precedent and for the sake of secrecy he must condemn him to perpetual\n"  | 
 | 259 | +      "leave them at cleveland this christmas now pray do not ask you to relate or\n",  | 
 | 260 | +      "meaning and both of us sprang forward in the direction and no sooner had they\n",  | 
 | 261 | +      "palmer though very unwilling to go as well from real humanity and good nature as\n",  | 
 | 262 | +      "time about what they should do and they agreed he should take orders directly and\n"  | 
297 | 263 |      ]  | 
298 | 264 |     }  | 
299 | 265 |    ],  | 
300 | 266 |    "source": [  | 
301 | 267 |     "data = open_data(\"EN-text/flatland.txt\").read()\n",  | 
302 |  | -    "data += open_data(\"EN-text/gutenberg.txt\").read()\n",  | 
303 | 268 |     "data += open_data(\"EN-text/sense.txt\").read()\n",  | 
304 | 269 |     "\n",  | 
305 | 270 |     "wordseq = words(data)\n",  | 
 | 
344 | 309 |   },  | 
345 | 310 |   {  | 
346 | 311 |    "cell_type": "code",  | 
347 |  | -   "execution_count": 3,  | 
348 |  | -   "metadata": {  | 
349 |  | -    "collapsed": true  | 
350 |  | -   },  | 
 | 312 | +   "execution_count": null,  | 
 | 313 | +   "metadata": {},  | 
351 | 314 |    "outputs": [],  | 
352 | 315 |    "source": [  | 
353 |  | -    "%psource viterbi_segment"  | 
 | 316 | +    "psource(viterbi_segment)"  | 
354 | 317 |    ]  | 
355 | 318 |   },  | 
356 | 319 |   {  | 
 | 
373 | 336 |   },  | 
374 | 337 |   {  | 
375 | 338 |    "cell_type": "code",  | 
376 |  | -   "execution_count": 4,  | 
 | 339 | +   "execution_count": 3,  | 
377 | 340 |    "metadata": {},  | 
378 | 341 |    "outputs": [  | 
379 | 342 |     {  | 
 | 
388 | 351 |    "source": [  | 
389 | 352 |     "flatland = open_data(\"EN-text/flatland.txt\").read()\n",  | 
390 | 353 |     "wordseq = words(flatland)\n",  | 
391 |  | -    "P = UnigramTextModel(wordseq)\n",  | 
 | 354 | +    "P = UnigramWordModel(wordseq)\n",  | 
392 | 355 |     "text = \"itiseasytoreadwordswithoutspaces\"\n",  | 
393 | 356 |     "\n",  | 
394 | 357 |     "s, p = viterbi_segment(text,P)\n",  | 
 | 
447 | 410 |    },  | 
448 | 411 |    "outputs": [],  | 
449 | 412 |    "source": [  | 
450 |  | -    "%psource IRSystem"  | 
 | 413 | +    "psource(IRSystem)"  | 
451 | 414 |    ]  | 
452 | 415 |   },  | 
453 | 416 |   {  | 
 | 
490 | 453 |    },  | 
491 | 454 |    "outputs": [],  | 
492 | 455 |    "source": [  | 
493 |  | -    "%psource UnixConsultant"  | 
 | 456 | +    "psource(UnixConsultant)"  | 
494 | 457 |    ]  | 
495 | 458 |   },  | 
496 | 459 |   {  | 
 | 
504 | 467 |   },  | 
505 | 468 |   {  | 
506 | 469 |    "cell_type": "code",  | 
507 |  | -   "execution_count": 9,  | 
 | 470 | +   "execution_count": 4,  | 
508 | 471 |    "metadata": {},  | 
509 | 472 |    "outputs": [  | 
510 | 473 |     {  | 
 | 
533 | 496 |   },  | 
534 | 497 |   {  | 
535 | 498 |    "cell_type": "code",  | 
536 |  | -   "execution_count": 10,  | 
 | 499 | +   "execution_count": 5,  | 
537 | 500 |    "metadata": {},  | 
538 | 501 |    "outputs": [  | 
539 | 502 |     {  | 
 | 
628 | 591 |   },  | 
629 | 592 |   {  | 
630 | 593 |    "cell_type": "code",  | 
631 |  | -   "execution_count": 5,  | 
 | 594 | +   "execution_count": 6,  | 
632 | 595 |    "metadata": {},  | 
633 | 596 |    "outputs": [  | 
634 | 597 |     {  | 
 | 
656 | 619 |   },  | 
657 | 620 |   {  | 
658 | 621 |    "cell_type": "code",  | 
659 |  | -   "execution_count": 6,  | 
 | 622 | +   "execution_count": 7,  | 
660 | 623 |    "metadata": {},  | 
661 | 624 |    "outputs": [  | 
662 | 625 |     {  | 
 | 
748 | 711 |   },  | 
749 | 712 |   {  | 
750 | 713 |    "cell_type": "code",  | 
751 |  | -   "execution_count": 10,  | 
752 |  | -   "metadata": {  | 
753 |  | -    "collapsed": true  | 
754 |  | -   },  | 
 | 714 | +   "execution_count": null,  | 
 | 715 | +   "metadata": {},  | 
755 | 716 |    "outputs": [],  | 
756 | 717 |    "source": [  | 
757 |  | -    "%psource PermutationDecoder"  | 
 | 718 | +    "psource(PermutationDecoder)"  | 
758 | 719 |    ]  | 
759 | 720 |   },  | 
760 | 721 |   {  | 
 | 
811 | 772 |    "name": "python",  | 
812 | 773 |    "nbconvert_exporter": "python",  | 
813 | 774 |    "pygments_lexer": "ipython3",  | 
814 |  | -   "version": "3.5.2+"  | 
 | 775 | +   "version": "3.5.3"  | 
815 | 776 |   }  | 
816 | 777 |  },  | 
817 | 778 |  "nbformat": 4,  | 
 | 
0 commit comments