|
18 | 18 | "outputs": [], |
19 | 19 | "source": [ |
20 | 20 | "from text import *\n", |
21 | | - "from utils import open_data" |
| 21 | + "from utils import open_data\n", |
| 22 | + "from notebook import psource" |
22 | 23 | ] |
23 | 24 | }, |
24 | 25 | { |
|
55 | 56 | }, |
56 | 57 | { |
57 | 58 | "cell_type": "code", |
58 | | - "execution_count": 2, |
59 | | - "metadata": { |
60 | | - "collapsed": true |
61 | | - }, |
62 | | - "outputs": [], |
63 | | - "source": [ |
64 | | - "%psource UnigramWordModel" |
65 | | - ] |
66 | | - }, |
67 | | - { |
68 | | - "cell_type": "code", |
69 | | - "execution_count": 3, |
70 | | - "metadata": { |
71 | | - "collapsed": true |
72 | | - }, |
73 | | - "outputs": [], |
74 | | - "source": [ |
75 | | - "%psource NgramWordModel" |
76 | | - ] |
77 | | - }, |
78 | | - { |
79 | | - "cell_type": "code", |
80 | | - "execution_count": 4, |
81 | | - "metadata": { |
82 | | - "collapsed": true |
83 | | - }, |
84 | | - "outputs": [], |
85 | | - "source": [ |
86 | | - "%psource UnigramCharModel" |
87 | | - ] |
88 | | - }, |
89 | | - { |
90 | | - "cell_type": "code", |
91 | | - "execution_count": 6, |
92 | | - "metadata": { |
93 | | - "collapsed": true |
94 | | - }, |
| 59 | + "execution_count": null, |
| 60 | + "metadata": {}, |
95 | 61 | "outputs": [], |
96 | 62 | "source": [ |
97 | | - "%psource NgramCharModel" |
| 63 | + "psource(UnigramWordModel, NgramWordModel, UnigramCharModel, NgramCharModel)" |
98 | 64 | ] |
99 | 65 | }, |
100 | 66 | { |
|
117 | 83 | }, |
118 | 84 | { |
119 | 85 | "cell_type": "code", |
120 | | - "execution_count": 8, |
| 86 | + "execution_count": 2, |
121 | 87 | "metadata": {}, |
122 | 88 | "outputs": [ |
123 | 89 | { |
|
156 | 122 | }, |
157 | 123 | { |
158 | 124 | "cell_type": "code", |
159 | | - "execution_count": 12, |
| 125 | + "execution_count": 3, |
160 | 126 | "metadata": {}, |
161 | 127 | "outputs": [ |
162 | 128 | { |
163 | 129 | "name": "stdout", |
164 | 130 | "output_type": "stream", |
165 | 131 | "text": [ |
166 | | - "Conditional Probabilities Table: {'myself': 1, 'to': 2, 'at': 2, 'pleased': 1, 'considered': 1, 'will': 1, 'intoxicated': 1, 'glad': 1, 'certain': 2, 'in': 2, 'now': 2, 'sitting': 1, 'unusually': 1, 'approaching': 1, 'by': 1, 'covered': 1, 'standing': 1, 'allowed': 1, 'surprised': 1, 'keenly': 1, 'afraid': 1, 'once': 2, 'crushed': 1, 'not': 4, 'rapt': 1, 'simulating': 1, 'rapidly': 1, 'quite': 1, 'describing': 1, 'wearied': 1} \n", |
| 132 | + "Conditional Probabilities Table: {'now': 2, 'glad': 1, 'keenly': 1, 'considered': 1, 'once': 2, 'not': 4, 'in': 2, 'by': 1, 'simulating': 1, 'intoxicated': 1, 'wearied': 1, 'quite': 1, 'certain': 2, 'sitting': 1, 'to': 2, 'rapidly': 1, 'will': 1, 'describing': 1, 'allowed': 1, 'at': 2, 'afraid': 1, 'covered': 1, 'approaching': 1, 'standing': 1, 'myself': 1, 'surprised': 1, 'unusually': 1, 'rapt': 1, 'pleased': 1, 'crushed': 1} \n", |
167 | 133 | "\n", |
168 | 134 | "Conditional Probability of 'once' give 'i was': 0.05128205128205128 \n", |
169 | 135 | "\n", |
170 | | - "Next word after 'i was': not\n" |
| 136 | + "Next word after 'i was': wearied\n" |
171 | 137 | ] |
172 | 138 | } |
173 | 139 | ], |
|
198 | 164 | }, |
199 | 165 | { |
200 | 166 | "cell_type": "code", |
201 | | - "execution_count": 3, |
| 167 | + "execution_count": 4, |
202 | 168 | "metadata": {}, |
203 | 169 | "outputs": [ |
204 | 170 | { |
|
246 | 212 | }, |
247 | 213 | { |
248 | 214 | "cell_type": "code", |
249 | | - "execution_count": 4, |
| 215 | + "execution_count": 5, |
250 | 216 | "metadata": {}, |
251 | 217 | "outputs": [ |
252 | 218 | { |
253 | 219 | "name": "stdout", |
254 | 220 | "output_type": "stream", |
255 | 221 | "text": [ |
256 | | - "not it of before most regions multitudes the a three\n", |
257 | | - "the inhabitants of so also refers to the cube with\n", |
258 | | - "the service of education waxed daily more numerous than the\n" |
| 222 | + "hearing as inside is confined to conduct by the duties\n", |
| 223 | + "all and of voice being in a day of the\n", |
| 224 | + "party they are stirred to mutual warfare and perish by\n" |
259 | 225 | ] |
260 | 226 | } |
261 | 227 | ], |
|
283 | 249 | }, |
284 | 250 | { |
285 | 251 | "cell_type": "code", |
286 | | - "execution_count": 19, |
| 252 | + "execution_count": 6, |
287 | 253 | "metadata": {}, |
288 | 254 | "outputs": [ |
289 | 255 | { |
290 | 256 | "name": "stdout", |
291 | 257 | "output_type": "stream", |
292 | 258 | "text": [ |
293 | | - "it again stealing away through the ranks of his nephew but he laughed most immoderately\n", |
294 | | - "exclaiming that he henceforth exchanged them for the artist s pencil how great and glorious\n", |
295 | | - "compound now for nothing worse but however all that is quite out of the question\n", |
296 | | - "accordance with precedent and for the sake of secrecy he must condemn him to perpetual\n" |
| 259 | + "leave them at cleveland this christmas now pray do not ask you to relate or\n", |
| 260 | + "meaning and both of us sprang forward in the direction and no sooner had they\n", |
| 261 | + "palmer though very unwilling to go as well from real humanity and good nature as\n", |
| 262 | + "time about what they should do and they agreed he should take orders directly and\n" |
297 | 263 | ] |
298 | 264 | } |
299 | 265 | ], |
300 | 266 | "source": [ |
301 | 267 | "data = open_data(\"EN-text/flatland.txt\").read()\n", |
302 | | - "data += open_data(\"EN-text/gutenberg.txt\").read()\n", |
303 | 268 | "data += open_data(\"EN-text/sense.txt\").read()\n", |
304 | 269 | "\n", |
305 | 270 | "wordseq = words(data)\n", |
|
344 | 309 | }, |
345 | 310 | { |
346 | 311 | "cell_type": "code", |
347 | | - "execution_count": 3, |
348 | | - "metadata": { |
349 | | - "collapsed": true |
350 | | - }, |
| 312 | + "execution_count": null, |
| 313 | + "metadata": {}, |
351 | 314 | "outputs": [], |
352 | 315 | "source": [ |
353 | | - "%psource viterbi_segment" |
| 316 | + "psource(viterbi_segment)" |
354 | 317 | ] |
355 | 318 | }, |
356 | 319 | { |
|
373 | 336 | }, |
374 | 337 | { |
375 | 338 | "cell_type": "code", |
376 | | - "execution_count": 4, |
| 339 | + "execution_count": 3, |
377 | 340 | "metadata": {}, |
378 | 341 | "outputs": [ |
379 | 342 | { |
|
388 | 351 | "source": [ |
389 | 352 | "flatland = open_data(\"EN-text/flatland.txt\").read()\n", |
390 | 353 | "wordseq = words(flatland)\n", |
391 | | - "P = UnigramTextModel(wordseq)\n", |
| 354 | + "P = UnigramWordModel(wordseq)\n", |
392 | 355 | "text = \"itiseasytoreadwordswithoutspaces\"\n", |
393 | 356 | "\n", |
394 | 357 | "s, p = viterbi_segment(text,P)\n", |
|
447 | 410 | }, |
448 | 411 | "outputs": [], |
449 | 412 | "source": [ |
450 | | - "%psource IRSystem" |
| 413 | + "psource(IRSystem)" |
451 | 414 | ] |
452 | 415 | }, |
453 | 416 | { |
|
490 | 453 | }, |
491 | 454 | "outputs": [], |
492 | 455 | "source": [ |
493 | | - "%psource UnixConsultant" |
| 456 | + "psource(UnixConsultant)" |
494 | 457 | ] |
495 | 458 | }, |
496 | 459 | { |
|
504 | 467 | }, |
505 | 468 | { |
506 | 469 | "cell_type": "code", |
507 | | - "execution_count": 9, |
| 470 | + "execution_count": 4, |
508 | 471 | "metadata": {}, |
509 | 472 | "outputs": [ |
510 | 473 | { |
|
533 | 496 | }, |
534 | 497 | { |
535 | 498 | "cell_type": "code", |
536 | | - "execution_count": 10, |
| 499 | + "execution_count": 5, |
537 | 500 | "metadata": {}, |
538 | 501 | "outputs": [ |
539 | 502 | { |
|
628 | 591 | }, |
629 | 592 | { |
630 | 593 | "cell_type": "code", |
631 | | - "execution_count": 5, |
| 594 | + "execution_count": 6, |
632 | 595 | "metadata": {}, |
633 | 596 | "outputs": [ |
634 | 597 | { |
|
656 | 619 | }, |
657 | 620 | { |
658 | 621 | "cell_type": "code", |
659 | | - "execution_count": 6, |
| 622 | + "execution_count": 7, |
660 | 623 | "metadata": {}, |
661 | 624 | "outputs": [ |
662 | 625 | { |
|
748 | 711 | }, |
749 | 712 | { |
750 | 713 | "cell_type": "code", |
751 | | - "execution_count": 10, |
752 | | - "metadata": { |
753 | | - "collapsed": true |
754 | | - }, |
| 714 | + "execution_count": null, |
| 715 | + "metadata": {}, |
755 | 716 | "outputs": [], |
756 | 717 | "source": [ |
757 | | - "%psource PermutationDecoder" |
| 718 | + "psource(PermutationDecoder)" |
758 | 719 | ] |
759 | 720 | }, |
760 | 721 | { |
|
811 | 772 | "name": "python", |
812 | 773 | "nbconvert_exporter": "python", |
813 | 774 | "pygments_lexer": "ipython3", |
814 | | - "version": "3.5.2+" |
| 775 | + "version": "3.5.3" |
815 | 776 | } |
816 | 777 | }, |
817 | 778 | "nbformat": 4, |
|
0 commit comments