|  | 
| 484 | 484 |    "cell_type": "markdown", | 
| 485 | 485 |    "metadata": {}, | 
| 486 | 486 |    "source": [ | 
| 487 |  | -    "Theano" | 
|  | 487 | +    "`Theano` 中可以定义共享变量(shared variable),它的值可以在多个函数之间共享。" | 
|  | 488 | +   ] | 
|  | 489 | +  }, | 
|  | 490 | +  { | 
|  | 491 | +   "cell_type": "code", | 
|  | 492 | +   "execution_count": 19, | 
|  | 493 | +   "metadata": { | 
|  | 494 | +    "collapsed": false | 
|  | 495 | +   }, | 
|  | 496 | +   "outputs": [ | 
|  | 497 | +    { | 
|  | 498 | +     "name": "stdout", | 
|  | 499 | +     "output_type": "stream", | 
|  | 500 | +     "text": [ | 
|  | 501 | +      "TensorType(float64, matrix)\n" | 
|  | 502 | +     ] | 
|  | 503 | +    } | 
|  | 504 | +   ], | 
|  | 505 | +   "source": [ | 
|  | 506 | +    "shared_var = theano.shared(np.array([[1.0, 2.0], [3.0, 4.0]]))\n", | 
|  | 507 | +    "\n", | 
|  | 508 | +    "print shared_var.type" | 
|  | 509 | +   ] | 
|  | 510 | +  }, | 
|  | 511 | +  { | 
|  | 512 | +   "cell_type": "markdown", | 
|  | 513 | +   "metadata": {}, | 
|  | 514 | +   "source": [ | 
|  | 515 | +    "可以通过 `set_value()` 方法改变它的值:" | 
|  | 516 | +   ] | 
|  | 517 | +  }, | 
|  | 518 | +  { | 
|  | 519 | +   "cell_type": "code", | 
|  | 520 | +   "execution_count": 20, | 
|  | 521 | +   "metadata": { | 
|  | 522 | +    "collapsed": true | 
|  | 523 | +   }, | 
|  | 524 | +   "outputs": [], | 
|  | 525 | +   "source": [ | 
|  | 526 | +    "shared_var.set_value(np.array([[3.0, 4], [2, 1]]))" | 
|  | 527 | +   ] | 
|  | 528 | +  }, | 
|  | 529 | +  { | 
|  | 530 | +   "cell_type": "markdown", | 
|  | 531 | +   "metadata": {}, | 
|  | 532 | +   "source": [ | 
|  | 533 | +    "通过 `get_value()` 方法返回它的值:" | 
|  | 534 | +   ] | 
|  | 535 | +  }, | 
|  | 536 | +  { | 
|  | 537 | +   "cell_type": "code", | 
|  | 538 | +   "execution_count": 21, | 
|  | 539 | +   "metadata": { | 
|  | 540 | +    "collapsed": false | 
|  | 541 | +   }, | 
|  | 542 | +   "outputs": [ | 
|  | 543 | +    { | 
|  | 544 | +     "data": { | 
|  | 545 | +      "text/plain": [ | 
|  | 546 | +       "array([[ 3.,  4.],\n", | 
|  | 547 | +       "       [ 2.,  1.]])" | 
|  | 548 | +      ] | 
|  | 549 | +     }, | 
|  | 550 | +     "execution_count": 21, | 
|  | 551 | +     "metadata": {}, | 
|  | 552 | +     "output_type": "execute_result" | 
|  | 553 | +    } | 
|  | 554 | +   ], | 
|  | 555 | +   "source": [ | 
|  | 556 | +    "shared_var.get_value()" | 
|  | 557 | +   ] | 
|  | 558 | +  }, | 
|  | 559 | +  { | 
|  | 560 | +   "cell_type": "markdown", | 
|  | 561 | +   "metadata": {}, | 
|  | 562 | +   "source": [ | 
|  | 563 | +    "共享变量进行运算:" | 
|  | 564 | +   ] | 
|  | 565 | +  }, | 
|  | 566 | +  { | 
|  | 567 | +   "cell_type": "code", | 
|  | 568 | +   "execution_count": 22, | 
|  | 569 | +   "metadata": { | 
|  | 570 | +    "collapsed": false | 
|  | 571 | +   }, | 
|  | 572 | +   "outputs": [ | 
|  | 573 | +    { | 
|  | 574 | +     "name": "stdout", | 
|  | 575 | +     "output_type": "stream", | 
|  | 576 | +     "text": [ | 
|  | 577 | +      "[[  9.  16.]\n", | 
|  | 578 | +      " [  4.   1.]]\n" | 
|  | 579 | +     ] | 
|  | 580 | +    } | 
|  | 581 | +   ], | 
|  | 582 | +   "source": [ | 
|  | 583 | +    "shared_square = shared_var ** 2\n", | 
|  | 584 | +    "\n", | 
|  | 585 | +    "f = theano.function([], shared_square)\n", | 
|  | 586 | +    "\n", | 
|  | 587 | +    "print f()" | 
|  | 588 | +   ] | 
|  | 589 | +  }, | 
|  | 590 | +  { | 
|  | 591 | +   "cell_type": "markdown", | 
|  | 592 | +   "metadata": {}, | 
|  | 593 | +   "source": [ | 
|  | 594 | +    "这里函数不需要参数,因为共享变量隐式地被认为是一个参数。\n", | 
|  | 595 | +    "\n", | 
|  | 596 | +    "得到的结果会随这个共享变量的变化而变化:" | 
|  | 597 | +   ] | 
|  | 598 | +  }, | 
|  | 599 | +  { | 
|  | 600 | +   "cell_type": "code", | 
|  | 601 | +   "execution_count": 23, | 
|  | 602 | +   "metadata": { | 
|  | 603 | +    "collapsed": false | 
|  | 604 | +   }, | 
|  | 605 | +   "outputs": [ | 
|  | 606 | +    { | 
|  | 607 | +     "name": "stdout", | 
|  | 608 | +     "output_type": "stream", | 
|  | 609 | +     "text": [ | 
|  | 610 | +      "[[  1.   4.]\n", | 
|  | 611 | +      " [  9.  16.]]\n" | 
|  | 612 | +     ] | 
|  | 613 | +    } | 
|  | 614 | +   ], | 
|  | 615 | +   "source": [ | 
|  | 616 | +    "shared_var.set_value(np.array([[1.0, 2], [3, 4]]))\n", | 
|  | 617 | +    "\n", | 
|  | 618 | +    "print f()" | 
|  | 619 | +   ] | 
|  | 620 | +  }, | 
|  | 621 | +  { | 
|  | 622 | +   "cell_type": "markdown", | 
|  | 623 | +   "metadata": {}, | 
|  | 624 | +   "source": [ | 
|  | 625 | +    "共享变量的值可以通过 `theano.function` 的 `updates` 参数在函数调用时被更新:" | 
|  | 626 | +   ] | 
|  | 627 | +  }, | 
|  | 628 | +  { | 
|  | 629 | +   "cell_type": "code", | 
|  | 630 | +   "execution_count": 24, | 
|  | 631 | +   "metadata": { | 
|  | 632 | +    "collapsed": true | 
|  | 633 | +   }, | 
|  | 634 | +   "outputs": [], | 
|  | 635 | +   "source": [ | 
|  | 636 | +    "subtract = T.matrix('subtract')\n", | 
|  | 637 | +    "\n", | 
|  | 638 | +    "f_update = theano.function([subtract], shared_var, updates={shared_var: shared_var - subtract})" | 
|  | 639 | +   ] | 
|  | 640 | +  }, | 
|  | 641 | +  { | 
|  | 642 | +   "cell_type": "markdown", | 
|  | 643 | +   "metadata": {}, | 
|  | 644 | +   "source": [ | 
|  | 645 | +    "这个函数先返回共享变量更新前的值,然后将共享变量更新为原来的值减去传入的参数:" | 
|  | 646 | +   ] | 
|  | 647 | +  }, | 
|  | 648 | +  { | 
|  | 649 | +   "cell_type": "code", | 
|  | 650 | +   "execution_count": 25, | 
|  | 651 | +   "metadata": { | 
|  | 652 | +    "collapsed": false | 
|  | 653 | +   }, | 
|  | 654 | +   "outputs": [ | 
|  | 655 | +    { | 
|  | 656 | +     "name": "stdout", | 
|  | 657 | +     "output_type": "stream", | 
|  | 658 | +     "text": [ | 
|  | 659 | +      "before update:\n", | 
|  | 660 | +      "[[ 1.  2.]\n", | 
|  | 661 | +      " [ 3.  4.]]\n", | 
|  | 662 | +      "the return value:\n", | 
|  | 663 | +      "[[ 1.  2.]\n", | 
|  | 664 | +      " [ 3.  4.]]\n", | 
|  | 665 | +      "after update:\n", | 
|  | 666 | +      "[[ 0.  1.]\n", | 
|  | 667 | +      " [ 2.  3.]]\n" | 
|  | 668 | +     ] | 
|  | 669 | +    } | 
|  | 670 | +   ], | 
|  | 671 | +   "source": [ | 
|  | 672 | +    "print 'before update:'\n", | 
|  | 673 | +    "print shared_var.get_value()\n", | 
|  | 674 | +    "\n", | 
|  | 675 | +    "print 'the return value:'\n", | 
|  | 676 | +    "print f_update(np.array([[1.0, 1], [1, 1]]))\n", | 
|  | 677 | +    "\n", | 
|  | 678 | +    "print 'after update:'\n", | 
|  | 679 | +    "print shared_var.get_value()" | 
|  | 680 | +   ] | 
|  | 681 | +  }, | 
|  | 682 | +  { | 
|  | 683 | +   "cell_type": "markdown", | 
|  | 684 | +   "metadata": {}, | 
|  | 685 | +   "source": [ | 
|  | 686 | +    "## 导数" | 
|  | 687 | +   ] | 
|  | 688 | +  }, | 
|  | 689 | +  { | 
|  | 690 | +   "cell_type": "markdown", | 
|  | 691 | +   "metadata": {}, | 
|  | 692 | +   "source": [ | 
|  | 693 | +    "`Theano` 的一大好处在于它对符号变量计算导数的能力。\n", | 
|  | 694 | +    "\n", | 
|  | 695 | +    "我们用 `T.grad()` 来计算导数,之前我们定义了 `foo` 和 `bar` (分别是 $x$ 和 $x^2$),我们来计算 `bar` 关于 `foo` 的导数(应该是 $2x$):" | 
|  | 696 | +   ] | 
|  | 697 | +  }, | 
|  | 698 | +  { | 
|  | 699 | +   "cell_type": "code", | 
|  | 700 | +   "execution_count": 26, | 
|  | 701 | +   "metadata": { | 
|  | 702 | +    "collapsed": false | 
|  | 703 | +   }, | 
|  | 704 | +   "outputs": [ | 
|  | 705 | +    { | 
|  | 706 | +     "data": { | 
|  | 707 | +      "text/plain": [ | 
|  | 708 | +       "array(20.0)" | 
|  | 709 | +      ] | 
|  | 710 | +     }, | 
|  | 711 | +     "execution_count": 26, | 
|  | 712 | +     "metadata": {}, | 
|  | 713 | +     "output_type": "execute_result" | 
|  | 714 | +    } | 
|  | 715 | +   ], | 
|  | 716 | +   "source": [ | 
|  | 717 | +    "bar_grad = T.grad(bar, foo)  # 表示 bar (x^2) 关于 foo (x) 的导数\n", | 
|  | 718 | +    "\n", | 
|  | 719 | +    "bar_grad.eval({foo: 10})" | 
|  | 720 | +   ] | 
|  | 721 | +  }, | 
|  | 722 | +  { | 
|  | 723 | +   "cell_type": "markdown", | 
|  | 724 | +   "metadata": {}, | 
|  | 725 | +   "source": [ | 
|  | 726 | +    "再如,对之前的 $y = Ax + b$ 求 $y$ 关于 $x$ 的雅可比矩阵(应当是 $A$):" | 
|  | 727 | +   ] | 
|  | 728 | +  }, | 
|  | 729 | +  { | 
|  | 730 | +   "cell_type": "code", | 
|  | 731 | +   "execution_count": 27, | 
|  | 732 | +   "metadata": { | 
|  | 733 | +    "collapsed": false | 
|  | 734 | +   }, | 
|  | 735 | +   "outputs": [ | 
|  | 736 | +    { | 
|  | 737 | +     "name": "stdout", | 
|  | 738 | +     "output_type": "stream", | 
|  | 739 | +     "text": [ | 
|  | 740 | +      "[[ 9.  8.  7.]\n", | 
|  | 741 | +      " [ 4.  5.  6.]]\n" | 
|  | 742 | +     ] | 
|  | 743 | +    }, | 
|  | 744 | +    { | 
|  | 745 | +     "name": "stderr", | 
|  | 746 | +     "output_type": "stream", | 
|  | 747 | +     "text": [ | 
|  | 748 | +      "C:\\Anaconda\\lib\\site-packages\\theano\\scan_module\\scan_perform_ext.py:133: RuntimeWarning: numpy.ndarray size changed, may indicate binary incompatibility\n", | 
|  | 749 | +      "  from scan_perform.scan_perform import *\n" | 
|  | 750 | +     ] | 
|  | 751 | +    } | 
|  | 752 | +   ], | 
|  | 753 | +   "source": [ | 
|  | 754 | +    "y_J = theano.gradient.jacobian(y, x)\n", | 
|  | 755 | +    "\n", | 
|  | 756 | +    "print y_J.eval({A: np.array([[9.0, 8, 7], [4, 5, 6]]), #A\n", | 
|  | 757 | +    "                x: np.array([1.0, 2, 3]),              #x\n", | 
|  | 758 | +    "                b: np.array([4.0, 5])})                #b" | 
|  | 759 | +   ] | 
|  | 760 | +  }, | 
|  | 761 | +  { | 
|  | 762 | +   "cell_type": "markdown", | 
|  | 763 | +   "metadata": {}, | 
|  | 764 | +   "source": [ | 
|  | 765 | +    "`theano.gradient.jacobian` 用来计算雅可比矩阵,而 `theano.gradient.hessian` 可以用来计算 `Hessian` 矩阵。" | 
| 488 | 766 |    ] | 
| 489 | 767 |   } | 
| 490 | 768 |  ], | 
|  | 
0 commit comments