@@ -82,25 +82,26 @@ def sparse_autoencoder_cost_and_grad(theta, visible_size, hidden_size, decay_lam
82
82
prime2 = np .multiply (a2 , (1.0 - a2 ))
83
83
delta2 = np .multiply (np .dot (delta3 , np .transpose (w2 )) + beta * sparsity_term , prime2 )
84
84
85
+ # compute partial gradient
86
+ w1grad_p = np .dot (np .transpose (a1 ), delta2 )
87
+ w2grad_p = np .dot (np .transpose (a2 ), delta3 )
88
+ b1grad_p = delta2
89
+ b2grad_p = delta3
90
+
85
91
# compute gradient
86
- w1grad = np .zeros_like (w1 )
87
- for i in xrange (data .shape [0 ]):
88
- w1grad += np .dot (a1 [i , :].reshape ((visible_size , 1 )), delta2 [i , :].reshape ((1 , hidden_size )))
89
- w1grad = one_over_m * w1grad + decay_lambda * w1
90
- w2grad = np .zeros_like (w2 )
91
- for i in xrange (data .shape [0 ]):
92
- w2grad += np .dot (a2 [i , :].reshape ((hidden_size , 1 )), delta3 [i , :].reshape ((1 , visible_size )))
93
- w2grad = one_over_m * w2grad + decay_lambda * w2
94
- b1grad = one_over_m * np .sum (delta2 , axis = 0 )
95
- b2grad = one_over_m * np .sum (delta3 , axis = 0 )
92
+ w1grad = one_over_m * w1grad_p + decay_lambda * w1
93
+ w2grad = one_over_m * w2grad_p + decay_lambda * w2
94
+ b1grad = one_over_m * np .sum (b1grad_p , axis = 0 )
95
+ b2grad = one_over_m * np .sum (b2grad_p , axis = 0 )
96
96
97
97
# compute cost
98
98
error_flatten = (a3 - y ).flatten ()
99
99
w1_flatten = w1 .flatten ()
100
100
w2_flatten = w2 .flatten ()
101
101
cost = np .dot (error_flatten , error_flatten )* one_over_m / 2.0 + \
102
102
decay_lambda * (np .dot (w1_flatten , w1_flatten )+ np .dot (w2_flatten , w2_flatten ))/ 2.0 + \
103
- beta * (np .sum (sparsity_param * np .log (sparsity_param / sparsity_avg )+ (1.0 - sparsity_param )* np .log ((1.0 - sparsity_param )/ (1.0 - sparsity_avg ))))
103
+ beta * (np .sum (sparsity_param * np .log (sparsity_param / sparsity_avg ) +
104
+ (1.0 - sparsity_param )* np .log ((1.0 - sparsity_param )/ (1.0 - sparsity_avg ))))
104
105
105
106
# After computing the cost and gradient, we will convert the gradients back
106
107
# to a vector format (suitable for minFunc). Specifically, we will unroll
0 commit comments