-from __future__ import print_function, division
-from builtins import range
 import numpy as np

-
 """
 This file defines layer types that are commonly used for recurrent neural
 networks.
@@ -30,13 +27,14 @@ def rnn_step_forward(x, prev_h, Wx, Wh, b):
     """
     next_h, cache = None, None
     ##############################################################################
-    # TODO: Implement a single forward step for the vanilla RNN. Store the next #
+    # Implement a single forward step for the vanilla RNN. Store the next #
     # hidden state and any values you need for the backward pass in the next_h #
     # and cache variables respectively. #
     ##############################################################################
     # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

-    pass
+    next_h = np.tanh(prev_h @ Wh + x @ Wx + b)
+    cache = x, prev_h, Wx, Wh, b, next_h

     # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
     ##############################################################################
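For reference, a minimal sanity check of the step implemented above. The module name rnn_layers and the toy shapes are assumptions for this sketch, not part of the commit.

import numpy as np
from rnn_layers import rnn_step_forward  # assumed module name

np.random.seed(0)
N, D, H = 2, 3, 4                                   # batch, input dim, hidden dim
x = np.random.randn(N, D)
prev_h = np.random.randn(N, H)
Wx, Wh, b = np.random.randn(D, H), np.random.randn(H, H), np.random.randn(H)

next_h, _ = rnn_step_forward(x, prev_h, Wx, Wh, b)
assert next_h.shape == (N, H)                       # one new hidden state per example
assert np.allclose(next_h, np.tanh(x @ Wx + prev_h @ Wh + b))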
@@ -62,14 +60,20 @@ def rnn_step_backward(dnext_h, cache):
     """
     dx, dprev_h, dWx, dWh, db = None, None, None, None, None
     ##############################################################################
-    # TODO: Implement the backward pass for a single step of a vanilla RNN. #
+    # Implement the backward pass for a single step of a vanilla RNN. #
     # #
     # HINT: For the tanh function, you can compute the local derivative in terms #
     # of the output value from tanh. #
     ##############################################################################
     # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

-    pass
+    x, prev_h, Wx, Wh, b, next_h = cache
+    dnext_h: np.ndarray = (1 - next_h ** 2) * dnext_h
+    dx = dnext_h @ Wx.T
+    dprev_h = dnext_h @ Wh.T
+    dWx = x.T @ dnext_h
+    dWh = prev_h.T @ dnext_h
+    db = dnext_h.sum(0)

     # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
     ##############################################################################
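One hedged way to verify the analytic gradients above is a centered-difference check. num_grad below is a throwaway helper written for this sketch (it is not part of the assignment code), and rnn_layers is again an assumed module name.

import numpy as np
from rnn_layers import rnn_step_forward, rnn_step_backward  # assumed module name

def num_grad(f, a, upstream, eps=1e-5):
    # centered-difference gradient of f() w.r.t. array a, contracted with upstream
    g = np.zeros_like(a)
    it = np.nditer(a, flags=["multi_index"])
    while not it.finished:
        idx = it.multi_index
        old = a[idx]
        a[idx] = old + eps; pos = f()
        a[idx] = old - eps; neg = f()
        a[idx] = old
        g[idx] = np.sum((pos - neg) * upstream) / (2 * eps)
        it.iternext()
    return g

np.random.seed(0)
N, D, H = 2, 3, 4
x, prev_h = np.random.randn(N, D), np.random.randn(N, H)
Wx, Wh, b = np.random.randn(D, H), np.random.randn(H, H), np.random.randn(H)
dnext_h = np.random.randn(N, H)

_, cache = rnn_step_forward(x, prev_h, Wx, Wh, b)
dx, dprev_h, dWx, dWh, db = rnn_step_backward(dnext_h, cache)
f = lambda: rnn_step_forward(x, prev_h, Wx, Wh, b)[0]
assert np.allclose(dx, num_grad(f, x, dnext_h), atol=1e-6)
assert np.allclose(dWh, num_grad(f, Wh, dnext_h), atol=1e-6)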
@@ -78,7 +82,7 @@ def rnn_step_backward(dnext_h, cache):
     return dx, dprev_h, dWx, dWh, db


-def rnn_forward(x, h0, Wx, Wh, b):
+def rnn_forward(x: np.ndarray, h0, Wx, Wh, b):
     """
     Run a vanilla RNN forward on an entire sequence of data. We assume an input
     sequence composed of T vectors, each of dimension D. The RNN uses a hidden
@@ -98,13 +102,19 @@ def rnn_forward(x, h0, Wx, Wh, b):
     """
     h, cache = None, None
     ##############################################################################
-    # TODO: Implement forward pass for a vanilla RNN running on a sequence of #
+    # Implement forward pass for a vanilla RNN running on a sequence of #
     # input data. You should use the rnn_step_forward function that you defined #
     # above. You can use a for loop to help compute the forward pass. #
     ##############################################################################
     # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

-    pass
+    N, T, D = x.shape
+    cache = []
+    h = np.zeros((N, T, h0.shape[1]))
+    for i in range(T):
+        h0, c = rnn_step_forward(x[:, i], h0, Wx, Wh, b)
+        h[:, i] += h0
+        cache.append(c)

     # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
     ##############################################################################
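An illustrative call of rnn_forward on a toy sequence, under the same assumed module name; it only checks the shapes the docstring promises.

import numpy as np
from rnn_layers import rnn_forward  # assumed module name

np.random.seed(0)
N, T, D, H = 2, 5, 3, 4                             # batch, timesteps, input dim, hidden dim
x = np.random.randn(N, T, D)
h0 = np.random.randn(N, H)
Wx, Wh, b = np.random.randn(D, H), np.random.randn(H, H), np.random.randn(H)

h, cache = rnn_forward(x, h0, Wx, Wh, b)
assert h.shape == (N, T, H)                         # one hidden state per timestep
assert len(cache) == T                              # one per-step cache entry per timestep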
@@ -113,7 +123,7 @@ def rnn_forward(x, h0, Wx, Wh, b):
     return h, cache


-def rnn_backward(dh, cache):
+def rnn_backward(dh, cache: list):
     """
     Compute the backward pass for a vanilla RNN over an entire sequence of data.

@@ -134,13 +144,25 @@ def rnn_backward(dh, cache):
     """
     dx, dh0, dWx, dWh, db = None, None, None, None, None
     ##############################################################################
-    # TODO: Implement the backward pass for a vanilla RNN running an entire #
+    # Implement the backward pass for a vanilla RNN running an entire #
     # sequence of data. You should use the rnn_step_backward function that you #
     # defined above. You can use a for loop to help compute the backward pass. #
     ##############################################################################
     # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

-    pass
+    N, T, H = dh.shape
+    D = cache[0][0].shape[1]
+    dx = np.zeros((N, T, D))
+    dh0 = np.zeros((N, H))
+    dWx = np.zeros((D, H))
+    dWh = np.zeros((H, H))
+    db = np.zeros((H,))
+
+    for i in reversed(range(T)):
+        dx[:, i], dh0, dWx_, dWh_, db_ = rnn_step_backward(dh[:, i] + dh0, cache.pop())
+        dWx += dWx_
+        dWh += dWh_
+        db += db_

     # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
     ##############################################################################
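A sketch of the full round trip through rnn_forward and rnn_backward (same assumed module name); dh plays the role of an upstream gradient flowing into every timestep's hidden state. Note that the loop above consumes the cache via cache.pop(), so each forward pass can only be walked backward once.

import numpy as np
from rnn_layers import rnn_forward, rnn_backward  # assumed module name

np.random.seed(0)
N, T, D, H = 2, 5, 3, 4
x = np.random.randn(N, T, D)
h0 = np.random.randn(N, H)
Wx, Wh, b = np.random.randn(D, H), np.random.randn(H, H), np.random.randn(H)

h, cache = rnn_forward(x, h0, Wx, Wh, b)
dh = np.random.randn(*h.shape)                      # upstream gradient for every timestep
dx, dh0, dWx, dWh, db = rnn_backward(dh, cache)
assert dx.shape == x.shape and dh0.shape == h0.shape
assert dWx.shape == Wx.shape and dWh.shape == Wh.shape and db.shape == b.shape
assert len(cache) == 0                              # the backward pass pops the cache empty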
@@ -166,13 +188,14 @@ def word_embedding_forward(x, W):
     """
     out, cache = None, None
     ##############################################################################
-    # TODO: Implement the forward pass for word embeddings. #
+    # Implement the forward pass for word embeddings. #
     # #
     # HINT: This can be done in one line using NumPy's array indexing. #
     ##############################################################################
     # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

-    pass
+    out = W[x]
+    cache = x, W

     # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
     ##############################################################################
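The one-liner above relies on NumPy fancy indexing: each integer in x is replaced by the matching row of W. A small self-contained illustration with toy numbers:

import numpy as np

V, D = 5, 3                                         # vocabulary size, embedding dim
W = np.arange(V * D, dtype=float).reshape(V, D)     # row v is the embedding of word v
x = np.array([[0, 3, 1],
              [2, 2, 4]])                           # (N, T) word indices
out = W[x]                                          # (N, T, D) embedded sequence
assert out.shape == (2, 3, 3)
assert np.allclose(out[0, 1], W[3])                 # index 3 picked up row 3 of W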
@@ -198,14 +221,17 @@ def word_embedding_backward(dout, cache):
     """
     dW = None
     ##############################################################################
-    # TODO: Implement the backward pass for word embeddings. #
+    # Implement the backward pass for word embeddings. #
     # #
     # Note that words can appear more than once in a sequence. #
     # HINT: Look up the function np.add.at #
     ##############################################################################
     # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

-    pass
+    x, w = cache
+    dW = np.zeros_like(w)
+    N, T, D = dout.shape
+    np.add.at(dW, x.flatten(), dout.reshape(-1, D))

     # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
     ##############################################################################
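Why np.add.at rather than dW[x] += ...: with repeated indices, plain fancy-index assignment keeps only one contribution per index, while np.add.at accumulates all of them. A toy illustration:

import numpy as np

V, D = 4, 2
dW = np.zeros((V, D))
idx = np.array([1, 1, 3])                           # word 1 appears twice
grads = np.ones((3, D))
np.add.at(dW, idx, grads)                           # unbuffered scatter-add
assert np.allclose(dW[1], [2.0, 2.0])               # both occurrences accumulated
assert np.allclose(dW[3], [1.0, 1.0])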