@@ -212,21 +212,22 @@ for t in range(500):
     w2.grad.data.zero_()

     # Use autograd to compute the backward pass. This call will compute the
-    # gradient of all loss with respect to all Variables with requires_grad=True.
+    # gradient of loss with respect to all Variables with requires_grad=True.
     # After this call w1.grad and w2.grad will be Variables holding the gradient
     # of the loss with respect to w1 and w2 respectively.
     loss.backward()

-    # Update weights using gradient descent: w1.grad and w2.grad are Variables
-    # and w1.grad.data and w2.grad.data are Tensors.
+    # Update weights using gradient descent; w1.data and w2.data are Tensors,
+    # w1.grad and w2.grad are Variables and w1.grad.data and w2.grad.data are
+    # Tensors.
     w1.data -= learning_rate * w1.grad.data
     w2.data -= learning_rate * w2.grad.data
 ```

 ## PyTorch: Defining new autograd functions
 Under the hood, each primitive autograd operator is really two functions that
-operate on Tensors. The *forward* function computes output Tensors from input
-Tensors. The *backward* function recieves the gradient of the output Tensors
+operate on Tensors. The **forward** function computes output Tensors from input
+Tensors. The **backward** function receives the gradient of the output Tensors
 with respect to some scalar value, and computes the gradient of the input Tensors
 with respect to that same scalar value.

@@ -249,12 +250,11 @@ class MyReLU(torch.autograd.Function):
     torch.autograd.Function and implementing the forward and backward passes
     which operate on Tensors.
     """
-
     def forward(self, input):
         """
         In the forward pass we receive a Tensor containing the input and return a
-        Tensor containing the output. You can save cache arbitrary Tensors for use
-        in the backward pass using the save_for_backward method.
+        Tensor containing the output. You can cache arbitrary Tensors for use in the
+        backward pass using the save_for_backward method.
         """
         self.save_for_backward(input)
         return input.clamp(min=0)
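The hunk above cuts off before the matching `backward` method. As a hedged sketch, mirroring the legacy instance-method `Function` API used above (the comments and exact wording here are illustrative, not part of the commit), the full custom ReLU would look roughly like:

```python
import torch

class MyReLU(torch.autograd.Function):
    def forward(self, input):
        # Cache the input Tensor so backward can see where the ReLU was inactive.
        self.save_for_backward(input)
        return input.clamp(min=0)

    def backward(self, grad_output):
        # Recover the cached input and zero the upstream gradient wherever the
        # input was negative; ReLU passes no gradient through those elements.
        input, = self.saved_tensors
        grad_input = grad_output.clone()
        grad_input[input < 0] = 0
        return grad_input
```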
@@ -279,14 +279,10 @@ dtype = torch.FloatTensor
 N, D_in, H, D_out = 64, 1000, 100, 10

 # Create random Tensors to hold input and outputs, and wrap them in Variables.
-# Setting requires_grad=False indicates that we do not need to compute gradients
-# with respect to these Variables during the backward pass.
 x = Variable(torch.randn(N, D_in).type(dtype), requires_grad=False)
 y = Variable(torch.randn(N, D_out).type(dtype), requires_grad=False)

 # Create random Tensors for weights, and wrap them in Variables.
-# Setting requires_grad=True indicates that we want to compute gradients with
-# respect to these Variables during the backward pass.
 w1 = Variable(torch.randn(D_in, H).type(dtype), requires_grad=True)
 w2 = Variable(torch.randn(H, D_out).type(dtype), requires_grad=True)

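The rest of this example is not shown in the diff; the custom Function is then used inside a training loop. A minimal sketch under the same legacy API, where the `Function` is instantiated and called directly (the tutorial's actual loop may differ in details):

```python
learning_rate = 1e-6
for t in range(500):
    # Construct an instance of our custom Function and use it like a function.
    relu = MyReLU()

    # Forward pass: compute predicted y using operations on Variables.
    y_pred = relu(x.mm(w1)).mm(w2)

    # Compute and print loss.
    loss = (y_pred - y).pow(2).sum()
    print(t, loss.data[0])

    # Use autograd to compute the backward pass, then update the weights.
    loss.backward()
    w1.data -= learning_rate * w1.grad.data
    w2.data -= learning_rate * w2.grad.data

    # Manually zero the gradients so they do not accumulate across iterations.
    w1.grad.data.zero_()
    w2.grad.data.zero_()
```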
@@ -423,7 +419,7 @@ In TensorFlow, packages like [Keras](https://github.com/fchollet/keras),
 and [TFLearn](http://tflearn.org/) provide higher-level abstractions over
 raw computational graphs that are useful for building neural networks.

-In PyTorch, the `nn` package serves this same purpose. The nn package defines a set of
+In PyTorch, the `nn` package serves this same purpose. The `nn` package defines a set of
 **Modules**, which are roughly equivalent to neural network layers. A Module receives
 input Variables and computes output Variables, but may also hold internal state such as
 Variables containing learnable parameters. The `nn` package also defines a set of useful
@@ -522,9 +518,9 @@ model = torch.nn.Sequential(
 loss_fn = torch.nn.MSELoss(size_average=False)

 # Use the optim package to define an Optimizer that will update the weights of
-# the model for us. Here we will use stochastic gradient descent (SGD), but the
-# optim package contains many other optimization algoriths. The first argument
-# to the Adam constructor tells the optimizer which Variables it should update.
+# the model for us. Here we will use Adam; the optim package contains many other
+# optimization algorithms. The first argument to the Adam constructor tells the
+# optimizer which Variables it should update.
 learning_rate = 1e-4
 optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
 for t in range(500):
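The hunk ends at the loop header, so the loop body is not visible here. A minimal sketch of how such an optim-based loop is typically completed, assuming the `model`, `loss_fn`, and `optimizer` defined above (the tutorial's actual body may differ in details):

```python
for t in range(500):
    # Forward pass: compute predicted y by passing x to the model.
    y_pred = model(x)

    # Compute and print loss.
    loss = loss_fn(y_pred, y)
    print(t, loss.data[0])

    # Before the backward pass, use the optimizer to zero all of the gradients
    # of the Variables it will update (the learnable weights of the model).
    optimizer.zero_grad()

    # Backward pass: compute the gradient of the loss with respect to parameters.
    loss.backward()

    # Calling step() makes the optimizer update its parameters.
    optimizer.step()
```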