Commit 1f45934

Start reorganizing and documenting
1 parent 6f8e941 commit 1f45934

8 files changed: +333, -83 lines


autograd/tf_two_layer_net.py

Lines changed: 74 additions & 0 deletions
@@ -0,0 +1,74 @@
import tensorflow as tf
import numpy as np

"""
A fully-connected ReLU network with one hidden layer and no biases, trained to
predict y from x by minimizing squared Euclidean distance.

This implementation uses basic TensorFlow operations to set up a computational
graph, then executes the graph many times to actually train the network.

One of the main differences between TensorFlow and PyTorch is that TensorFlow
uses static computational graphs while PyTorch uses dynamic computational
graphs.

In TensorFlow we first set up the computational graph, then execute the same
graph many times.
"""

# First we set up the computational graph:

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create placeholders for the input and target data; these will be filled
# with real data when we execute the graph.
x = tf.placeholder(tf.float32, shape=(None, D_in))
y = tf.placeholder(tf.float32, shape=(None, D_out))

# Create Variables for the weights and initialize them with random data.
# A TensorFlow Variable persists its value across executions of the graph.
w1 = tf.Variable(tf.random_normal((D_in, H)))
w2 = tf.Variable(tf.random_normal((H, D_out)))

# Forward pass: Compute the predicted y using operations on TensorFlow Tensors.
# Note that this code does not actually perform any numeric operations; it
# merely sets up the computational graph that we will later execute.
h = tf.matmul(x, w1)
h_relu = tf.maximum(h, tf.zeros(1))
y_pred = tf.matmul(h_relu, w2)

# Compute loss using operations on TensorFlow Tensors
loss = tf.reduce_sum((y - y_pred) ** 2.0)

# Compute gradient of the loss with respect to w1 and w2.
grad_w1, grad_w2 = tf.gradients(loss, [w1, w2])

# Update the weights using gradient descent. To actually update the weights
# we need to evaluate new_w1 and new_w2 when executing the graph. Note that
# in TensorFlow the act of updating the value of the weights is part of
# the computational graph; in PyTorch this happens outside the computational
# graph.
learning_rate = 1e-6
new_w1 = w1.assign(w1 - learning_rate * grad_w1)
new_w2 = w2.assign(w2 - learning_rate * grad_w2)

# Now we have built our computational graph, so we enter a TensorFlow session to
# actually execute the graph.
with tf.Session() as sess:
    # Run the graph once to initialize the Variables w1 and w2.
    sess.run(tf.global_variables_initializer())

    # Create numpy arrays holding the actual data for the inputs x and targets y
    x_value = np.random.randn(N, D_in)
    y_value = np.random.randn(N, D_out)
    for _ in range(500):
        # Execute the graph many times. Each time it executes we want to bind
        # x_value to x and y_value to y, specified with the feed_dict argument.
        # Each time we execute the graph we want to compute the values for loss,
        # new_w1, and new_w2; the values of these Tensors are returned as numpy
        # arrays.
        loss_value, _, _ = sess.run([loss, new_w1, new_w2],
                                    feed_dict={x: x_value, y: y_value})
        print(loss_value)
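The docstring above is the key point of this example: the graph is defined once and then executed many times with data bound via feed_dict. A minimal standalone sketch of that define-once, run-many-times pattern, stripped of the network (the names a, b, and total are illustrative and not part of this commit; this assumes the TensorFlow 1.x API used above):

import numpy as np
import tensorflow as tf

# Build the graph once: two placeholders and one reduction op.
# No arithmetic happens at this point.
a = tf.placeholder(tf.float32, shape=(None,))
b = tf.placeholder(tf.float32, shape=(None,))
total = tf.reduce_sum(a * b)

# Execute the same graph repeatedly, binding different data on each run.
with tf.Session() as sess:
    for _ in range(3):
        feed = {a: np.random.randn(4), b: np.random.randn(4)}
        print(sess.run(total, feed_dict=feed))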

autograd/two_layer_net_autograd.py

Lines changed: 69 additions & 0 deletions
@@ -0,0 +1,69 @@
import torch
from torch.autograd import Variable


"""
A fully-connected ReLU network with one hidden layer and no biases, trained to
predict y from x by minimizing squared Euclidean distance.

This implementation computes the forward pass using operations on PyTorch
Variables, and uses PyTorch autograd to compute gradients.

A PyTorch Variable is a wrapper around a PyTorch Tensor, and represents a node
in a computational graph. If x is a Variable then x.data is a Tensor giving its
value, and x.grad is another Variable holding the gradient of x with respect to
some scalar value.

PyTorch Variables have the same API as PyTorch Tensors: (almost) any operation
you can do on a Tensor you can also do on a Variable; the difference is that
autograd allows you to automatically compute gradients.
"""

dtype = torch.FloatTensor
# dtype = torch.cuda.FloatTensor  # Uncomment this to run on GPU

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random Tensors to hold input and outputs, and wrap them in Variables.
# Setting requires_grad=False indicates that we do not need to compute gradients
# with respect to these Variables during the backward pass.
x = Variable(torch.randn(N, D_in).type(dtype), requires_grad=False)
y = Variable(torch.randn(N, D_out).type(dtype), requires_grad=False)

# Create random Tensors for weights, and wrap them in Variables.
# Setting requires_grad=True indicates that we want to compute gradients with
# respect to these Variables during the backward pass.
w1 = Variable(torch.randn(D_in, H), requires_grad=True)
w2 = Variable(torch.randn(H, D_out), requires_grad=True)


learning_rate = 1e-6
for t in range(500):
    # Forward pass: compute predicted y using operations on Variables; these
    # are exactly the same operations we used to compute the forward pass using
    # Tensors, but we do not need to keep references to intermediate values since
    # we are not implementing the backward pass by hand.
    y_pred = x.mm(w1).clamp(min=0).mm(w2)

    # Compute and print loss using operations on Variables.
    # Now loss is a Variable of shape (1,) and loss.data is a Tensor of shape
    # (1,); loss.data[0] is a scalar value holding the loss.
    loss = (y_pred - y).pow(2).sum()
    print(t, loss.data[0])

    # Manually zero the gradients before running the backward pass
    w1.grad.data.zero_()
    w2.grad.data.zero_()

    # Use autograd to compute the backward pass. This call will compute the
    # gradient of the loss with respect to all Variables with requires_grad=True.
    # After this call w1.grad and w2.grad will be Variables holding the gradient
    # of the loss with respect to w1 and w2 respectively.
    loss.backward()

    # Update weights using gradient descent: w1.grad and w2.grad are Variables
    # and w1.grad.data and w2.grad.data are Tensors.
    w1.data -= learning_rate * w1.grad.data
    w2.data -= learning_rate * w2.grad.data
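The Variable/Tensor relationship described in that docstring is easy to see on a much smaller example. A minimal sketch, assuming the same pre-0.4 Variable API as the file above (the name v is illustrative): autograd fills v.grad after backward(), and for s = sum(v**2) the gradient should equal 2 * v.

import torch
from torch.autograd import Variable

# A tiny scalar function of v: s = sum(v ** 2), so ds/dv = 2 * v.
v = Variable(torch.randn(3), requires_grad=True)
s = v.pow(2).sum()
s.backward()

# v.data is the Tensor holding v's value; v.grad is a Variable whose
# .data Tensor holds ds/dv.
print(v.grad.data)
print(2 * v.data)  # should match the line above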
Lines changed: 87 additions & 0 deletions
@@ -0,0 +1,87 @@
import torch
from torch.autograd import Variable


"""
A fully-connected ReLU network with one hidden layer and no biases, trained to
predict y from x by minimizing squared Euclidean distance.

This implementation computes the forward pass using operations on PyTorch
Variables, and uses PyTorch autograd to compute gradients.

In this implementation we implement our own custom autograd function to perform
the ReLU function.
"""


class MyReLU(torch.autograd.Function):
    """
    We can implement our own custom autograd Functions by subclassing
    torch.autograd.Function and implementing the forward and backward passes
    which operate on Tensors.
    """

    def forward(self, input):
        """
        In the forward pass we receive a Tensor containing the input and return a
        Tensor containing the output. You can cache arbitrary Tensors for use
        in the backward pass using the save_for_backward method.
        """
        self.save_for_backward(input)
        return input.clamp(min=0)

    def backward(self, grad_output):
        """
        In the backward pass we receive a Tensor containing the gradient of the loss
        with respect to the output, and we need to compute the gradient of the loss
        with respect to the input.
        """
        input, = self.saved_tensors
        grad_input = grad_output.clone()
        grad_input[input < 0] = 0
        return grad_input


dtype = torch.FloatTensor
# dtype = torch.cuda.FloatTensor  # Uncomment this to run on GPU

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random Tensors to hold input and outputs, and wrap them in Variables.
# Setting requires_grad=False indicates that we do not need to compute gradients
# with respect to these Variables during the backward pass.
x = Variable(torch.randn(N, D_in).type(dtype), requires_grad=False)
y = Variable(torch.randn(N, D_out).type(dtype), requires_grad=False)

# Create random Tensors for weights, and wrap them in Variables.
# Setting requires_grad=True indicates that we want to compute gradients with
# respect to these Variables during the backward pass.
w1 = Variable(torch.randn(D_in, H), requires_grad=True)
w2 = Variable(torch.randn(H, D_out), requires_grad=True)

learning_rate = 1e-6
for t in range(500):
    # Construct an instance of our MyReLU class to use in our network
    relu = MyReLU()

    # Forward pass: compute predicted y using operations on Variables; we compute
    # ReLU using our custom autograd operation.
    y_pred = relu(x.mm(w1)).mm(w2)

    # Compute and print loss
    loss = (y_pred - y).pow(2).sum()
    print(t, loss.data[0])

    # Manually zero the gradients before running the backward pass
    w1.grad.data.zero_()
    w2.grad.data.zero_()

    # Use autograd to compute the backward pass.
    loss.backward()

    # Update weights using gradient descent
    w1.data -= learning_rate * w1.grad.data
    w2.data -= learning_rate * w2.grad.data
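One quick way to sanity-check a custom Function like MyReLU is to compare its gradient against the built-in clamp-based ReLU on the same input. A minimal sketch, assuming the MyReLU class defined above and the old-style (pre-0.4) Function API in which an instance is called directly:

import torch
from torch.autograd import Variable

# Two Variables holding identical values, so their gradients are comparable.
x1 = Variable(torch.randn(5), requires_grad=True)
x2 = Variable(x1.data.clone(), requires_grad=True)

# Custom Function (MyReLU from above) versus the built-in clamp-based ReLU.
MyReLU()(x1).sum().backward()
x2.clamp(min=0).sum().backward()

# The two gradients should match elementwise.
print(torch.equal(x1.grad.data, x2.grad.data))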

tensor/two_layer_net_numpy.py

Lines changed: 48 additions & 0 deletions
@@ -0,0 +1,48 @@
import numpy as np

"""
A fully-connected ReLU network with one hidden layer and no biases, trained to
predict y from x using Euclidean error.

This implementation uses numpy to manually compute the forward pass, loss, and
backward pass.

A numpy array is a generic n-dimensional array; it does not know anything about
deep learning or gradients or computational graphs, and is just a way to perform
generic numeric computations.
"""

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random input and output data
x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)

# Randomly initialize weights
w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)

learning_rate = 1e-6
for t in range(500):
    # Forward pass: compute predicted y
    h = x.dot(w1)
    h_relu = np.maximum(h, 0)
    y_pred = h_relu.dot(w2)

    # Compute and print loss
    loss = np.square(y_pred - y).sum()
    print(t, loss)

    # Backprop to compute gradients of w1 and w2 with respect to loss
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.T.dot(grad_y_pred)
    grad_h_relu = grad_y_pred.dot(w2.T)
    grad_h = grad_h_relu.copy()
    grad_h[h < 0] = 0
    grad_w1 = x.T.dot(grad_h)

    # Update weights
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2
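Since the backward pass above is derived by hand, a finite-difference check on a tiny instance of the same network is a cheap way to verify it. A minimal sketch (the helper loss_fn and the small sizes are illustrative, not part of this commit):

import numpy as np

# A tiny instance of the same two-layer network so the check is cheap.
N, D_in, H, D_out = 4, 3, 5, 2
x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)
w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)

def loss_fn(w1, w2):
    h_relu = np.maximum(x.dot(w1), 0)
    return np.square(h_relu.dot(w2) - y).sum()

# Analytic gradient with respect to w2, exactly as derived above.
h_relu = np.maximum(x.dot(w1), 0)
grad_w2 = h_relu.T.dot(2.0 * (h_relu.dot(w2) - y))

# Central-difference estimate for a single entry of w2.
eps = 1e-6
w2_plus, w2_minus = w2.copy(), w2.copy()
w2_plus[0, 0] += eps
w2_minus[0, 0] -= eps
numeric = (loss_fn(w1, w2_plus) - loss_fn(w1, w2_minus)) / (2 * eps)
print(np.isclose(grad_w2[0, 0], numeric, rtol=1e-4))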

tensor/two_layer_net_tensor.py

Lines changed: 55 additions & 0 deletions
@@ -0,0 +1,55 @@
import torch

"""
A fully-connected ReLU network with one hidden layer and no biases, trained to
predict y from x by minimizing squared Euclidean distance.

This implementation uses PyTorch tensors to manually compute the forward pass,
loss, and backward pass.

A PyTorch Tensor is basically the same as a numpy array: it does not know
anything about deep learning or computational graphs or gradients, and is just
a generic n-dimensional array to be used for arbitrary numeric computation.

The biggest difference between a numpy array and a PyTorch Tensor is that
a PyTorch Tensor can run on either CPU or GPU. To run operations on the GPU,
just cast the Tensor to a cuda datatype.
"""

dtype = torch.FloatTensor
# dtype = torch.cuda.FloatTensor  # Uncomment this to run on GPU

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random input and output data
x = torch.randn(N, D_in).type(dtype)
y = torch.randn(N, D_out).type(dtype)

# Randomly initialize weights
w1 = torch.randn(D_in, H).type(dtype)
w2 = torch.randn(H, D_out).type(dtype)

learning_rate = 1e-6
for t in range(500):
    # Forward pass: compute predicted y
    h = x.mm(w1)
    h_relu = h.clamp(min=0)
    y_pred = h_relu.mm(w2)

    # Compute and print loss
    loss = (y_pred - y).pow(2).sum()
    print(t, loss)

    # Backprop to compute gradients of w1 and w2 with respect to loss
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.t().mm(grad_y_pred)
    grad_h_relu = grad_y_pred.mm(w2.t())
    grad_h = grad_h_relu.clone()
    grad_h[h < 0] = 0
    grad_w1 = x.t().mm(grad_h)

    # Update weights using gradient descent
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2
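Since the docstring notes that GPU execution only requires casting Tensors to a cuda datatype, the dtype can also be chosen at runtime rather than by editing a comment. A minimal sketch using the same old Tensor-type API as the file above (the guard itself is illustrative):

import torch

# Pick the CUDA tensor type when a GPU is available, otherwise stay on the CPU.
dtype = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor

# Any tensor cast with .type(dtype) then lives on the chosen device.
x = torch.randn(64, 1000).type(dtype)
print(x.type())  # 'torch.FloatTensor' or 'torch.cuda.FloatTensor'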

tf_two_layer_net.py

Lines changed: 0 additions & 32 deletions
This file was deleted.

two_layer_net_autograd.py

Lines changed: 0 additions & 23 deletions
This file was deleted.

0 commit comments