Commit d28debf: initial commit

8 files changed, 203 insertions(+), 0 deletions(-)
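
The commit adds a set of small, self-contained training scripts that fit a network to random data, implementing the same task at increasing levels of abstraction: raw tensors with hand-written backprop, autograd, the nn and optim packages, a custom nn.Module, a simple recurrent variant, and a TensorFlow version for comparison.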

.gitignore

Lines changed: 1 addition & 0 deletions

*.swp

recurrent_net.py

Lines changed: 36 additions & 0 deletions
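
A network whose hidden layer is applied three times with shared weights, showing how ordinary Python control flow inside forward defines the computation; trained with MSE loss and SGD with momentum.
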
import torch
from torch.autograd import Variable


class RecurrentNet(torch.nn.Module):
    def __init__(self, D_in, H, D_out):
        super(RecurrentNet, self).__init__()
        self.linear1 = torch.nn.Linear(D_in, H)
        self.recurrent_linear = torch.nn.Linear(H, H)
        self.linear2 = torch.nn.Linear(H, D_out)

    def forward(self, x):
        # Input layer, then the same hidden layer applied three times
        # (shared weights), each followed by a ReLU via clamp.
        h_relu = self.linear1(x).clamp(min=0)
        for _ in range(3):
            h_relu = self.recurrent_linear(h_relu).clamp(min=0)
        y_pred = self.linear2(h_relu)
        return y_pred


N, D_in, H, D_out = 64, 1000, 100, 10

x = Variable(torch.randn(N, D_in))
y = Variable(torch.randn(N, D_out), requires_grad=False)

model = RecurrentNet(D_in, H, D_out)
criterion = torch.nn.MSELoss(size_average=False)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4, momentum=0.9)
for t in range(500):
    y_pred = model(x)
    loss = criterion(y_pred, y)
    print(t, loss.data[0])

    # Zero accumulated gradients, backprop, and take an SGD step.
    model.zero_grad()
    loss.backward()
    optimizer.step()

tf_two_layer_net.py

Lines changed: 32 additions & 0 deletions
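
The same two-layer ReLU network as a static TensorFlow graph: placeholders for the data, symbolic gradients from tf.gradients, and explicit assign ops for the weight updates, all run inside a Session.
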
import tensorflow as tf
import numpy as np

N, D_in, H, D_out = 64, 1000, 100, 10

# Placeholders for the input and target data.
x = tf.placeholder(tf.float32, shape=(None, D_in))
y = tf.placeholder(tf.float32, shape=(None, D_out))

w1 = tf.Variable(tf.random_normal((D_in, H)))
w2 = tf.Variable(tf.random_normal((H, D_out)))

# Forward pass and squared-error loss, defined symbolically.
h = tf.matmul(x, w1)
h_relu = tf.maximum(h, tf.zeros(1))
y_pred = tf.matmul(h_relu, w2)
loss = tf.reduce_sum((y - y_pred) ** 2.0)

# Symbolic gradients and explicit gradient-descent update ops.
learning_rate = 1e-6
grad_w1, grad_w2 = tf.gradients(loss, [w1, w2])
new_w1 = w1.assign(w1 - learning_rate * grad_w1)
new_w2 = w2.assign(w2 - learning_rate * grad_w2)


with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    x_value = np.random.randn(N, D_in)
    y_value = np.random.randn(N, D_out)
    for _ in range(500):
        # Running the update ops alongside the loss applies one SGD step.
        loss_value, _, _ = sess.run(
            [loss, new_w1, new_w2],
            feed_dict={x: x_value, y: y_value})
        print(loss_value)

two_layer_net_autograd.py

Lines changed: 23 additions & 0 deletions
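
The two-layer network trained with autograd: the forward pass is written as Variable operations, loss.backward() fills in w1.grad and w2.grad, and the weights are updated with manual SGD steps.
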
import torch
from torch.autograd import Variable

N, D_in, H, D_out = 64, 1000, 100, 10

x = Variable(torch.randn(N, D_in))
y = Variable(torch.randn(N, D_out))

# Weights need gradients; inputs and targets do not.
w1 = Variable(torch.randn(D_in, H), requires_grad=True)
w2 = Variable(torch.randn(H, D_out), requires_grad=True)

learning_rate = 1e-6
for t in range(500):
    y_pred = x.mm(w1).clamp(min=0).mm(w2)
    loss = (y_pred - y).pow(2).sum()

    # Gradients accumulate into .grad, so zero the buffers
    # before the backward pass.
    w1.grad.data.zero_()
    w2.grad.data.zero_()
    loss.backward()

    print(t, loss.data[0])
    w1.data -= learning_rate * w1.grad.data
    w2.data -= learning_rate * w2.grad.data

two_layer_net_module.py

Lines changed: 33 additions & 0 deletions
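
The same model packaged as a custom nn.Module subclass, with the layers declared in __init__ and composed in forward; training uses MSELoss and torch.optim.SGD.
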
import torch
from torch.autograd import Variable


class TwoLayerNet(torch.nn.Module):
    def __init__(self, D_in, H, D_out):
        super(TwoLayerNet, self).__init__()
        self.linear1 = torch.nn.Linear(D_in, H)
        self.linear2 = torch.nn.Linear(H, D_out)

    def forward(self, x):
        h_relu = self.linear1(x).clamp(min=0)
        y_pred = self.linear2(h_relu)
        return y_pred


N, D_in, H, D_out = 64, 1000, 100, 10

x = Variable(torch.randn(N, D_in))
y = Variable(torch.randn(N, D_out), requires_grad=False)

model = TwoLayerNet(D_in, H, D_out)
criterion = torch.nn.MSELoss(size_average=False)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4)
for t in range(500):
    y_pred = model(x)
    loss = criterion(y_pred, y)
    print(t, loss.data[0])

    model.zero_grad()
    loss.backward()
    optimizer.step()

two_layer_net_nn.py

Lines changed: 25 additions & 0 deletions
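
The model built from the nn package with nn.Sequential; gradients still come from loss.backward(), but the parameter update remains a hand-written SGD step over model.parameters().
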
import torch
from torch.autograd import Variable

N, D_in, H, D_out = 64, 1000, 100, 10

x = Variable(torch.randn(N, D_in))
y = Variable(torch.randn(N, D_out), requires_grad=False)

model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out),
)
criterion = torch.nn.MSELoss(size_average=False)

learning_rate = 1e-4
for t in range(500):
    y_pred = model(x)
    loss = criterion(y_pred, y)
    print(t, loss.data[0])

    model.zero_grad()
    loss.backward()
    # Manual SGD step over all parameters of the model.
    for param in model.parameters():
        param.data -= learning_rate * param.grad.data

two_layer_net_optim.py

Lines changed: 25 additions & 0 deletions
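
Identical to two_layer_net_nn.py except that the hand-written parameter update is replaced by a torch.optim.SGD optimizer and a call to optimizer.step().
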
import torch
from torch.autograd import Variable

N, D_in, H, D_out = 64, 1000, 100, 10

x = Variable(torch.randn(N, D_in))
y = Variable(torch.randn(N, D_out), requires_grad=False)

model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out),
)
criterion = torch.nn.MSELoss(size_average=False)

optimizer = torch.optim.SGD(model.parameters(), lr=1e-4)
for t in range(500):
    y_pred = model(x)
    loss = criterion(y_pred, y)
    print(t, loss.data[0])

    model.zero_grad()
    loss.backward()
    optimizer.step()

two_layer_net_tensor.py

Lines changed: 28 additions & 0 deletions
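
The lowest-level version: raw Tensors only, with both the forward pass and the backward pass (the gradients of the squared-error loss) written out by hand.
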
import torch

N, D_in, H, D_out = 64, 1000, 100, 10

x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

w1 = torch.randn(D_in, H)
w2 = torch.randn(H, D_out)

learning_rate = 1e-6
for t in range(500):
    # Forward pass: linear, ReLU (clamp), linear.
    h = x.mm(w1)
    h_relu = h.clamp(min=0)
    y_pred = h_relu.mm(w2)

    loss = (y_pred - y).pow(2).sum()
    print(t, loss)

    # Backward pass, written out by hand via the chain rule.
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.t().mm(grad_y_pred)
    grad_h_relu = grad_y_pred.mm(w2.t())
    grad_h = grad_h_relu.clone()
    grad_h[h < 0] = 0  # ReLU passes gradient only where h is positive
    grad_w1 = x.t().mm(grad_h)

    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2
