@@ -64,22 +64,23 @@ def denorm(x):
         # Build mini-batch dataset
         batch_size = images.size(0)
         images = to_var(images.view(batch_size, -1))
+
         # Create the labels which are later used as input for the BCE loss
         real_labels = to_var(torch.ones(batch_size))
         fake_labels = to_var(torch.zeros(batch_size))
 
         #============= Train the discriminator =============#
-        # Compute loss with real images
+        # Compute BCE_Loss using real images where BCE_Loss(x, y): - y * log(D(x)) - (1-y) * log(1 - D(x))
+        # Second term of the loss is always zero since real_labels == 1
         outputs = D(images)
-        # Apply BCE loss. Second term is always zero since real_labels == 1
         d_loss_real = criterion(outputs, real_labels)
         real_score = outputs
 
-        # Compute loss with fake images
+        # Compute BCELoss using fake images
+        # First term of the loss is always zero since fake_labels == 0
         z = to_var(torch.randn(batch_size, 64))
         fake_images = G(z)
         outputs = D(fake_images)
-        # Apply BCE loss. First term is always zero since fake_labels == 0
         d_loss_fake = criterion(outputs, fake_labels)
         fake_score = outputs
 
@@ -94,11 +95,9 @@ def denorm(x):
         z = to_var(torch.randn(batch_size, 64))
         fake_images = G(z)
         outputs = D(fake_images)
-        # remember that min log(1-D(G(z))) has the same fix point as max log(D(G(z)))
-        # Here we maximize log(D(G(z))), which is exactly the first term in the BCE loss
-        # with t=1. (see definition of BCE for info on t)
-        # t==1 is valid for real_labels, thus we use them as input for the BCE loss.
-        # Don't get yourself confused by this. It is just convenient to use to the BCE loss.
+
+        # We train G to maximize log(D(G(z))) instead of minimizing log(1 - D(G(z)))
+        # For the reasoning behind this, see the last paragraph of Section 3 of https://arxiv.org/pdf/1406.2661.pdf
         g_loss = criterion(outputs, real_labels)
 
         # Backprop + Optimize
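
The updated comments describe the standard non-saturating GAN objective: the discriminator's BCE loss splits into a real term and a fake term, and the generator reuses real_labels so its loss reduces to -log(D(G(z))). Below is a minimal standalone sketch of that loss structure; the tiny D and G networks, the 784-dimensional images, and the 64-dimensional noise are illustrative assumptions, not the repository's actual models.

import torch
import torch.nn as nn

# Illustrative stand-ins for the real networks and data (assumptions, not the repo's models)
D = nn.Sequential(nn.Linear(784, 256), nn.LeakyReLU(0.2), nn.Linear(256, 1), nn.Sigmoid())
G = nn.Sequential(nn.Linear(64, 256), nn.ReLU(), nn.Linear(256, 784), nn.Tanh())
criterion = nn.BCELoss()  # BCE(o, y) = -y*log(o) - (1-y)*log(1-o)

batch_size = 16
images = torch.randn(batch_size, 784)       # stand-in for a flattened real mini-batch
real_labels = torch.ones(batch_size, 1)
fake_labels = torch.zeros(batch_size, 1)

# Discriminator, real term: y == 1, so the (1-y)*log(1-o) term vanishes
d_loss_real = criterion(D(images), real_labels)

# Discriminator, fake term: y == 0, so the y*log(o) term vanishes
z = torch.randn(batch_size, 64)
fake_images = G(z)
d_loss_fake = criterion(D(fake_images), fake_labels)
d_loss = d_loss_real + d_loss_fake

# Generator: feeding real_labels makes the loss -log(D(G(z))),
# i.e. G maximizes log(D(G(z))) rather than minimizing log(1 - D(G(z)))
g_loss = criterion(D(fake_images), real_labels)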