add debugging

dpfried · dpfried · commit dcbe46ffca21 · 2021-01-07T18:48:23.000-07:00
diff --git a/captioning/data/dataloader.py b/captioning/data/dataloader.py
@@ -231,8 +231,8 @@ def collate_func(self, batch, split):
         # #sort by att_feat length
         # fc_batch, att_batch, label_batch, gts, infos = \
         #     zip(*sorted(zip(fc_batch, att_batch, np.vsplit(label_batch, batch_size), gts, infos), key=lambda x: len(x[1]), reverse=True))
-        fc_batch, att_batch, label_batch, gts, infos = \
-            zip(*sorted(zip(fc_batch, att_batch, label_batch, gts, infos), key=lambda x: 0, reverse=True))
+        # fc_batch, att_batch, label_batch, gts, infos = \
+        #     zip(*sorted(zip(fc_batch, att_batch, label_batch, gts, infos), key=lambda x: 0, reverse=True))
         data = {}
         data['fc_feats'] = np.stack(fc_batch)
         # merge att_feats
diff --git a/captioning/data/pth_loader.py b/captioning/data/pth_loader.py
@@ -218,8 +218,8 @@ def collate_func(self, batch):
         # #sort by att_feat length
         # fc_batch, att_batch, label_batch, gts, infos = \
         #     zip(*sorted(zip(fc_batch, att_batch, np.vsplit(label_batch, batch_size), gts, infos), key=lambda x: len(x[1]), reverse=True))
-        fc_batch, att_batch, label_batch, gts, infos = \
-            zip(*sorted(zip(fc_batch, att_batch, label_batch, gts, infos), key=lambda x: 0, reverse=True))
+        # fc_batch, att_batch, label_batch, gts, infos = \
+        #     zip(*sorted(zip(fc_batch, att_batch, label_batch, gts, infos), key=lambda x: 0, reverse=True))
         data = {}
         data['fc_feats'] = np.stack(fc_batch)
         # merge att_feats
diff --git a/captioning/models/AttModel.py b/captioning/models/AttModel.py
@@ -36,20 +36,33 @@ def sort_pack_padded_sequence(input, lengths):
     tmp = pack_padded_sequence(input[indices], sorted_lengths, batch_first=True)
     inv_ix = indices.clone()
     inv_ix[indices] = torch.arange(0,len(indices)).type_as(inv_ix)
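+    # inv_ix = torch.arange(0, len(indices)).type_as(indices)[indices]  # NOTE: NOT equivalent to the line above -- arange(n)[indices] is just `indices`, not its inverse permutation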
     return tmp, inv_ix
 
 def pad_unsort_packed_sequence(input, inv_ix):
     tmp, _ = pad_packed_sequence(input, batch_first=True)
     tmp = tmp[inv_ix]
     return tmp
 
-def pack_wrapper(module, att_feats, att_masks):
+def pack_wrapper_old(module, att_feats, att_masks):
     if att_masks is not None:
         packed, inv_ix = sort_pack_padded_sequence(att_feats, att_masks.data.long().sum(1))
         return pad_unsort_packed_sequence(PackedSequence(module(packed[0]), packed[1]), inv_ix)
     else:
         return module(att_feats)
 
+def pack_wrapper(module, att_feats, att_masks):
+    if att_masks is not None:
+        packed = pack_padded_sequence(att_feats, att_masks.data.long().sum(1), enforce_sorted=False, batch_first=True)
+        padded = pad_packed_sequence(PackedSequence(
+            data=module(packed.data), sorted_indices=packed.sorted_indices,
+            unsorted_indices=packed.unsorted_indices, batch_sizes=packed.batch_sizes
+        ),
+            batch_first=True)[0]
+        return padded
+    else:
+        return module(att_feats)
+
 class AttModel(CaptionModel):
     def __init__(self, opt):
         super(AttModel, self).__init__()
@@ -117,12 +130,12 @@ def _prepare_feature(self, fc_feats, att_feats, att_masks):
 
         # embed fc and att feats
         fc_feats = self.fc_embed(fc_feats)
-        att_feats = pack_wrapper(self.att_embed, att_feats, att_masks)
+        att_feats_wrapped = pack_wrapper(self.att_embed, att_feats, att_masks)
 
         # Project the attention feats first to reduce memory and computation comsumptions.
-        p_att_feats = self.ctx2att(att_feats)
+        p_att_feats = self.ctx2att(att_feats_wrapped)
 
-        return fc_feats, att_feats, p_att_feats, att_masks
+        return fc_feats, att_feats_wrapped, p_att_feats, att_masks
 
     def _forward(self, fc_feats, att_feats, seq, att_masks=None):
         batch_size = fc_feats.size(0)
@@ -353,13 +366,54 @@ def unflat_view(tensor):
             assert tensor.size(0) == batch_size * per_image_dim
             return tensor.view((batch_size, per_image_dim) + tensor.size()[1:])
 
-        # p_fc_feats, p_att_feats, pp_att_feats, p_att_masks = self._prepare_feature(fc_feats, att_feats, att_masks)
+        prepped_non_neighbor_feats = self._prepare_feature(fc_feats, att_feats, att_masks)
         # p_fc_feats_a, p_att_feats_a, pp_att_feats_a, p_att_masks_a =
         prepped_feats = self._prepare_feature(
             flat_view(neighbor_batch['fc_feats'].to(device)),
             flat_view(neighbor_batch['att_feats'].to(device)),
             flat_view(neighbor_batch['att_masks'].to(device)) if neighbor_batch['att_masks'] is not None else None,
         )
+        prepped_feats_trunc = self._prepare_feature(
+            flat_view(neighbor_batch['fc_feats'].to(device)),
+            flat_view(neighbor_batch['att_feats'][:,:,:prepped_non_neighbor_feats[1].size(1)].to(device)),
+            flat_view(neighbor_batch['att_masks'][:,:,:prepped_non_neighbor_feats[1].size(1)].to(device)) if neighbor_batch['att_masks'] is not None else None,
+        )
+
+        clipped = self.clip_att(
+            att_feats,
+            att_masks,
+        )
+        clipped_wrapped = pack_wrapper(self.att_embed, *clipped)
+
+        clipped_trunc = self.clip_att(
+            flat_view(neighbor_batch['att_feats'][:,:,:prepped_non_neighbor_feats[1].size(1)].to(device)),
+            flat_view(neighbor_batch['att_masks'][:,:,:prepped_non_neighbor_feats[1].size(1)].to(device)) if neighbor_batch['att_masks'] is not None else None,
+        )
+        clipped_trunc_wrapped = pack_wrapper(self.att_embed, *clipped_trunc)
+
+        embedded = self.att_embed(clipped[0])
+        embedded_trunc = self.att_embed(clipped_trunc[0])
+
+        # this passes:
+        # torch.allclose(prepped_non_neighbor_feats[0].view(10, -1), prepped_feats_trunc[0].view(10, 9, -1)[:,0])
+        # this fails:
+        # torch.allclose(prepped_non_neighbor_feats[1].view(10, -1), prepped_feats_trunc[1].view(10, 9, -1)[:,0])
+
+        # these both pass
+        # torch.allclose(clipped[0].view(10, -1), clipped_trunc[0].view(10, 9, -1)[:,0])
+        # torch.allclose(clipped[1].view(10, -1), clipped_trunc[1].view(10, 9, -1)[:,0])
+
+        # this fails:
+        # torch.allclose(clipped_trunc_wrapped.view(10, 9, -1)[:,0], clipped_wrapped.view(10, -1))
+
+        # torch.allclose(clipped[0].view(10, -1), clipped_trunc[0].view(10, 9, -1)[:,0])
+
+        # torch.allclose(self.att_embed(clipped[0]).view(10, -1), self.att_embed(clipped_trunc[0]).view(10, 9, -1)[:,0])
+        # Out[8]: False
+        # torch.allclose(self.att_embed(clipped[0]).view(10, -1), self.att_embed(clipped_trunc[0]).view(10, 9, -1)[:,0], atol=1e-4)
+        # Out[22]: True
+        # torch.allclose(self.att_embed(clipped[0]), self.att_embed(clipped_trunc[0].view(10, 9, *clipped[0].size()[1:])[:,0]))
+        # Out[9]: True
 
         assert beam_size <= self.vocab_size + 1, 'lets assume this for now, otherwise this corner case causes a few headaches down the road. can be dealt with in future if needed'
         seq = fc_feats.new_full((batch_size*sample_n, self.seq_length), self.pad_idx, dtype=torch.long)
@@ -372,16 +426,21 @@ def unflat_view(tensor):
 
         # first step, feed bos
         it = fc_feats.new_full([batch_size*per_image_dim], self.bos_idx, dtype=torch.long)
+        it_non_neighbor = fc_feats.new_full([batch_size], self.bos_idx, dtype=torch.long)
         # batch_size*per_image_dim x V
         logprobs, state = self.get_logprobs_state(it, *(prepped_feats + (state,)))
+        logprobs_non_neighbor, state_non_neighbor = self.get_logprobs_state(it_non_neighbor, *(prepped_non_neighbor_feats + (self.init_hidden(batch_size),)))
         # logprobs, state = self.get_logprobs_state(it, p_fc_feats_a, p_att_feats_a, pp_att_feats_a, p_att_masks_a, state)
 
-        # (batch_size*beam_size) x per_image_view x ...
+        # (batch_size*beam_size*per_image_dim) x ...
         repeated_feats = (combine_first_two(ten) for ten in utils.repeat_tensors(
             beam_size,
             [unflat_view(t) for t in prepped_feats]
             # [p_fc_feats_a, p_att_feats_a, pp_att_feats_a, p_att_masks_a]
         ))
+        repeated_non_neighbor_feats = utils.repeat_tensors(beam_size,
+                                                           prepped_non_neighbor_feats)
+        done_beams_non_neighbor = self.beam_search(state_non_neighbor, logprobs_non_neighbor, *repeated_non_neighbor_feats, opt=opt)
         self.done_beams = self.contrastive_beam_search(
             state, logprobs, *repeated_feats, opt=opt
         )
diff --git a/captioning/models/CaptionModel.py b/captioning/models/CaptionModel.py
@@ -241,10 +241,10 @@ def add_diversity(beam_seq_table, logprobs, t, divm, diversity_lambda, bdash):
 
     def contrastive_beam_search(self, init_state, init_logprobs, *args, **kwargs):
         # init_logprobs: batch_size*(num_distractors+1) x (vocab_size+1)
-        # args: each tensor is (batch_size*beam_size) x per_image_view x ...
+        # args: each tensor is (batch_size*beam_size*per_image_dim) x ...
 
-        # state: tuple of tensors (2 x batch_size*beam_size*per_image_view x d)
-        # init_state: (2 x batch_size*1*per_image_view x d) [beam_size initially like 0; call this "this_beam_size" later]
+        # state: tuple of tensors (2 x batch_size*beam_size*per_image_dim x d)
+        # init_state: (2 x batch_size*1*per_image_dim x d) [beam_size initially like 0; call this "this_beam_size" later]
 
         # does one step of classical beam search
 
@@ -253,6 +253,8 @@ def contrastive_beam_search(self, init_state, init_logprobs, *args, **kwargs):
         temperature = opt.get('temperature', 1) # This should not affect beam search, but will affect dbs
         beam_size = opt.get('beam_size', 10)
         diversity_lambda = opt.get('diversity_lambda', 0.5)
+        if (diversity_lambda != 0.0):
+            raise NotImplementedError()
         decoding_constraint = opt.get('decoding_constraint', 0)
         remove_bad_endings = opt.get('remove_bad_endings', 0)
         suppress_UNK = opt.get('suppress_UNK', 0)
@@ -264,7 +266,7 @@ def contrastive_beam_search(self, init_state, init_logprobs, *args, **kwargs):
 
         per_image_dim = (num_distractors+1)
         batch_size = init_logprobs.shape[0] // per_image_dim
-        assert args[0].size(0) == batch_size * per_image_dim * beam_size
+        assert args[0].size(0) == batch_size * beam_size * per_image_dim
 
         V = init_logprobs.size(-1)
 
@@ -311,7 +313,8 @@ def contrastive_beam_search(self, init_state, init_logprobs, *args, **kwargs):
 
             logprobs = log_s1[:,0]
             if t == 0:
-                assert torch.allclose(logprobs[:,0], logprobs[:,1])
+                if beam_size > 1:
+                    assert torch.allclose(logprobs[:,0], logprobs[:,1])
                 logprobs = logprobs[:,0]
             logprobs = logprobs.contiguous().view(-1, V)
 
diff --git a/eval_literal.sh b/eval_literal.sh
@@ -5,7 +5,7 @@ model_dir="models/updown"
 split=$1
 beam_size=$2
 
-id="literal_${split}_bs-${beam_size}"
+id="literal_${split}_bs-${beam_size}_dl-0.0"
 
 python -u tools/eval.py \
 	--id $id \
@@ -20,4 +20,5 @@ python -u tools/eval.py \
   --infos_path ${model_dir}/infos_tds-best.pkl \
   --language_eval 1 \
   --beam_size $beam_size \
+  --diversity_lambda 0.0 \
   | tee expts/${id}