From f24dae7855d70c74c385d75065d5b79d79e52025 Mon Sep 17 00:00:00 2001
From: zhangheng19931123
Date: Tue, 13 Oct 2020 01:16:00 +0200
Subject: [PATCH] first commit

---
 README.md       | 68 -------------------------------------------------
 main.py         |  5 ++--
 models/pafpn.py | 16 ++++++------
 3 files changed, 11 insertions(+), 78 deletions(-)
 delete mode 100644 README.md

diff --git a/README.md b/README.md
deleted file mode 100644
index d7835b6..0000000
--- a/README.md
+++ /dev/null
@@ -1,68 +0,0 @@
-# Localize to Classify and Classify to Localize: Mutual Guidance in Object Detection
-By Heng Zhang, Elisa FROMONT, Sébastien LEFEVRE, Bruno AVIGNON
-## Introduction
-Most deep learning object detectors are based on the anchor mechanism and resort to the Intersection over Union (IoU) between predefined anchor boxes and ground truth boxes to evaluate the matching quality between anchors and objects. In this paper, we question this use of IoU and propose a new anchor matching criterion guided, during the training phase, by the optimization of both the localization and the classification tasks: the predictions related to one task are used to dynamically assign sample anchors and improve the model on the other task, and vice versa. This is the PyTorch implementation of Mutual Guidance detectors. For more details, please refer to our [ACCV paper](https://arxiv.org/pdf/2009.14085.pdf).
-
-
-
-## Experimental results
-### VOC2007 Test
-| **Detector** | **Resolution** | **mAP** | **AP50** | **AP75** | **Trained model** |
-|:-------|:-----:|:-------:|:-------:|:-------:|:-------:|
-| FSSD (VGG16) | 320x320 | 54.1 | 80.1 | 58.3 | [Google Drive](https://drive.google.com/file/d/1IOTIyS9hZY7-g3RP2p3OkcVmtGdmWJIc/view?usp=sharing) |
-| FSSD (VGG16) + MG | 320x320 | **56.2** | **80.4** | **61.4** | [Google Drive](https://drive.google.com/file/d/1bFVrBPPQDymstgjwlss3AUK6WrN-iszr/view?usp=sharing) |
-| RetinaNet (VGG16) | 320x320 | 55.2 | 80.2 | 59.6 | [Google Drive](https://drive.google.com/file/d/1c3bGwtFRD9GvxdyqDq1jknlZvRPpxjUi/view?usp=sharing) |
-| RetinaNet (VGG16) + MG | 320x320 | **57.7** | **81.1** | **62.9** | [Google Drive](https://drive.google.com/file/d/1vviR8H6xHfvY5Q4DDmZQ-lWLjEpPqrLr/view?usp=sharing) |
-| RFBNet (VGG16) | 320x320 | 55.6 | 80.9 | 59.6 | [Google Drive](https://drive.google.com/file/d/1MOM4pTh4TQ1l3ADFqT-BLL9RoSJK33v3/view?usp=sharing) |
-| RFBNet (VGG16) + MG | 320x320 | **57.9** | **81.5** | **62.6** | [Google Drive](https://drive.google.com/file/d/1Nb6NPa4aNfz49NhGeTTfgW2vR-UVUzIz/view?usp=sharing) |
-| RetinaNet (VGG16) + PAFPN | 320x320 | 58.1 | 81.7 | 63.3 | |
-| RetinaNet (VGG16) + PAFPN + MG | 320x320 | **59.5** | **82.3** | **64.2** | |
-### COCO2017 Val
-| **Detector** | **Resolution** | **mAP** | **AP50** | **AP75** | **FPS** (V100) | **Trained model** |
-|:-------|:-----:|:-------:|:-------:|:-------:|:-------:|:-------:|
-| FSSD (VGG16) | 320x320 | 31.1 | 48.9 | 32.7 | 365 | [Google Drive](https://drive.google.com/file/d/1i6frTMPX1Bi-OpTZEyRYsPQTnAPyEplb/view?usp=sharing) |
-| FSSD (VGG16) + MG | 320x320 | **32.0** | **49.3** | **33.9** | 365 | [Google Drive](https://drive.google.com/file/d/1bSOTSRMPkc6WDiL8AdKtvaZKbxtbEGFp/view?usp=sharing) |
-| RetinaNet (VGG16) | 320x320 | 32.3 | 50.3 | 34.0 | 270 | [Google Drive](https://drive.google.com/file/d/1Gx0I1sTqgFmUtQln0NPrT4_k9x2VCIUM/view?usp=sharing) |
-| RetinaNet (VGG16) + MG | 320x320 | **33.6** | **50.8** | **35.7** | 270 | [Google Drive](https://drive.google.com/file/d/12Af5Pz-Zsl8oww7NjDmjWjvT0br6zFTn/view?usp=sharing) |
-| RFBNet (VGG16) | 320x320 | 33.4 | 51.6 | 35.1 | 115 | [Google Drive](https://drive.google.com/file/d/1KnNcYBCKA53MJ70rpRoMk-Q247FVTH4K/view?usp=sharing) |
-| RFBNet (VGG16) + MG | 320x320 | **34.6** | **52.0** | **36.8** | 115 | [Google Drive](https://drive.google.com/file/d/1rZ_hKWLGASDlRKNEEdAA5V0vb6st5Sqk/view?usp=sharing) |
-| RetinaNet (VGG16) + PAFPN | 320x320 | 33.9 | 51.9 | 35.7 | 220 | [Google Drive](https://drive.google.com/file/d/13zBaiJ7LvlvPBogKB069OPhuV6JLKZzg/view?usp=sharing) |
-| RetinaNet (VGG16) + PAFPN + MG | 320x320 | **35.3** | **52.4** | **37.3** | 220 | [Google Drive](https://drive.google.com/file/d/1IC18t7wnnm1Wk8q9UpkPzGy2-g68_uyY/view?usp=sharing) |
-| RetinaNet (VGG16) | 512x512 | 37.1 | 56.5 | 39.5 | 250 | |
-| RetinaNet (VGG16) + MG | 512x512 | **38.2** | **56.6** | **41.0** | 250 | |
-| RetinaNet (VGG16) + PAFPN | 512x512 | 38.5 | 57.6 | 41.0 | 195 | [Google Drive](https://drive.google.com/file/d/1yBllIGiix3FF5njQzV39Uhbz4dSAJrgO/view?usp=sharing) |
-| RetinaNet (VGG16) + PAFPN + MG | 512x512 | **39.4** | **57.5** | **42.3** | 195 | [Google Drive](https://drive.google.com/file/d/1kj0auR9w2zZeSSffFuS-MS0pX07Ro61T/view?usp=sharing) |
-## Datasets
-First download the VOC and COCO datasets; you may find the scripts in `data/scripts/` useful.
-Then create a folder named `datasets` and link the downloaded datasets inside:
-```Shell
-$ mkdir datasets
-$ ln -s /path_to_your_voc_dataset datasets/VOCdevkit
-$ ln -s /path_to_your_coco_dataset datasets/coco2017
-```
-Finally, prepare folders to save evaluation results:
-```Shell
-$ mkdir eval
-$ mkdir eval/COCO
-$ mkdir eval/VOC
-```
-## Training
-For training with Mutual Guide:
-```Shell
-$ python3 main.py --version fssd --backbone vgg16 --dataset voc --size 320 --mutual_guide
-                            retinanet            resnet18          coco        512
-                            rfbnet
-                            pafpn
-```
-**Remarks:**
-- For training without Mutual Guide, just remove `--mutual_guide`;
-- The default folder to save trained models is `weights/`.
-## Evaluation
-Every time you want to evaluate a trained network:
-```Shell
-$ python3 main.py --version fssd --backbone vgg16 --dataset voc --size 320 --trained_model path_to_saved_weights
-                            retinanet            resnet18          coco        512
-                            pafpn
-                            rfbnet
-```
-It will directly print the mAP, AP50 and AP75 results on VOC2007 Test or COCO2017 Val.
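
Context note for reviewers: the Introduction of the README removed above contrasts Mutual Guidance with the conventional IoU-based anchor matching it questions. The sketch below only illustrates that conventional baseline; it is not code from this repository or from the paper, and the function names, the (x1, y1, x2, y2) box format, and the 0.5 threshold are assumptions for illustration.

```Python
# Illustrative sketch of plain IoU-based anchor-to-ground-truth matching.
# Not the repository's implementation; names and threshold are placeholders.
import torch


def iou_matrix(anchors, gt_boxes):
    """Pairwise IoU between (A, 4) anchors and (G, 4) ground-truth boxes."""
    area_a = (anchors[:, 2] - anchors[:, 0]) * (anchors[:, 3] - anchors[:, 1])
    area_g = (gt_boxes[:, 2] - gt_boxes[:, 0]) * (gt_boxes[:, 3] - gt_boxes[:, 1])
    lt = torch.max(anchors[:, None, :2], gt_boxes[None, :, :2])   # (A, G, 2) top-left
    rb = torch.min(anchors[:, None, 2:], gt_boxes[None, :, 2:])   # (A, G, 2) bottom-right
    wh = (rb - lt).clamp(min=0)
    inter = wh[..., 0] * wh[..., 1]
    return inter / (area_a[:, None] + area_g[None, :] - inter + 1e-9)


def match_anchors(anchors, gt_boxes, iou_thresh=0.5):
    """Assign each anchor the index of its best ground-truth box, or -1 if unmatched."""
    ious = iou_matrix(anchors, gt_boxes)       # (A, G)
    best_iou, best_gt = ious.max(dim=1)        # best ground truth per anchor
    return torch.where(best_iou >= iou_thresh, best_gt,
                       torch.full_like(best_gt, -1))
```

Mutual Guidance replaces this static IoU criterion with a dynamic assignment driven by the localization and classification predictions themselves; see the ACCV paper linked in the removed README for the actual criterion.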
diff --git a/main.py b/main.py
index fc0af75..4ccdbce 100644
--- a/main.py
+++ b/main.py
@@ -265,8 +265,9 @@ def eval_model(
         images = Variable(images.cuda())
         targets = [Variable(anno.cuda()) for anno in targets]
         out = model(images)
-        (loss_l, loss_c) = criterion(out, priors, targets)
-        loss = loss_l + loss_c
+        (loss_l, loss_c) = criterion(out[:2], priors, targets)
+        fc_pred = out[-1]
+        loss = loss_l + loss_c + F.binary_cross_entropy_with_logits(fc_pred, fc_target)
         optimizer.zero_grad()
         loss.backward()
         optimizer.step()
diff --git a/models/pafpn.py b/models/pafpn.py
index 56f05b2..d5b0c05 100644
--- a/models/pafpn.py
+++ b/models/pafpn.py
@@ -12,7 +12,7 @@ class CEM(nn.Module):
 
     """Context Enhancement Module"""
 
-    def __init__(self, channels, fea_channel=256):
+    def __init__(self, channels, fea_channel=256, num_classes=20):
         super(CEM, self).__init__()
         self.cv1 = BasicConv(channels[0], fea_channel, kernel_size=1, padding=0)
 
@@ -21,12 +21,15 @@ def __init__(self, channels, fea_channel=256):
 
         self.gap = nn.AdaptiveAvgPool2d(1)
         self.cv3 = BasicConv(channels[1], fea_channel, kernel_size=1, padding=0)
+        self.fc = nn.Linear(fea_channel, num_classes)
 
     def forward(self, inputs):
         C4_lat = self.cv1(inputs[0])
         C5_lat = self.cv2(inputs[1])
         Cglb_lat = self.cv3(self.gap(inputs[1]))
-        return C4_lat + C5_lat + Cglb_lat
+        fc_lat = torch.flatten(Cglb_lat, 1)
+        fc_lat = self.fc(fc_lat)
+        return C4_lat + C5_lat + Cglb_lat, fc_lat
 
 
 def fpn_feature_extractor(fpn_level, fea_channel=256):
@@ -110,7 +113,7 @@ def __init__(
 
         # Extra layers
 
-        self.ft_module = CEM(channels)
+        self.ft_module = CEM(channels, num_classes=self.num_classes)
         self.pyramid_ext = fpn_feature_extractor(self.fpn_level)
         self.lateral_convs = lateral_convs(self.fpn_level)
         self.fpn_convs = fpn_convs(self.fpn_level)
@@ -144,7 +147,7 @@ def forward(self, x):
         # backbone
 
         source_features = self.backbone(x)
-        x = self.ft_module(source_features)
+        x, fc = self.ft_module(source_features)
 
         # detection
 
@@ -177,10 +180,7 @@ def forward(self, x):
 
         loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
         conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)
-        return (loc.view(loc.size(0), -1, 4), conf.view(conf.size(0),
-                -1, self.num_classes))  # loc preds
-                                        # conf preds
-
+        return (loc.view(loc.size(0), -1, 4), conf.view(conf.size(0), -1, self.num_classes), fc)
 
 
 def build_net(size=320, num_classes=20, backbone='vgg16'):
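
Reviewer note on the hunks above: the new loss term in main.py supervises `fc_pred` (the global-context logits returned by the modified `CEM`) with `F.binary_cross_entropy_with_logits(fc_pred, fc_target)`, but `fc_target` is not defined in the context shown. A plausible reading, offered only as a hedged sketch and not as code from this patch, is an image-level multi-label target built from the ground-truth annotations. The helper names below, and the assumption that each annotation row stores its class index in the last column (as in SSD-style loaders), are mine.

```Python
# Assumed companion sketch for the auxiliary classification branch added above.
# The annotation layout and all names here are assumptions, not part of the patch.
import torch
import torch.nn.functional as F


def build_fc_target(targets, num_classes):
    """Image-level multi-label target: 1 for every class present in the image."""
    fc_target = torch.zeros(len(targets), num_classes, device=targets[0].device)
    for i, anno in enumerate(targets):
        labels = anno[:, -1].long()      # assumed: class index in the last column
        fc_target[i, labels] = 1.0
    return fc_target


def auxiliary_classification_loss(fc_pred, targets, num_classes):
    """BCE-with-logits between CEM's global-context logits and the image-level target."""
    fc_target = build_fc_target(targets, num_classes)
    return F.binary_cross_entropy_with_logits(fc_pred, fc_target)


# In the training loop shown in main.py, this would correspond to something like:
#     loss = loss_l + loss_c + auxiliary_classification_loss(out[-1], targets, num_classes)
```

The design intent appears to be a cheap global-classification auxiliary task on the pooled `Cglb_lat` feature; the detection heads are unchanged, and `fc` is simply appended to the network's output tuple.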