
Commit b1f1a54

More uniform treatment of classifiers across all models, reduce code duplication.
1 parent 9806f3e

22 files changed: +173 −207 lines changed
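The refactor replaces each model's hand-rolled SelectAdaptivePool2d + nn.Linear/nn.Conv2d head construction with a shared create_classifier helper that returns a (global_pool, classifier) pair. The helper itself is added elsewhere in this commit (under timm/models/layers) and is not shown in the diffs below, so the following is only a rough sketch of the interface its call sites imply, with pool handling simplified to 'avg' / '' (pass-through):

import torch
import torch.nn as nn


def create_classifier_sketch(num_features, num_classes, pool_type='avg', use_conv=False):
    # pooled output is flattened for Linear heads, left 4D (N, C, 1, 1) for 1x1-conv heads
    if pool_type:
        pool = nn.Sequential(nn.AdaptiveAvgPool2d(1), nn.Identity() if use_conv else nn.Flatten(1))
    else:
        pool = nn.Identity()  # pass-through pooling, e.g. after reset_classifier(0, '')
    if num_classes <= 0:
        classifier = nn.Identity()  # classifier removed, features pass straight through
    elif use_conv:
        classifier = nn.Conv2d(num_features, num_classes, kernel_size=1, bias=True)
    else:
        classifier = nn.Linear(num_features, num_classes, bias=True)
    return pool, classifier


pool, head = create_classifier_sketch(2048, 1000)
print(head(pool(torch.randn(2, 2048, 7, 7))).shape)  # torch.Size([2, 1000])

The real SelectAdaptivePool2d-based helper additionally supports max/avgmax/catavgmax pooling and exposes is_identity(), which the DLA and DPN forward passes below rely on.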

tests/test_models.py

Lines changed: 26 additions & 2 deletions
@@ -4,8 +4,14 @@
 import os
 import fnmatch

+import timm
 from timm import list_models, create_model, set_scriptable

+if hasattr(torch._C, '_jit_set_profiling_executor'):
+    # legacy executor is too slow to compile large models for unit tests
+    # no need for the fusion performance here
+    torch._C._jit_set_profiling_executor(True)
+    torch._C._jit_set_profiling_mode(False)

 if 'GITHUB_ACTIONS' in os.environ:  # and 'Linux' in platform.system():
     # GitHub Linux runner is slower and hits memory limits sooner than MacOS, exclude bigger models
@@ -78,10 +84,28 @@ def test_model_default_cfgs(model_name, batch_size):

     if all([x <= MAX_FWD_FEAT_SIZE for x in input_size]) and \
             not any([fnmatch.fnmatch(model_name, x) for x in EXCLUDE_FILTERS]):
-        # pool size only checked if default res <= 448 * 448 to keep resource down
+        # output sizes only checked if default res <= 448 * 448 to keep resource down
         input_size = tuple([min(x, MAX_FWD_FEAT_SIZE) for x in input_size])
-        outputs = model.forward_features(torch.randn((batch_size, *input_size)))
+        input_tensor = torch.randn((batch_size, *input_size))
+
+        # test forward_features (always unpooled)
+        outputs = model.forward_features(input_tensor)
         assert outputs.shape[-1] == pool_size[-1] and outputs.shape[-2] == pool_size[-2]
+
+        # test forward after deleting the classifier, output should be pooled, size(-1) == model.num_features
+        model.reset_classifier(0)
+        outputs = model.forward(input_tensor)
+        assert len(outputs.shape) == 2
+        assert outputs.shape[-1] == model.num_features
+
+        # test model forward without pooling and classifier
+        if not isinstance(model, timm.models.MobileNetV3):
+            model.reset_classifier(0, '')  # reset classifier and set global pooling to pass-through
+            outputs = model.forward(input_tensor)
+            assert len(outputs.shape) == 4
+            assert outputs.shape[-1] == pool_size[-1] and outputs.shape[-2] == pool_size[-2]
+
+    # check classifier and first convolution names match those in default_cfg
     assert any([k.startswith(classifier) for k in state_dict.keys()]), f'{classifier} not in model params'
     assert any([k.startswith(first_conv) for k in state_dict.keys()]), f'{first_conv} not in model params'
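For reference, the three output modes exercised by the new assertions look like this in use (a minimal sketch using resnet18 as an arbitrary example model; shapes assume its default 224x224 config):

import torch
import timm

model = timm.create_model('resnet18', pretrained=False)
x = torch.randn(1, 3, 224, 224)

print(model(x).shape)                   # torch.Size([1, 1000]) - pooled features + classifier
print(model.forward_features(x).shape)  # torch.Size([1, 512, 7, 7]) - unpooled feature map

model.reset_classifier(0)               # drop the classifier, keep global pooling
print(model(x).shape)                   # torch.Size([1, 512]) - size(-1) == model.num_features

model.reset_classifier(0, '')           # drop the classifier and set pooling to pass-through
print(model(x).shape)                   # torch.Size([1, 512, 7, 7]) - raw feature map again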

timm/models/densenet.py

Lines changed: 6 additions & 10 deletions
@@ -14,7 +14,7 @@

 from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
 from .helpers import build_model_with_cfg
-from .layers import SelectAdaptivePool2d, BatchNormAct2d, create_norm_act, BlurPool2d
+from .layers import BatchNormAct2d, create_norm_act, BlurPool2d, create_classifier
 from .registry import register_model

 __all__ = ['DenseNet']
@@ -236,8 +236,8 @@ def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16), bn_size=4, stem
         self.num_features = num_features

         # Linear layer
-        self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
-        self.classifier = nn.Linear(self.num_features * self.global_pool.feat_mult(), num_classes)
+        self.global_pool, self.classifier = create_classifier(
+            self.num_features, self.num_classes, pool_type=global_pool)

         # Official init from torch repo.
         for m in self.modules():
@@ -254,19 +254,15 @@ def get_classifier(self):

     def reset_classifier(self, num_classes, global_pool='avg'):
         self.num_classes = num_classes
-        self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
-        if num_classes:
-            num_features = self.num_features * self.global_pool.feat_mult()
-            self.classifier = nn.Linear(num_features, num_classes)
-        else:
-            self.classifier = nn.Identity()
+        self.global_pool, self.classifier = create_classifier(
+            self.num_features, self.num_classes, pool_type=global_pool)

     def forward_features(self, x):
         return self.features(x)

     def forward(self, x):
         x = self.forward_features(x)
-        x = self.global_pool(x).flatten(1)
+        x = self.global_pool(x)
         # both classifier and block drop?
         # if self.drop_rate > 0.:
         #     x = F.dropout(x, p=self.drop_rate, training=self.training)

timm/models/dla.py

Lines changed: 8 additions & 11 deletions
@@ -13,7 +13,7 @@

 from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
 from .helpers import build_model_with_cfg
-from .layers import SelectAdaptivePool2d
+from .layers import create_classifier
 from .registry import register_model

 __all__ = ['DLA']
@@ -286,9 +286,8 @@ def __init__(self, levels, channels, output_stride=32, num_classes=1000, in_chan
         ]

         self.num_features = channels[-1]
-        self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
-        self.fc = nn.Conv2d(self.num_features * self.global_pool.feat_mult(), num_classes, 1, bias=True)
-
+        self.global_pool, self.fc = create_classifier(
+            self.num_features, self.num_classes, pool_type=global_pool, use_conv=True)
         for m in self.modules():
             if isinstance(m, nn.Conv2d):
                 n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
@@ -313,12 +312,8 @@ def get_classifier(self):

     def reset_classifier(self, num_classes, global_pool='avg'):
         self.num_classes = num_classes
-        self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
-        if num_classes:
-            num_features = self.num_features * self.global_pool.feat_mult()
-            self.fc = nn.Conv2d(num_features, num_classes, kernel_size=1, bias=True)
-        else:
-            self.fc = nn.Identity()
+        self.global_pool, self.fc = create_classifier(
+            self.num_features, self.num_classes, pool_type=global_pool, use_conv=True)

     def forward_features(self, x):
         x = self.base_layer(x)
@@ -336,7 +331,9 @@ def forward(self, x):
         if self.drop_rate > 0.:
            x = F.dropout(x, p=self.drop_rate, training=self.training)
         x = self.fc(x)
-        return x.flatten(1)
+        if not self.global_pool.is_identity():
+            x = x.flatten(1)  # conv classifier, flatten if pooling isn't pass-through (disabled)
+        return x


 def _create_dla(variant, pretrained=False, **kwargs):
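The conditional flatten above (mirrored in dpn.py below) exists because these models use a 1x1-conv classifier: with global pooling active the logits come out as (N, num_classes, 1, 1) and should be flattened, while with pooling disabled the 4D logit map should be returned untouched. A standalone shape illustration (plain torch, not timm code):

import torch
import torch.nn as nn

fc = nn.Conv2d(1024, 1000, kernel_size=1, bias=True)  # stand-in for the DLA/DPN conv classifier
feat = torch.randn(2, 1024, 7, 7)

pooled = nn.AdaptiveAvgPool2d(1)(feat)  # (2, 1024, 1, 1) when global pooling is enabled
print(fc(pooled).flatten(1).shape)      # torch.Size([2, 1000]) - flatten after the conv head

print(fc(feat).shape)                   # torch.Size([2, 1000, 7, 7]) - pooling disabled, no flatten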

timm/models/dpn.py

Lines changed: 9 additions & 12 deletions
@@ -19,7 +19,7 @@

 from timm.data import IMAGENET_DPN_MEAN, IMAGENET_DPN_STD, IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
 from .helpers import build_model_with_cfg
-from .layers import SelectAdaptivePool2d, BatchNormAct2d, create_conv2d, ConvBnAct
+from .layers import BatchNormAct2d, ConvBnAct, create_conv2d, create_classifier
 from .registry import register_model

 __all__ = ['DPN']
@@ -237,21 +237,16 @@ def _fc_norm(f, eps): return BatchNormAct2d(f, eps=eps, act_layer=fc_act, inplac
         self.features = nn.Sequential(blocks)

         # Using 1x1 conv for the FC layer to allow the extra pooling scheme
-        self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
-        num_features = self.num_features * self.global_pool.feat_mult()
-        self.classifier = nn.Conv2d(num_features, num_classes, kernel_size=1, bias=True)
+        self.global_pool, self.classifier = create_classifier(
+            self.num_features, self.num_classes, pool_type=global_pool, use_conv=True)

     def get_classifier(self):
         return self.classifier

     def reset_classifier(self, num_classes, global_pool='avg'):
         self.num_classes = num_classes
-        self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
-        if num_classes:
-            num_features = self.num_features * self.global_pool.feat_mult()
-            self.classifier = nn.Conv2d(num_features, num_classes, kernel_size=1, bias=True)
-        else:
-            self.classifier = nn.Identity()
+        self.global_pool, self.classifier = create_classifier(
+            self.num_features, self.num_classes, pool_type=global_pool, use_conv=True)

     def forward_features(self, x):
         return self.features(x)
@@ -261,8 +256,10 @@ def forward(self, x):
         x = self.global_pool(x)
         if self.drop_rate > 0.:
             x = F.dropout(x, p=self.drop_rate, training=self.training)
-        out = self.classifier(x)
-        return out.flatten(1)
+        x = self.classifier(x)
+        if not self.global_pool.is_identity():
+            x = x.flatten(1)  # conv classifier, flatten if pooling isn't pass-through (disabled)
+        return x


 def _create_dpn(variant, pretrained=False, **kwargs):

timm/models/efficientnet.py

Lines changed: 12 additions & 24 deletions
@@ -35,7 +35,7 @@
 from .efficientnet_builder import EfficientNetBuilder, decode_arch_def, efficientnet_init_weights
 from .features import FeatureInfo, FeatureHooks
 from .helpers import build_model_with_cfg
-from .layers import SelectAdaptivePool2d, create_conv2d
+from .layers import create_conv2d, create_classifier
 from .registry import register_model

 __all__ = ['EfficientNet']
@@ -336,53 +336,45 @@ def __init__(self, block_args, num_classes=1000, num_features=1280, in_chans=3,
         self.num_classes = num_classes
         self.num_features = num_features
         self.drop_rate = drop_rate
-        self._in_chs = in_chans

         # Stem
         if not fix_stem:
             stem_size = round_channels(stem_size, channel_multiplier, channel_divisor, channel_min)
-        self.conv_stem = create_conv2d(self._in_chs, stem_size, 3, stride=2, padding=pad_type)
+        self.conv_stem = create_conv2d(in_chans, stem_size, 3, stride=2, padding=pad_type)
         self.bn1 = norm_layer(stem_size, **norm_kwargs)
         self.act1 = act_layer(inplace=True)
-        self._in_chs = stem_size

         # Middle stages (IR/ER/DS Blocks)
         builder = EfficientNetBuilder(
             channel_multiplier, channel_divisor, channel_min, output_stride, pad_type, act_layer, se_kwargs,
             norm_layer, norm_kwargs, drop_path_rate, verbose=_DEBUG)
-        self.blocks = nn.Sequential(*builder(self._in_chs, block_args))
+        self.blocks = nn.Sequential(*builder(stem_size, block_args))
         self.feature_info = builder.features
-        self._in_chs = builder.in_chs
+        head_chs = builder.in_chs

         # Head + Pooling
-        self.conv_head = create_conv2d(self._in_chs, self.num_features, 1, padding=pad_type)
+        self.conv_head = create_conv2d(head_chs, self.num_features, 1, padding=pad_type)
         self.bn2 = norm_layer(self.num_features, **norm_kwargs)
         self.act2 = act_layer(inplace=True)
-        self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
-
-        # Classifier
-        self.classifier = nn.Linear(self.num_features * self.global_pool.feat_mult(), self.num_classes)
+        self.global_pool, self.classifier = create_classifier(
+            self.num_features, self.num_classes, pool_type=global_pool)

         efficientnet_init_weights(self)

     def as_sequential(self):
         layers = [self.conv_stem, self.bn1, self.act1]
         layers.extend(self.blocks)
         layers.extend([self.conv_head, self.bn2, self.act2, self.global_pool])
-        layers.extend([nn.Flatten(), nn.Dropout(self.drop_rate), self.classifier])
+        layers.extend([nn.Dropout(self.drop_rate), self.classifier])
         return nn.Sequential(*layers)

     def get_classifier(self):
         return self.classifier

     def reset_classifier(self, num_classes, global_pool='avg'):
         self.num_classes = num_classes
-        self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
-        if num_classes:
-            num_features = self.num_features * self.global_pool.feat_mult()
-            self.classifier = nn.Linear(num_features, num_classes)
-        else:
-            self.classifier = nn.Identity()
+        self.global_pool, self.classifier = create_classifier(
+            self.num_features, self.num_classes, pool_type=global_pool)

     def forward_features(self, x):
         x = self.conv_stem(x)
@@ -397,7 +389,6 @@ def forward_features(self, x):
     def forward(self, x):
         x = self.forward_features(x)
         x = self.global_pool(x)
-        x = x.flatten(1)
         if self.drop_rate > 0.:
             x = F.dropout(x, p=self.drop_rate, training=self.training)
         return self.classifier(x)
@@ -417,24 +408,21 @@ def __init__(self, block_args, out_indices=(0, 1, 2, 3, 4), feature_location='bo
         super(EfficientNetFeatures, self).__init__()
         norm_kwargs = norm_kwargs or {}
         self.drop_rate = drop_rate
-        self._in_chs = in_chans

         # Stem
         if not fix_stem:
             stem_size = round_channels(stem_size, channel_multiplier, channel_divisor, channel_min)
-        self.conv_stem = create_conv2d(self._in_chs, stem_size, 3, stride=2, padding=pad_type)
+        self.conv_stem = create_conv2d(in_chans, stem_size, 3, stride=2, padding=pad_type)
         self.bn1 = norm_layer(stem_size, **norm_kwargs)
         self.act1 = act_layer(inplace=True)
-        self._in_chs = stem_size

         # Middle stages (IR/ER/DS Blocks)
         builder = EfficientNetBuilder(
             channel_multiplier, channel_divisor, channel_min, output_stride, pad_type, act_layer, se_kwargs,
             norm_layer, norm_kwargs, drop_path_rate, feature_location=feature_location, verbose=_DEBUG)
-        self.blocks = nn.Sequential(*builder(self._in_chs, block_args))
+        self.blocks = nn.Sequential(*builder(stem_size, block_args))
         self.feature_info = FeatureInfo(builder.features, out_indices)
         self._stage_out_idx = {v['stage']: i for i, v in enumerate(self.feature_info) if i in out_indices}
-        self._in_chs = builder.in_chs

         efficientnet_init_weights(self)

timm/models/gluon_xception.py

Lines changed: 4 additions & 6 deletions
@@ -13,7 +13,7 @@

 from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
 from .helpers import build_model_with_cfg
-from .layers import SelectAdaptivePool2d, get_padding
+from .layers import create_classifier, get_padding
 from .registry import register_model

 __all__ = ['Xception65']
@@ -192,16 +192,14 @@ def __init__(self, num_classes=1000, in_chans=3, output_stride=32, norm_layer=nn
             dict(num_chs=2048, reduction=32, module='act5'),
         ]

-        self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
-        self.fc = nn.Linear(self.num_features * self.global_pool.feat_mult(), num_classes)
+        self.global_pool, self.fc = create_classifier(self.num_features, self.num_classes, pool_type=global_pool)

     def get_classifier(self):
         return self.fc

     def reset_classifier(self, num_classes, global_pool='avg'):
         self.num_classes = num_classes
-        self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
-        self.fc = nn.Linear(self.num_features * self.global_pool.feat_mult(), num_classes) if num_classes else None
+        self.global_pool, self.fc = create_classifier(self.num_features, self.num_classes, pool_type=global_pool)

     def forward_features(self, x):
         # Entry flow
@@ -242,7 +240,7 @@ def forward_features(self, x):

     def forward(self, x):
         x = self.forward_features(x)
-        x = self.global_pool(x).flatten(1)
+        x = self.global_pool(x)
         if self.drop_rate:
             F.dropout(x, self.drop_rate, training=self.training)
         x = self.fc(x)

timm/models/helpers.py

Lines changed: 5 additions & 2 deletions
@@ -187,10 +187,13 @@ def adapt_model_from_string(parent_module, model_string):
                 affine=old_module.affine, track_running_stats=True)
             set_layer(new_module, n, new_bn)
         if isinstance(old_module, nn.Linear):
+            # FIXME extra checks to ensure this is actually the FC classifier layer and not a diff Linear layer?
+            num_features = state_dict[n + '.weight'][1]
             new_fc = nn.Linear(
-                in_features=state_dict[n + '.weight'][1], out_features=old_module.out_features,
-                bias=old_module.bias is not None)
+                in_features=num_features, out_features=old_module.out_features, bias=old_module.bias is not None)
             set_layer(new_module, n, new_fc)
+            if hasattr(new_module, 'num_features'):
+                new_module.num_features = num_features
     new_module.eval()
     parent_module.eval()
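The num_features sync matters because reset_classifier() on the models above rebuilds the head from self.num_features, so a model whose final Linear was narrowed by adapt_model_from_string would otherwise get a mismatched head on reset. A toy illustration of the failure mode this avoids (not timm code; the 1536 width is made up):

import torch.nn as nn

class ToyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.num_features = 2048
        self.classifier = nn.Linear(self.num_features, 1000)

    def reset_classifier(self, num_classes):
        # mirrors the pattern used by the timm models in this commit
        self.classifier = nn.Linear(self.num_features, num_classes) if num_classes else nn.Identity()

m = ToyModel()
m.classifier = nn.Linear(1536, 1000)  # pretend adaptation pruned the head input to 1536 features
m.num_features = 1536                 # the added sync keeps the attribute matching the real width
m.reset_classifier(10)                # -> Linear(1536, 10), matching the pruned feature width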

timm/models/hrnet.py

Lines changed: 6 additions & 10 deletions
@@ -18,7 +18,7 @@
 from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
 from .features import FeatureInfo
 from .helpers import build_model_with_cfg
-from .layers import SelectAdaptivePool2d
+from .layers import create_classifier
 from .registry import register_model
 from .resnet import BasicBlock, Bottleneck  # leveraging ResNet blocks w/ additional features like SE

@@ -553,8 +553,8 @@ def __init__(self, cfg, in_chans=3, num_classes=1000, global_pool='avg', drop_ra
             # Classification Head
             self.num_features = 2048
             self.incre_modules, self.downsamp_modules, self.final_layer = self._make_head(pre_stage_channels)
-            self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
-            self.classifier = nn.Linear(self.num_features * self.global_pool.feat_mult(), num_classes)
+            self.global_pool, self.classifier = create_classifier(
+                self.num_features, self.num_classes, pool_type=global_pool)
         elif head == 'incre':
             self.num_features = 2048
             self.incre_modules, _, _ = self._make_head(pre_stage_channels, True)
@@ -685,12 +685,8 @@ def get_classifier(self):

     def reset_classifier(self, num_classes, global_pool='avg'):
         self.num_classes = num_classes
-        self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
-        num_features = self.num_features * self.global_pool.feat_mult()
-        if num_classes:
-            self.classifier = nn.Linear(num_features, num_classes)
-        else:
-            self.classifier = nn.Identity()
+        self.global_pool, self.classifier = create_classifier(
+            self.num_features, self.num_classes, pool_type=global_pool)

     def stages(self, x) -> List[torch.Tensor]:
         x = self.layer1(x)
@@ -726,7 +722,7 @@ def forward_features(self, x):

     def forward(self, x):
         x = self.forward_features(x)
-        x = self.global_pool(x).flatten(1)
+        x = self.global_pool(x)
         if self.drop_rate > 0.:
             x = F.dropout(x, p=self.drop_rate, training=self.training)
         x = self.classifier(x)
