Skip to content

Commit 280ec14

Browse files
committed
support Pooling ops with Sequence axis
1 parent c7bc93f commit 280ec14

File tree

2 files changed

+85
-3
lines changed

2 files changed

+85
-3
lines changed

Source/CNTKv2LibraryDll/proto/onnx/CNTKToONNX.cpp

Lines changed: 60 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -845,6 +845,12 @@ class CNTKToONNXHelper
845845
std::unordered_map<Variable, onnxruntime::Node*>& variableNodes,
846846
std::vector<ScanLoop>& scanLoops, int createLoopIndex);
847847

848+
static onnxruntime::Node* CreatePoolingNode(const FunctionPtr& src,
849+
onnxruntime::Graph* graph,
850+
std::unordered_map<FunctionPtr, onnxruntime::Node*>& functionNodes,
851+
std::unordered_map<Variable, onnxruntime::Node*>& variableNodes,
852+
std::vector<ScanLoop>& scanLoops, int createLoopIndex);
853+
848854
static onnxruntime::Node* CreateConvolutionNode(const FunctionPtr& src,
849855
onnxruntime::Graph* graph,
850856
std::unordered_map<FunctionPtr, onnxruntime::Node*>& functionNodes,
@@ -5396,6 +5402,9 @@ onnxruntime::Node* CNTKToONNXHelper::CreateNode(const FunctionPtr& src,
53965402
std::string cntkOpName = ToLegacyString(ToUTF8(src->OpName()));
53975403
std::string onnxOpName = ToOPName(src);
53985404

5405+
if (src->OpName() == L"Pooling")
5406+
std::cout << "";
5407+
53995408
// TODO: uncomment this code once bidirectional LSTM is supported.
54005409
//if (cntkOpName == "Splice")
54015410
//{
@@ -5629,7 +5638,10 @@ onnxruntime::Node* CNTKToONNXHelper::CreateNode(const FunctionPtr& src,
56295638
else
56305639
return CreateConvolutionNode(src, graph, functionNodes, variableNodes, scanLoops, createLoopIndex);
56315640
}
5632-
5641+
else if (src->OpName() == L"Pooling" && src->Inputs()[0].HasBatchAxis() && src->Inputs()[0].HasSequenceAxis())
5642+
{
5643+
return CreatePoolingNode(src, graph, functionNodes, variableNodes, scanLoops, createLoopIndex);
5644+
}
56335645
//
56345646
// If this block node equivalent to a primitive ONNX OP, then treated as such.
56355647
// And just maps its argument to ONNX node.
@@ -7087,7 +7099,7 @@ void CNTKToONNXHelper::CopyAttributes(const FunctionPtr& src, onnxruntime::Node*
70877099
auto lowerPad = ToINTS(src->Attributes()[L"lowerPad"].Value<NDShape>());
70887100
auto upperPad = ToINTS(src->Attributes()[L"upperPad"].Value<NDShape>());
70897101

7090-
if (IsPadValueValid(lowerPad, upperPad, autoPadding, ceilOutDim))
7102+
if (IsPadValueValid(lowerPad, upperPad, autoPadding, ceilOutDim) && !(src->Inputs()[0].HasBatchAxis() && src->Inputs()[0].HasSequenceAxis()))
70917103
{
70927104
if (ceilOutDim)
70937105
ValidatePadValueForCeilOutDim(lowerPad, upperPad, autoPadding, kernelShape, inputShape, strides,
@@ -8605,6 +8617,52 @@ onnxruntime::Node* ApplyActivationToSequenceConvolution(Node* convNode, const Fu
86058617
return activationNode;
86068618
}
86078619

8620+
// Exports a CNTK Pooling op whose input carries both a batch (#) and a sequence (*)
// dynamic axis. ONNX Max/AveragePool expects [N, C, H, W] (or [N, C, D1..Dn]) input,
// so the two dynamic axes are collapsed into N with a Reshape before pooling and
// expanded back to [#, *][C_out, ...] afterwards.
// Returns the pooling node itself (not the trailing Reshape) so that the caller's
// attribute copying applies kernel/stride/pad attributes to the Pool node.
onnxruntime::Node* CNTKToONNXHelper::CreatePoolingNode(const FunctionPtr& src,
    onnxruntime::Graph* graph,
    std::unordered_map<FunctionPtr, onnxruntime::Node*>& functionNodes,
    std::unordered_map<Variable, onnxruntime::Node*>& variableNodes,
    std::vector<ScanLoop>& scanLoops, int createLoopIndex)
{
    if (!src->Inputs()[0].HasBatchAxis() || !src->Inputs()[0].HasSequenceAxis())
        // Message fixed: this path handles the generic Pooling op (MAX and AVG), not just MaxPool.
        LogicError("CreatePoolingNode is only to handle Pooling ops with batch and sequence dimensions.");

    std::vector<onnxruntime::NodeArg *> inputs;
    ProcessInputs(src, graph, functionNodes, variableNodes, inputs,
                  scanLoops, createLoopIndex);

    std::vector<onnxruntime::NodeArg *> outputs;
    ProcessOutputs(src, inputs, outputs, graph);

    // Max/AveragePool takes input of shape [N, C, H, W] or [N, C, D1, D2, ..., Dn]. CNTK input needs to be reshaped to match it.
    // reshape [#, *][C, H, W] to [-1, C, H, W]
    // onnx Max/AveragePool
    // reshape [-1, C_out, H_out, W_out] to [#, *][C_out, H_out, W_out]
    vector<int64_t> newDimInputToPooling;
    // collapse extra dims into one axis as N for ONNX Max/AveragePool
    // (comment fixed: was "ONNX Conv", a copy-paste from the convolution path)
    newDimInputToPooling.push_back(-1);
    for (int i = 2; i < inputs[0]->Shape()->dim_size(); i++)
    {
        // copy C, H, W - the static feature dims must be concrete to build the Reshape target.
        if (!inputs[0]->Shape()->dim(i).has_dim_value())
            LogicError("Max/AveragePool: feature dimensions need to have dim value.");
        newDimInputToPooling.push_back(inputs[0]->Shape()->dim(i).dim_value());
    }

    onnxruntime::Node* preReshape = AddReshapeNode(*inputs[0], newDimInputToPooling, inputs[0]->Name() + "_reshaped_for_max_pool", graph);
    const std::vector<onnxruntime::NodeArg *> pooling_inputs({const_cast<NodeArg *>(preReshape->OutputDefs()[0])});
    TypeProto poolingOutputTypeProto;
    UpdateONNXType(src->Outputs()[0].GetDataType(), poolingOutputTypeProto);

    // Intermediate output of the pooling op, before the dynamic axes are restored.
    NodeArg *poolingOutputArg = &graph->GetOrCreateNodeArg(outputs[0]->Name() + "_pooling_of_reshaped", &poolingOutputTypeProto);

    onnxruntime::Node* poolingNode = AddNode(src, graph, pooling_inputs, { poolingOutputArg });

    // Restore [#, *][C_out, H_out, W_out]. The Reshape writes the graph's final output
    // arg (outputs[0]->Name()); adding it to the graph is the only effect needed, so the
    // returned node pointer is intentionally not stored (was an unused local).
    vector<int64_t> newDimOutputFromPooling = ToINTS(*outputs[0]->TypeAsProto());
    AddReshapeNode(*poolingOutputArg, newDimOutputFromPooling, outputs[0]->Name(), graph);

    return poolingNode;
}
8665+
86088666
onnxruntime::Node* CNTKToONNXHelper::CreateConvolutionNode(const FunctionPtr& src,
86098667
onnxruntime::Graph* graph,
86108668
std::unordered_map<FunctionPtr, onnxruntime::Node*>& functionNodes,

bindings/python/cntk/tests/onnx_op_test.py

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -423,6 +423,18 @@ def test_AveragePool(tmpdir, dtype, device_id):
423423

424424
verify_one_input(model, img, tmpdir, 'AveragePool_2', device)
425425

426+
#AveragePool
@pytest.mark.parametrize("dtype", DType_Config)
def test_AvergaePoolWithSequenceAxis(tmpdir, dtype, device_id):
    # NOTE(review): "Avergae" is a typo for "Average" — kept because the function
    # name is the public pytest id; renaming would change which test IDs exist.
    # Exercises ONNX export of AveragePool when the input has a sequence axis:
    # the exporter collapses the [#, *] dynamic axes into N before pooling.
    if device_id == -1 and dtype == np.float16:
        pytest.skip('Test is skipped on CPU with float16 data')
    device = cntk_device(device_id)
    with C.default_options(dtype=dtype):
        # Single 4x4 single-channel image; sequence input variable adds [#, *] axes.
        img = np.reshape(np.arange(16, dtype = dtype), [1, 4, 4])
        x = C.sequence.input_variable(img.shape)
        model = C.pooling(x, C.AVG_POOLING, (2,2), (2,2))
        # Data reshaped to [batch=1, seq=1, 1, 4, 4] to feed the sequence model.
        # bypass_load_into_cntk / resave=False: presumably the exported graph (with
        # collapsed dynamic axes) cannot round-trip back into CNTK — TODO confirm.
        verify_sequence_model(model, np.reshape(img, [1, 1, 1, 4, 4]), tmpdir, "AveragePoolWithSeq_1", resave = False, bypass_load_into_cntk = True)
437+
426438
#BatchNormalization
427439
def verify_BN(x, init_scale, init_bias, mean, var, epsilon, spatial, tmpdir, dtype):
428440
with C.default_options(dtype = dtype):
@@ -1311,7 +1323,7 @@ def test_Max(tmpdir, dtype):
13111323

13121324
#MaxPool
13131325
@pytest.mark.parametrize("dtype", DType_Config)
1314-
def test_MaxPool(tmpdir, dtype, device_id):
1326+
def test_MaxPool(tmpdir, dtype, device_id):
13151327
if device_id == -1 and dtype == np.float16:
13161328
pytest.skip('Test is skipped on CPU with float16 data')
13171329
device = cntk_device(device_id)
@@ -1327,6 +1339,18 @@ def test_MaxPool(tmpdir, dtype, device_id):
13271339
model = C.pooling(x, C.MAX_POOLING, (3, 3), (2, 2), auto_padding=[False, False, False], ceil_out_dim=True)
13281340
verify_one_input(model, img, tmpdir, 'MaxPool_2', device)
13291341

1342+
#MaxPool
@pytest.mark.parametrize("dtype", DType_Config)
def test_MaxPoolWithSequenceAxis(tmpdir, dtype, device_id):
    # Exercises ONNX export of MaxPool when the input has a sequence axis:
    # the exporter collapses the [#, *] dynamic axes into N before pooling.
    if device_id == -1 and dtype == np.float16:
        pytest.skip('Test is skipped on CPU with float16 data')
    device = cntk_device(device_id)
    with C.default_options(dtype=dtype):
        # Single 4x4 single-channel image; sequence input variable adds [#, *] axes.
        img = np.reshape(np.arange(16, dtype = dtype), [1, 4, 4])
        x = C.sequence.input_variable(img.shape)
        model = C.pooling(x, C.MAX_POOLING, (2,2), (2,2))
        # Data reshaped to [batch=1, seq=1, 1, 4, 4] to feed the sequence model.
        # bypass_load_into_cntk / resave=False: presumably the exported graph (with
        # collapsed dynamic axes) cannot round-trip back into CNTK — TODO confirm.
        verify_sequence_model(model, np.reshape(img, [1, 1, 1, 4, 4]), tmpdir, "MaxPoolWithSeq_1", resave = False, bypass_load_into_cntk = True)
1353+
13301354
#MaxRoiPool
13311355
@pytest.mark.parametrize("dtype", DType_Config)
13321356
def test_MaxRoiPool(tmpdir, dtype):

0 commit comments

Comments
 (0)