Commit f52a2cf

Merge remote-tracking branch 'upstream/3.4' into merge-3.4
2 parents: 95fd61c + 5e90802

14 files changed: 181 additions, 40 deletions


CMakeLists.txt

Lines changed: 1 addition & 0 deletions

@@ -464,6 +464,7 @@ OCV_OPTION(BUILD_OBJC "Enable Objective-C support"
 # OpenCV installation options
 # ===================================================
 OCV_OPTION(INSTALL_CREATE_DISTRIB "Change install rules to build the distribution package" OFF )
+OCV_OPTION(INSTALL_BIN_EXAMPLES "Install prebuilt examples" WIN32 IF BUILD_EXAMPLES)
 OCV_OPTION(INSTALL_C_EXAMPLES "Install C examples" OFF )
 OCV_OPTION(INSTALL_PYTHON_EXAMPLES "Install Python examples" OFF )
 OCV_OPTION(INSTALL_ANDROID_EXAMPLES "Install Android examples" OFF IF ANDROID )

cmake/OpenCVModule.cmake

Lines changed: 2 additions & 2 deletions

@@ -1364,8 +1364,8 @@ function(ocv_add_samples)
         add_dependencies(${the_target} opencv_videoio_plugins)
       endif()

-      if(WIN32)
-        install(TARGETS ${the_target} RUNTIME DESTINATION "samples/${module_id}" COMPONENT samples)
+      if(INSTALL_BIN_EXAMPLES)
+        install(TARGETS ${the_target} RUNTIME DESTINATION "${OPENCV_SAMPLES_BIN_INSTALL_PATH}/${module_id}" COMPONENT samples)
       endif()
     endforeach()
   endif()

doc/py_tutorials/py_imgproc/py_contours/py_contour_properties/py_contour_properties.markdown

Lines changed: 1 addition & 1 deletion

@@ -78,7 +78,7 @@ pixelpoints = np.transpose(np.nonzero(mask))
 Here, two methods, one using Numpy functions, next one using OpenCV function (last commented line)
 are given to do the same. Results are also same, but with a slight difference. Numpy gives
 coordinates in **(row, column)** format, while OpenCV gives coordinates in **(x,y)** format. So
-basically the answers will be interchanged. Note that, **row = x** and **column = y**.
+basically the answers will be interchanged. Note that, **row = y** and **column = x**.

 7. Maximum Value, Minimum Value and their locations
 ---------------------------------------------------
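The corrected convention (row = y, column = x) can be checked from the C++ side as well: cv::findNonZero reports points in (x, y) order, while row/column indexing is (y, x). A small standalone sketch, not part of the tutorial or this patch:

#include <opencv2/core.hpp>
#include <iostream>
#include <vector>

int main()
{
    cv::Mat mask = cv::Mat::zeros(4, 6, CV_8U);
    mask.at<uchar>(1, 3) = 255;          // written at row = 1, column = 3

    std::vector<cv::Point> pts;
    cv::findNonZero(mask, pts);          // OpenCV reports coordinates as (x, y)
    std::cout << pts[0] << std::endl;    // prints [3, 1]: x = column = 3, y = row = 1
    return 0;
}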

modules/core/src/matrix_wrap.cpp

Lines changed: 8 additions & 6 deletions

@@ -1248,6 +1248,7 @@ void _OutputArray::create(int d, const int* sizes, int mtype, int i,
     {
         CV_Assert( i < 0 );
         Mat& m = *(Mat*)obj;
+        CV_Assert(!(m.empty() && fixedType() && fixedSize()) && "Can't reallocate empty Mat with locked layout (probably due to misused 'const' modifier)");
         if (allowTransposed && !m.empty() &&
             d == 2 && m.dims == 2 &&
             m.type() == mtype && m.rows == sizes[1] && m.cols == sizes[0] &&
@@ -1261,13 +1262,13 @@ void _OutputArray::create(int d, const int* sizes, int mtype, int i,
             if(CV_MAT_CN(mtype) == m.channels() && ((1 << CV_MAT_TYPE(flags)) & fixedDepthMask) != 0 )
                 mtype = m.type();
             else
-                CV_CheckTypeEQ(m.type(), CV_MAT_TYPE(mtype), "");
+                CV_CheckTypeEQ(m.type(), CV_MAT_TYPE(mtype), "Can't reallocate Mat with locked type (probably due to misused 'const' modifier)");
         }
         if(fixedSize())
         {
-            CV_CheckEQ(m.dims, d, "");
+            CV_CheckEQ(m.dims, d, "Can't reallocate Mat with locked size (probably due to misused 'const' modifier)");
             for(int j = 0; j < d; ++j)
-                CV_CheckEQ(m.size[j], sizes[j], "");
+                CV_CheckEQ(m.size[j], sizes[j], "Can't reallocate Mat with locked size (probably due to misused 'const' modifier)");
         }
         m.create(d, sizes, mtype);
         return;
@@ -1277,6 +1278,7 @@ void _OutputArray::create(int d, const int* sizes, int mtype, int i,
     {
         CV_Assert( i < 0 );
         UMat& m = *(UMat*)obj;
+        CV_Assert(!(m.empty() && fixedType() && fixedSize()) && "Can't reallocate empty UMat with locked layout (probably due to misused 'const' modifier)");
         if (allowTransposed && !m.empty() &&
             d == 2 && m.dims == 2 &&
             m.type() == mtype && m.rows == sizes[1] && m.cols == sizes[0] &&
@@ -1290,13 +1292,13 @@ void _OutputArray::create(int d, const int* sizes, int mtype, int i,
             if(CV_MAT_CN(mtype) == m.channels() && ((1 << CV_MAT_TYPE(flags)) & fixedDepthMask) != 0 )
                 mtype = m.type();
             else
-                CV_CheckTypeEQ(m.type(), CV_MAT_TYPE(mtype), "");
+                CV_CheckTypeEQ(m.type(), CV_MAT_TYPE(mtype), "Can't reallocate UMat with locked type (probably due to misused 'const' modifier)");
         }
         if(fixedSize())
         {
-            CV_CheckEQ(m.dims, d, "");
+            CV_CheckEQ(m.dims, d, "Can't reallocate UMat with locked size (probably due to misused 'const' modifier)");
             for(int j = 0; j < d; ++j)
-                CV_CheckEQ(m.size[j], sizes[j], "");
+                CV_CheckEQ(m.size[j], sizes[j], "Can't reallocate UMat with locked size (probably due to misused 'const' modifier)");
         }
         m.create(d, sizes, mtype);
         return;
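The expanded messages target a recurring pitfall: binding a const cv::Mat (or a temporary) to an output parameter marks the _OutputArray as fixed-type and fixed-size, so _OutputArray::create() is not allowed to reallocate it. A minimal sketch of the situation the new wording describes; makeOnes and the variable names are hypothetical, not part of the patch:

#include <opencv2/core.hpp>

// Hypothetical helper that (re)allocates its output, as most OpenCV functions do.
static void makeOnes(cv::OutputArray dst)
{
    dst.create(3, 3, CV_8UC1);            // must be able to reallocate 'dst'
    dst.setTo(cv::Scalar::all(1));
}

int main()
{
    cv::Mat ok;
    makeOnes(ok);                         // fine: 'ok' is resizable

    const cv::Mat locked;                 // 'const' misused on an output argument
    makeOnes(locked);                     // throws: "Can't reallocate empty Mat with locked layout ..."
    return 0;
}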

modules/core/src/ocl.cpp

Lines changed: 35 additions & 2 deletions

@@ -3311,7 +3311,7 @@ KernelArg KernelArg::Constant(const Mat& m)
 struct Kernel::Impl
 {
     Impl(const char* kname, const Program& prog) :
-        refcount(1), handle(NULL), isInProgress(false), nu(0)
+        refcount(1), handle(NULL), isInProgress(false), isAsyncRun(false), nu(0)
     {
         cl_program ph = (cl_program)prog.ptr();
         cl_int retval = 0;
@@ -3388,6 +3388,7 @@ struct Kernel::Impl
     enum { MAX_ARRS = 16 };
     UMatData* u[MAX_ARRS];
     bool isInProgress;
+    bool isAsyncRun;  // true if kernel was scheduled in async mode
     int nu;
     std::list<Image2D> images;
     bool haveTempDstUMats;
@@ -3667,13 +3668,45 @@ bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
 }


+static bool isRaiseErrorOnReuseAsyncKernel()
+{
+    static bool initialized = false;
+    static bool value = false;
+    if (!initialized)
+    {
+        value = cv::utils::getConfigurationParameterBool("OPENCV_OPENCL_RAISE_ERROR_REUSE_ASYNC_KERNEL", false);
+        initialized = true;
+    }
+    return value;
+}
+
 bool Kernel::Impl::run(int dims, size_t globalsize[], size_t localsize[],
                        bool sync, int64* timeNS, const Queue& q)
 {
     CV_INSTRUMENT_REGION_OPENCL_RUN(name.c_str());

-    if (!handle || isInProgress)
+    if (!handle)
+    {
+        CV_LOG_ERROR(NULL, "OpenCL kernel has zero handle: " << name);
         return false;
+    }
+
+    if (isAsyncRun)
+    {
+        CV_LOG_ERROR(NULL, "OpenCL kernel can't be reused in async mode: " << name);
+        if (isRaiseErrorOnReuseAsyncKernel())
+            CV_Assert(0);
+        return false;  // OpenCV 5.0: raise error
+    }
+    isAsyncRun = !sync;
+
+    if (isInProgress)
+    {
+        CV_LOG_ERROR(NULL, "Previous OpenCL kernel launch is not finished: " << name);
+        if (isRaiseErrorOnReuseAsyncKernel())
+            CV_Assert(0);
+        return false;  // OpenCV 5.0: raise error
+    }

     cl_command_queue qq = getQueue(q);
     if (haveTempDstUMats)
modules/dnn/src/ocl4dnn/src/math_functions.cpp

Lines changed: 9 additions & 4 deletions

@@ -46,6 +46,8 @@
 #include <vector>
 #include "opencl_kernels_dnn.hpp"

+#include "opencv2/core/utils/logger.hpp"
+
 namespace cv { namespace dnn { namespace ocl4dnn {

 enum gemm_data_type_t
@@ -238,10 +240,6 @@ static bool ocl4dnnFastImageGEMM(const CBLAS_TRANSPOSE TransA,
         kernel_name += "_float";
     }

-    ocl::Kernel oclk_gemm_float(kernel_name.c_str(), ocl::dnn::gemm_image_oclsrc, opts);
-    if (oclk_gemm_float.empty())
-        return false;
-
     while (C_start_y < M)
     {
         blockC_width = std::min(static_cast<int>(N) - C_start_x, blocksize);
@@ -348,6 +346,10 @@ static bool ocl4dnnFastImageGEMM(const CBLAS_TRANSPOSE TransA,
             }
             local[1] = 1;

+            ocl::Kernel oclk_gemm_float(kernel_name.c_str(), ocl::dnn::gemm_image_oclsrc, opts);
+            if (oclk_gemm_float.empty())
+                return false;
+
             cl_uint arg_idx = 0;
             if (is_image_a)
                 oclk_gemm_float.set(arg_idx++, ocl::KernelArg::PtrReadOnly(A));
@@ -378,7 +380,10 @@ static bool ocl4dnnFastImageGEMM(const CBLAS_TRANSPOSE TransA,
             oclk_gemm_float.set(arg_idx++, isFirstColBlock);

             if (!oclk_gemm_float.run(2, global, local, false))
+            {
+                CV_LOG_WARNING(NULL, "OpenCL kernel enqueue failed: " << kernel_name);
                 return false;
+            }

             if (TransA == CblasNoTrans)
                 A_start_x += blockA_width;
modules/dnn/src/onnx/onnx_importer.cpp

Lines changed: 54 additions & 11 deletions

@@ -392,24 +392,21 @@ void ONNXImporter::populateNet(Net dstNet)
            layerParams.set("ave_pool_padded_area", framework_name == "pytorch");
        }
        else if (layer_type == "GlobalAveragePool" || layer_type == "GlobalMaxPool" ||
-                layer_type == "ReduceMean" || layer_type == "ReduceSum")
+                layer_type == "ReduceMean" || layer_type == "ReduceSum" || layer_type == "ReduceMax")
        {
            CV_Assert(node_proto.input_size() == 1);
            layerParams.type = "Pooling";
            String pool;
-            if (layer_type == "GlobalMaxPool")
+            if (layer_type == "GlobalMaxPool" || layer_type == "ReduceMax")
                pool = "MAX";
            else if (layer_type == "ReduceSum")
                pool = "SUM";
            else
                pool = "AVE";
            layerParams.set("pool", pool);
-            layerParams.set("global_pooling", layer_type == "GlobalAveragePool" || layer_type == "GlobalMaxPool");
-            if (layer_type == "ReduceMean" || layer_type == "ReduceSum")
+            layerParams.set("global_pooling", !layerParams.has("axes"));
+            if (layerParams.has("axes") && (layer_type == "ReduceMean" || layer_type == "ReduceSum" || layer_type == "ReduceMax"))
            {
-                if (!layerParams.has("axes"))
-                    CV_Error(Error::StsNotImplemented, "Unsupported mode of " + layer_type + " operation.");
-
                MatShape inpShape = outShapes[node_proto.input(0)];
                DictValue axes = layerParams.get("axes");
                bool keepdims = layerParams.get<int>("keepdims");
@@ -487,6 +484,36 @@ void ONNXImporter::populateNet(Net dstNet)
                layerParams.type = "Reshape";
                layerParams.set("dim", DictValue::arrayInt(&targetShape[0], targetShape.size()));

+                node_proto.set_input(0, node_proto.output(0));
+                node_proto.set_output(0, layerParams.name);
+            }
+            else if (!layerParams.has("axes") && (layer_type == "ReduceMean" || layer_type == "ReduceSum" || layer_type == "ReduceMax"))
+            {
+                CV_CheckEQ(layerParams.get<int>("keepdims"), 0, (layer_type + " layer only supports keepdims = false").c_str());
+                LayerParams reshapeLp;
+                reshapeLp.name = layerParams.name + "/reshape";
+                reshapeLp.type = "Reshape";
+                CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end());
+                int newShape[] = {1, 1, 1, -1};
+                reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], 4));
+
+                opencv_onnx::NodeProto proto;
+                proto.add_input(node_proto.input(0));
+                proto.add_output(reshapeLp.name);
+                addLayer(dstNet, reshapeLp, proto, layer_id, outShapes);
+
+                LayerParams poolLp = layerParams;
+                poolLp.name = layerParams.name + "/pool";
+                CV_Assert(layer_id.find(poolLp.name) == layer_id.end());
+
+                node_proto.set_input(0, reshapeLp.name);
+                node_proto.set_output(0, poolLp.name);
+                addLayer(dstNet, poolLp, node_proto, layer_id, outShapes);
+
+                layerParams.type = "Reshape";
+                int targetShape[] = {1};
+                layerParams.set("dim", DictValue::arrayInt(&targetShape[0], 1));
+
                node_proto.set_input(0, node_proto.output(0));
                node_proto.set_output(0, layerParams.name);
            }
@@ -653,7 +680,7 @@ void ONNXImporter::populateNet(Net dstNet)
                LayerParams constParams;
                constParams.name = layerParams.name + "/const";
                constParams.type = "Const";
-                constParams.blobs.push_back(blob);
+                constParams.blobs.push_back((isSub ? -1 : 1) * blob);
                int id = dstNet.addLayer(constParams.name, constParams.type, constParams);
                layer_id.insert(std::make_pair(constParams.name, LayerInfo(id, 0)));
                outShapes[constParams.name] = shape(blob);
@@ -1024,6 +1051,16 @@ void ONNXImporter::populateNet(Net dstNet)
            }
            else
            {
+                // Scale layer allocate output with the first input shape
+                if (total(outShapes[node_proto.input(0)]) < total(outShapes[node_proto.input(1)]))
+                {
+                    opencv_onnx::NodeProto proto;
+                    proto.add_input(node_proto.input(1));
+                    proto.add_input(node_proto.input(0));
+                    proto.add_output(layerParams.name);
+                    node_proto = proto;
+                }
+
                if (isDiv)
                {
                    LayerParams powerParams;
@@ -1427,8 +1464,10 @@ void ONNXImporter::populateNet(Net dstNet)
            case opencv_onnx::TensorProto_DataType_INT64: type = CV_32S; break;
            default: type = blob.type();
            }
-            blob.convertTo(blob, type);
-            addConstant(layerParams.name, blob, constBlobs, outShapes);
+            Mat dst;
+            blob.convertTo(dst, type);
+            dst.dims = blob.dims;
+            addConstant(layerParams.name, dst, constBlobs, outShapes);
            continue;
        }
        else
@@ -1477,6 +1516,8 @@ void ONNXImporter::populateNet(Net dstNet)
        {
            outShape.erase(outShape.begin() + axis);
            out.reshape(0, outShape);
+        } else {
+            out.dims = 1;
        }
        addConstant(layerParams.name, out, constBlobs, outShapes);
        continue;
@@ -1557,7 +1598,9 @@ void ONNXImporter::populateNet(Net dstNet)
            Mat shapes = getBlob(node_proto, constBlobs, node_proto.input_size() - 1);
            CV_CheckEQ(shapes.size[0], 4, "");
            CV_CheckEQ(shapes.size[1], 1, "");
-            CV_CheckTypeEQ(shapes.depth(), CV_32S, "");
+            CV_CheckDepth(shapes.depth(), shapes.depth() == CV_32S || shapes.depth() == CV_32F, "");
+            if (shapes.depth() == CV_32F)
+                shapes.convertTo(shapes, CV_32S);
            int height = shapes.at<int>(2);
            int width = shapes.at<int>(3);
            if (node_proto.input_size() == 3)
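Taken together, the importer now accepts global ReduceMean/ReduceSum/ReduceMax nodes that carry no axes attribute (lowered to a Reshape + global Pooling + Reshape chain), broadcasted element-wise ops where the smaller tensor comes first, and Resize shape tensors stored as float. A minimal consumer-side sketch; the model file name is a placeholder, not something shipped with this commit:

#include <opencv2/dnn.hpp>
#include <opencv2/core.hpp>

int main()
{
    // Placeholder path: any ONNX graph ending in a global ReduceMax.
    cv::dnn::Net net = cv::dnn::readNetFromONNX("model_with_reduce_max.onnx");

    cv::Mat img = cv::Mat::ones(224, 224, CV_32FC3);
    net.setInput(cv::dnn::blobFromImage(img));
    cv::Mat out = net.forward();   // ReduceMax is imported as MAX Pooling + Reshape
    return 0;
}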

modules/dnn/src/opencl/prior_box.cl

Lines changed: 1 addition & 1 deletion

@@ -114,6 +114,6 @@ __kernel void clip(const int nthreads,
   for (int index = get_global_id(0); index < nthreads; index += get_global_size(0))
   {
     Dtype4 vec = vload4(index, dst);
-    vstore4(clamp(vec, 0.0f, 1.0f), index, dst);
+    vstore4(clamp(vec, (Dtype)0.0f, (Dtype)1.0f), index, dst);
   }
 }

modules/dnn/test/test_onnx_importer.cpp

Lines changed: 17 additions & 0 deletions

@@ -275,6 +275,18 @@ TEST_P(Test_ONNX_layers, ReduceSum)
     testONNXModels("reduce_sum");
 }

+TEST_P(Test_ONNX_layers, ReduceMaxGlobal)
+{
+    testONNXModels("reduce_max");
+}
+
+TEST_P(Test_ONNX_layers, Scale)
+{
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER);
+    testONNXModels("scale");
+}
+
 TEST_P(Test_ONNX_layers, ReduceMean3D)
 {
     if (backend == DNN_BACKEND_CUDA)
@@ -664,6 +676,11 @@ TEST_P(Test_ONNX_layers, MatmulWithTwoInputs)
     testONNXModels("matmul_with_two_inputs");
 }

+TEST_P(Test_ONNX_layers, ResizeOpset11_Torch1_6)
+{
+    testONNXModels("resize_opset11_torch1.6");
+}
+
 INSTANTIATE_TEST_CASE_P(/*nothing*/, Test_ONNX_layers, dnnBackendsAndTargets());

 class Test_ONNX_nets : public Test_ONNX_layers
modules/imgcodecs/src/loadsave.cpp

Lines changed: 16 additions & 1 deletion

@@ -197,7 +197,19 @@ struct ImageCodecInitializer
     std::vector<ImageEncoder> encoders;
 };

-static ImageCodecInitializer codecs;
+static
+ImageCodecInitializer& getCodecs()
+{
+#ifdef CV_CXX11
+    static ImageCodecInitializer g_codecs;
+    return g_codecs;
+#else
+    // C++98 doesn't guarantee correctness of multi-threaded initialization of static global variables
+    // (memory leak here is not critical, use C++11 to avoid that)
+    static ImageCodecInitializer* g_codecs = new ImageCodecInitializer();
+    return *g_codecs;
+#endif
+}

 /**
  * Find the decoders
@@ -211,6 +223,7 @@ static ImageDecoder findDecoder( const String& filename ) {
     size_t i, maxlen = 0;

     /// iterate through list of registered codecs
+    ImageCodecInitializer& codecs = getCodecs();
     for( i = 0; i < codecs.decoders.size(); i++ )
     {
         size_t len = codecs.decoders[i]->signatureLength();
@@ -248,6 +261,7 @@ static ImageDecoder findDecoder( const Mat& buf )
     if( buf.rows*buf.cols < 1 || !buf.isContinuous() )
         return ImageDecoder();

+    ImageCodecInitializer& codecs = getCodecs();
     for( i = 0; i < codecs.decoders.size(); i++ )
     {
         size_t len = codecs.decoders[i]->signatureLength();
@@ -280,6 +294,7 @@ static ImageEncoder findEncoder( const String& _ext )
     for( ext++; len < 128 && isalnum(ext[len]); len++ )
         ;

+    ImageCodecInitializer& codecs = getCodecs();
     for( size_t i = 0; i < codecs.encoders.size(); i++ )
     {
         String description = codecs.encoders[i]->getDescription();