Skip to content

Commit 45fee13

Browse files
committed
Merge pull request opencv#18362 from alalek:ocl_async_kernel_reschedule_bug
2 parents 3e3787e + 4fa8280 commit 45fee13

File tree

2 files changed

+44
-6
lines changed

2 files changed

+44
-6
lines changed

modules/core/src/ocl.cpp

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2755,7 +2755,7 @@ KernelArg KernelArg::Constant(const Mat& m)
27552755
struct Kernel::Impl
27562756
{
27572757
Impl(const char* kname, const Program& prog) :
2758-
refcount(1), handle(NULL), isInProgress(false), nu(0)
2758+
refcount(1), handle(NULL), isInProgress(false), isAsyncRun(false), nu(0)
27592759
{
27602760
cl_program ph = (cl_program)prog.ptr();
27612761
cl_int retval = 0;
@@ -2832,6 +2832,7 @@ struct Kernel::Impl
28322832
enum { MAX_ARRS = 16 };
28332833
UMatData* u[MAX_ARRS];
28342834
bool isInProgress;
2835+
bool isAsyncRun; // true if kernel was scheduled in async mode
28352836
int nu;
28362837
std::list<Image2D> images;
28372838
bool haveTempDstUMats;
@@ -3111,13 +3112,45 @@ bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
31113112
}
31123113

31133114

3115+
// Reports whether the configuration parameter
// "OPENCV_OPENCL_RAISE_ERROR_REUSE_ASYNC_KERNEL" is enabled (default: false).
// The parameter is read once, on the first call, and the result is cached in
// function-local statics; later calls return the cached value.
// Kernel::Impl::run() consults this to decide whether to hit CV_Assert(0)
// after logging an error, instead of only returning false.
// NOTE(review): no synchronization is visible around the two statics below;
// confirm that concurrent first calls are acceptable here.
static bool isRaiseErrorOnReuseAsyncKernel()
3116+
{
3117+
static bool initialized = false;
3118+
static bool value = false;
3119+
if (!initialized)
3120+
{
3121+
// One-time lookup of the configuration parameter; falls back to false.
value = cv::utils::getConfigurationParameterBool("OPENCV_OPENCL_RAISE_ERROR_REUSE_ASYNC_KERNEL", false);
3122+
initialized = true;
3123+
}
3124+
return value;
3125+
}
3126+
31143127
bool Kernel::Impl::run(int dims, size_t globalsize[], size_t localsize[],
31153128
bool sync, int64* timeNS, const Queue& q)
31163129
{
31173130
CV_INSTRUMENT_REGION_OPENCL_RUN(name.c_str());
31183131

3119-
if (!handle || isInProgress)
3132+
if (!handle)
3133+
{
3134+
CV_LOG_ERROR(NULL, "OpenCL kernel has zero handle: " << name);
31203135
return false;
3136+
}
3137+
3138+
if (isAsyncRun)
3139+
{
3140+
CV_LOG_ERROR(NULL, "OpenCL kernel can't be reused in async mode: " << name);
3141+
if (isRaiseErrorOnReuseAsyncKernel())
3142+
CV_Assert(0);
3143+
return false; // OpenCV 5.0: raise error
3144+
}
3145+
isAsyncRun = !sync;
3146+
3147+
if (isInProgress)
3148+
{
3149+
CV_LOG_ERROR(NULL, "Previous OpenCL kernel launch is not finished: " << name);
3150+
if (isRaiseErrorOnReuseAsyncKernel())
3151+
CV_Assert(0);
3152+
return false; // OpenCV 5.0: raise error
3153+
}
31213154

31223155
cl_command_queue qq = getQueue(q);
31233156
if (haveTempDstUMats)

modules/dnn/src/ocl4dnn/src/math_functions.cpp

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@
4646
#include <vector>
4747
#include "opencl_kernels_dnn.hpp"
4848

49+
#include "opencv2/core/utils/logger.hpp"
50+
4951
namespace cv { namespace dnn { namespace ocl4dnn {
5052

5153
enum gemm_data_type_t
@@ -238,10 +240,6 @@ static bool ocl4dnnFastImageGEMM(const CBLAS_TRANSPOSE TransA,
238240
kernel_name += "_float";
239241
}
240242

241-
ocl::Kernel oclk_gemm_float(kernel_name.c_str(), ocl::dnn::gemm_image_oclsrc, opts);
242-
if (oclk_gemm_float.empty())
243-
return false;
244-
245243
while (C_start_y < M)
246244
{
247245
blockC_width = std::min(static_cast<int>(N) - C_start_x, blocksize);
@@ -348,6 +346,10 @@ static bool ocl4dnnFastImageGEMM(const CBLAS_TRANSPOSE TransA,
348346
}
349347
local[1] = 1;
350348

349+
ocl::Kernel oclk_gemm_float(kernel_name.c_str(), ocl::dnn::gemm_image_oclsrc, opts);
350+
if (oclk_gemm_float.empty())
351+
return false;
352+
351353
cl_uint arg_idx = 0;
352354
if (is_image_a)
353355
oclk_gemm_float.set(arg_idx++, ocl::KernelArg::PtrReadOnly(A));
@@ -378,7 +380,10 @@ static bool ocl4dnnFastImageGEMM(const CBLAS_TRANSPOSE TransA,
378380
oclk_gemm_float.set(arg_idx++, isFirstColBlock);
379381

380382
if (!oclk_gemm_float.run(2, global, local, false))
383+
{
384+
CV_LOG_WARNING(NULL, "OpenCL kernel enqueue failed: " << kernel_name);
381385
return false;
386+
}
382387

383388
if (TransA == CblasNoTrans)
384389
A_start_x += blockA_width;

0 commit comments

Comments
 (0)