Hello
The following kernel compiles without problems in Release mode, but fails to compile in Debug mode:
// Submits one command group that fills cBuf from aBuf and bBuf, one work-item per output element.
q.submit([&](sycl::handler& cgh) {
    // aBuf and bBuf are only read; cBuf is only written and is requested with sycl::no_init.
    sycl::accessor aAcc(aBuf, cgh, sycl::read_only);
    sycl::accessor bAcc(bBuf, cgh, sycl::read_only);
    sycl::accessor cAcc(cBuf, cgh, sycl::write_only, sycl::no_init);

    cgh.parallel_for(ndr, [=](sycl::nd_item<2> item) {
        // The 2-D global id selects the output element: dimension 0 is the row, dimension 1 the column.
        const sycl::id<2> gid = item.get_global_id();
        const int row = gid[0];
        const int col = gid[1];

        // Eight-lane accumulator, value-initialized.
        sycl::int8 s{};
        for (int k = 0; k < nd8; ++k) {
            // First row of bAcc covered by chunk k.
            // NOTE(review): presumably VectorSize == 8 and VectorType is an 8-lane vector — confirm.
            const int bBase = k * VectorSize;

            // Gather eight consecutive rows of column `col` from bAcc into one vector.
            VectorType bc(
                bAcc[bBase + 0][col],
                bAcc[bBase + 1][col],
                bAcc[bBase + 2][col],
                bAcc[bBase + 3][col],
                bAcc[bBase + 4][col],
                bAcc[bBase + 5][col],
                bAcc[bBase + 6][col],
                bAcc[bBase + 7][col]
            );
            s += aAcc[row][k] * bc;
        }

        // Sum the eight lanes of the accumulator and store the result at this work-item's global id.
        // Equivalent form using the named component accessors:
        // cAcc[gid] = s.s0() + s.s1() + s.s2() + s.s3() + s.s4() + s.s5() + s.s6() + s.s7();
        cAcc[gid] = s[0] + s[1] + s[2] + s[3] + s[4] + s[5] + s[6] + s[7];
    });
});
On Windows, accessing the components of a vector type such as sycl::int8, either through the member functions s0() … s7() or through the index operator s[i], results in a compilation error in Debug mode, because the function _STL_REPORT_ERROR(…) hasn’t been declared as SYCL_EXTERNAL in the STL version used with /MDd.
The error disappears when neither /D _DEBUG nor /MDd is used in combination with /fsycl-targets=nvptx64-nvidia-cuda.