I started writing a ray tracer using SYCL and I’m still learning the basics. My current issue is that, when trying to use a kernel class to organize my code, calling handler.parallel_for() on my range AND on my kernel class doesn’t seem to be executing every item of the range.
#include <CL/sycl.hpp>
#include "render_kernel.h"
#include "triangle.h"
int main(int argc, char* argv[])
{
const int width = 128;
const int height = 128;
sycl::queue queue;
std::cout << "Using " << queue.get_device().get_info<sycl::info::device::name>() << std::endl;
Image image(width, height);
std::vector<Triangle> triangles_buffer_host;
triangles_buffer_host.push_back(Triangle(Point(0, 0, -2), Point(1, 0, -2), Point(0.5, 0.5, -2.0)));
sycl::buffer<Color> image_buffer(image.color_data(), image.width() * image.height());
sycl::buffer<Triangle> triangle_buffer(triangles_buffer_host.data(), triangles_buffer_host.size());
queue.submit([&] (sycl::handler& handler) {
auto image_buffer_access = image_buffer.get_access<sycl::access::mode::write>(handler);
auto triangle_buffer_access = triangle_buffer.get_access<sycl::access::mode::read>(handler);
const auto global_range = sycl::range<2>(width, height);
const auto local_range = sycl::range<2>(8, 8);
const auto coordinates_indices = sycl::nd_range<2>(global_range, local_range);
sycl::stream debug_out_stream(1024, 128, handler);
auto render_kernel = RenderKernel(width, height,
image_buffer_access,
triangle_buffer_access,
debug_out_stream);
handler.parallel_for(coordinates_indices, render_kernel);
// The commented code below works as expected and the whole image is red
// handler.parallel_for(coordinates_indices, [image_buffer_access] (sycl::nd_item<2> coordinates)
// {
// int x = coordinates.get_global_id(0);
// int y = coordinates.get_global_id(1);
// image_buffer_access[y * width + x] = Color(1.0, 0.0, 0.0);
// });
}).wait();
return 0;
}
render_kernel.h:
#include <CL/sycl.hpp>
#include "color.h"
#include "triangle.h"
class RenderKernel
{
public:
RenderKernel(int width, int height,
sycl::accessor<Color, 1, sycl::access::mode::write, sycl::access::target::device> frame_buffer_accessor,
sycl::accessor<Triangle, 1, sycl::access::mode::read, sycl::access::target::device> triangle_buffer_accessor,
sycl::stream debug_out_stream) :
m_frame_buffer_access(frame_buffer_accessor),
m_triangle_buffer_access(triangle_buffer_accessor),
m_out_stream(debug_out_stream) {}
SYCL_EXTERNAL void operator()(const sycl::nd_item<2>& coordinates) const
{
int x = coordinates.get_global_id(0);
int y = coordinates.get_global_id(1);
m_frame_buffer_access[y * m_width + x] = Color(1.0, 0.0, 0.0);
}
private:
int m_width, m_height;
sycl::accessor<Color, 1, sycl::access::mode::write, sycl::access::target::device> m_frame_buffer_access;
sycl::accessor<Triangle, 1, sycl::access::mode::read, sycl::access::target::device> m_triangle_buffer_access;
sycl::stream m_out_stream;
};
Executing my kernel with a lambda function like so:
handler.parallel_for(coordinates_indices, [image_buffer_access] (sycl::nd_item<2> coordinates)
{
int x = coordinates.get_global_id(0);
int y = coordinates.get_global_id(1);
image_buffer_access[y * width + x] = Color(1.0, 0.0, 0.0);
});
does work as expected. It really is when I’m using my kernel class as a functor that only a few items get executed.
Here is a result image as an example:
What could be the source of the issue?