Incorrect matrix values

Hello,
I am testing sycl code to “draw +” shape on 2D matrix. But strangely I got artifacts (see picture) while expecting “+” shape at column 16, row 16.
Qustion: Why this could happen?

Kernel code (i - means frame number, as I am testing animation, but all frames should be the same in this test case):

uint adrT = i * size * size;
uint adr = y * size + x;
Treg[i * size * size + y * size + x] = (x == 15 || y == 15) ? 10.f : 0;

Figure_1

syclTest.cpp:

#include <CL/sycl.hpp>

#include <iostream>
#include <fstream>
#include <time.h>
#include <sys/time.h>

using namespace cl::sycl;

class kernel1Name;
class kernel2Name;

double get_wall_time()
{
    struct timeval time;
    if (gettimeofday(&time, NULL))
    {
        //  Handle error
        return 0;
    }
    return (double)time.tv_sec + (double)time.tv_usec * .000001;
}

int main()
{
    const uint Ncount = 8;
    const uint size = 32;
    uint t_start, t_end;
    double wall0;

    //   float *V_YXHost = new float[cellCountX * cellCountY];
    float *THost = new float[size * size];
    float *TregHost = new float[size * size * Ncount];

    for (uint t = 0; t < Ncount; t++)
        for (uint y = 0; y < size; y++)
            for (uint x = 0; x < size; x++)
            {
                THost[y * size + x] = (x > 14 && x < 16) ? 10.f : 0;
                TregHost[t * size * size + y * size + x] = (x > 14 && x < 16) ? 10.f : 0;
            }

    try
    {
        // default_selector selector;
        host_selector selector;
        queue myQueue(selector, [](exception_list l) {
            for (auto ep : l)
            {
                try
                {
                    std::rethrow_exception(ep);
                }
                catch (const exception &e)
                {
                    std::cout << "Asynchronous exception caught:\n"
                              << e.what();
                }
            }
        });

        // buffer<float, 1> IBuff(IHost, range<1>(cellCountX * cellCountY));
        // buffer<float, 1> V_YXBuff(V_YXHost, range<1>(cellCountX * cellCountY));
        buffer<float, 1> TBuff(THost, range<1>(size * size));
        buffer<float, 1> TregBuff(THost, range<1>(size * size * Ncount));

        printf("Starting calc..\n");
        t_start = clock();
        wall0 = get_wall_time();

        for (uint i = 0; i < Ncount; i++)
        {
            myQueue.submit([&](handler &cgh) {
                stream out(1024, 256, cgh);

                auto T = TBuff.get_access<access::mode::read_write>(cgh);
                auto Treg = TregBuff.get_access<access::mode::read_write>(cgh);

                auto myRange = nd_range<2>(range<2>(size, size), range<2>(size / 8, size / 8));

                auto myKernel1 = ([i, size, T, Treg](nd_item<2> item) {
                    uint x = item.get_global_id()[0];
                    uint y = item.get_global_id()[1];

                    // if (x < size && y < size)
                    {
                        uint adrT = i * size * size;
                        uint adr = y * size + x;

                        Treg[i * size * size + y * size + x] = (x == 15 || y == 15) ? 10.f : 0;

                        // if (x > 0 && x < size - 1 && y > 0 && y < size - 1)
                        // {
                        //     float T0 = T[adr];
                        //     // float dx1 = T[size * y + x - 1] - T0;
                        //     // float dx2 = T[size * y + x + 1] - T0;
                        //     if (x == 15 && y == 15)
                        //     {
                        //         T0 += 1.f;
                        //         T[adr] = T0;
                        //         Treg[adrT + adr] = T0;
                        //         float aa = Treg[adrT + adr];
                        //         float bb = aa;
                        //     }
                        // }
                    }
                });
                cgh.parallel_for<kernel1Name>(myRange, myKernel1);
            });
        }
        //V_tYX_reg_ = V_tYX_regBuff.get_access<access::mode::read>();
    }
    catch (const exception &e)
    {
        std::cout << "Synchronous exception caught:\n"
                  << e.what();
        return 2;
    }
    printf("Calc. done.\n");
    t_end = clock();
    double wall1 = get_wall_time();

    std::cout << "Wall Time = " << wall1 - wall0 << std::endl;
    std::cout << "CPU time required for execution: "
              << (double)(t_end - t_start) / CLOCKS_PER_SEC
              << " seconds."
              << "\n";
    // writes output file

    FILE *f = fopen("trink.binary", "wb");
    if (f == NULL)
    {
        perror("Failed: ");
        return 1;
    }
    fwrite(TregHost, sizeof(float), size * size * Ncount, f);
    fclose(f);
    printf("File wrote.\n");
    return 0;
}

CMakeLists.txt:

cmake_minimum_required(VERSION 3.4.3)
project(syclTest)

set(CMAKE_BUILD_TYPE Debug)
# set(CMAKE_BUILD_TYPE Release)

set(COMPUTECPP_RUNTIME_LIBRARY /opt/ComputeCpp-CE/lib/libComputeCpp.so)
set(COMPUTECPP_RUNTIME_LIBRARY_DEBUG /opt/ComputeCpp-CE/lib/libComputeCpp.so)
set(ComputeCpp_INCLUDE_DIRS /opt/ComputeCpp-CE/include)
set(COMPUTECPP_BITCODE "ptx64")
# set(COMPUTECPP_USER_FLAGS "-O3 -std=c++14")

list(APPEND CMAKE_MODULE_PATH /home/kest/computecpp-sdk/cmake/Modules/)

find_package(ComputeCpp REQUIRED)

add_executable(runSyclTest ${CMAKE_CURRENT_SOURCE_DIR}/syclTest.cpp)
target_compile_options(runSyclTest PUBLIC -g -std=c++11 -Wall)
add_sycl_to_target(TARGET runSyclTest
  SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/syclTest.cpp)

Using python to plot.
plot.py:

# -*- coding: utf-8 -*-
"""
Created on 2018-10-03

@author: Kestutis
"""
#%matplotlib nbagg
# IPython reikia parasyti: %matplotlib qt5
import numpy as np
import matplotlib as mp
import matplotlib.pyplot as plt
import matplotlib.animation as animation

from mpl_toolkits.axes_grid1 import make_axes_locatable
# from heatmap import heatmap
# from heatmap import annotate_heatmap

plt.style.use('dark_background')
#fig = plt.figure()
#ax = fig.add_subplot(111)
fig, [ax, ax2] = plt.subplots(ncols=1,nrows=2)

Writer = animation.writers['ffmpeg']
writer = Writer(fps=5, metadata=dict(artist='Me'), bitrate=1800)

x = np.int(32)
y = np.int(32)
t = np.int(8) # number of samples # 'simul. time' /'reg_dt' in ms

V = np.fromfile('trink.binary', dtype='float32')
V = V.reshape(t,y,x) #t, y, x
#div = make_axes_locatable(ax)
#cax = div.append_axes('right', '5%', '5%')
#cf = ax.contourf(V[0,:,:].squeeze())
#cb = fig.colorbar(cf, cax=cax)
im = ax.matshow(V[0,:,:])#, origin='lower')
cb = fig.colorbar(im)
arr = V[:,:,:]
vmax     = np.max(arr)
vmin     = np.min(arr)
levels   = np.linspace(vmin, vmax, 100, endpoint = True) #https://stackoverflow.com/questions/39472017/how-to-animate-the-colorbar-in-matplotlib
#def update(i):
#    ax.cla()
#    arr = V[i,:,:]
#    cf = ax.contourf(arr,vmax=vmax, vmin=vmin, levels=levels) #vmin,vmax-?
#    fig.colorbar(cf, cax = cax)
#    ax.set_title('%03d'%(i)) 
##    print(i)
im.set_clim(vmin, vmax)
def update(i):
#    vmax     = np.max(arr)
#    vmin     = np.min(arr)
    im.set_data(V[i,:,:])
    ax.set_title('%03d'%(i))
#    im.set_clim(vmin, vmax)
fig.canvas.manager.window.raise_() #iskelti langa i ant kitu
ani = animation.FuncAnimation(fig,update,t,interval=20)
# plt.tight_layout()
# ax.grid(True)
ax2.plot(V[:,12,16])
# ax.matshow(V[0,:,:])#, origin='lower')
#ani.save('contour.mp4', writer=writer)

#paiso V is vienos last.
# fig2, ax2 = plt.subplots()
# ax2.plot(V[:,32,64])
# plt.show(block = "true")
plt.show()

print('Done.')
#plt.yticks(range(Z.shape[0]))
#plt.xticks(range(Z.shape[1]))

Does the code work correctly without using SYCL? Seems like you are referring to variables that are not in the scope of the source file you have posted.