Hello,
I am testing sycl code to “draw +” shape on 2D matrix. But strangely I got artifacts (see picture) while expecting “+” shape at column 16, row 16.
Qustion: Why this could happen?
Kernel code (i - means frame number, as I am testing animation, but all frames should be the same in this test case):
uint adrT = i * size * size;
uint adr = y * size + x;
Treg[i * size * size + y * size + x] = (x == 15 || y == 15) ? 10.f : 0;
syclTest.cpp:
#include <CL/sycl.hpp>
#include <iostream>
#include <fstream>
#include <time.h>
#include <sys/time.h>
using namespace cl::sycl;
class kernel1Name;
class kernel2Name;
double get_wall_time()
{
struct timeval time;
if (gettimeofday(&time, NULL))
{
// Handle error
return 0;
}
return (double)time.tv_sec + (double)time.tv_usec * .000001;
}
int main()
{
const uint Ncount = 8;
const uint size = 32;
uint t_start, t_end;
double wall0;
// float *V_YXHost = new float[cellCountX * cellCountY];
float *THost = new float[size * size];
float *TregHost = new float[size * size * Ncount];
for (uint t = 0; t < Ncount; t++)
for (uint y = 0; y < size; y++)
for (uint x = 0; x < size; x++)
{
THost[y * size + x] = (x > 14 && x < 16) ? 10.f : 0;
TregHost[t * size * size + y * size + x] = (x > 14 && x < 16) ? 10.f : 0;
}
try
{
// default_selector selector;
host_selector selector;
queue myQueue(selector, [](exception_list l) {
for (auto ep : l)
{
try
{
std::rethrow_exception(ep);
}
catch (const exception &e)
{
std::cout << "Asynchronous exception caught:\n"
<< e.what();
}
}
});
// buffer<float, 1> IBuff(IHost, range<1>(cellCountX * cellCountY));
// buffer<float, 1> V_YXBuff(V_YXHost, range<1>(cellCountX * cellCountY));
buffer<float, 1> TBuff(THost, range<1>(size * size));
buffer<float, 1> TregBuff(THost, range<1>(size * size * Ncount));
printf("Starting calc..\n");
t_start = clock();
wall0 = get_wall_time();
for (uint i = 0; i < Ncount; i++)
{
myQueue.submit([&](handler &cgh) {
stream out(1024, 256, cgh);
auto T = TBuff.get_access<access::mode::read_write>(cgh);
auto Treg = TregBuff.get_access<access::mode::read_write>(cgh);
auto myRange = nd_range<2>(range<2>(size, size), range<2>(size / 8, size / 8));
auto myKernel1 = ([i, size, T, Treg](nd_item<2> item) {
uint x = item.get_global_id()[0];
uint y = item.get_global_id()[1];
// if (x < size && y < size)
{
uint adrT = i * size * size;
uint adr = y * size + x;
Treg[i * size * size + y * size + x] = (x == 15 || y == 15) ? 10.f : 0;
// if (x > 0 && x < size - 1 && y > 0 && y < size - 1)
// {
// float T0 = T[adr];
// // float dx1 = T[size * y + x - 1] - T0;
// // float dx2 = T[size * y + x + 1] - T0;
// if (x == 15 && y == 15)
// {
// T0 += 1.f;
// T[adr] = T0;
// Treg[adrT + adr] = T0;
// float aa = Treg[adrT + adr];
// float bb = aa;
// }
// }
}
});
cgh.parallel_for<kernel1Name>(myRange, myKernel1);
});
}
//V_tYX_reg_ = V_tYX_regBuff.get_access<access::mode::read>();
}
catch (const exception &e)
{
std::cout << "Synchronous exception caught:\n"
<< e.what();
return 2;
}
printf("Calc. done.\n");
t_end = clock();
double wall1 = get_wall_time();
std::cout << "Wall Time = " << wall1 - wall0 << std::endl;
std::cout << "CPU time required for execution: "
<< (double)(t_end - t_start) / CLOCKS_PER_SEC
<< " seconds."
<< "\n";
// writes output file
FILE *f = fopen("trink.binary", "wb");
if (f == NULL)
{
perror("Failed: ");
return 1;
}
fwrite(TregHost, sizeof(float), size * size * Ncount, f);
fclose(f);
printf("File wrote.\n");
return 0;
}
CMakeLists.txt:
cmake_minimum_required(VERSION 3.4.3)
project(syclTest)
set(CMAKE_BUILD_TYPE Debug)
# set(CMAKE_BUILD_TYPE Release)
set(COMPUTECPP_RUNTIME_LIBRARY /opt/ComputeCpp-CE/lib/libComputeCpp.so)
set(COMPUTECPP_RUNTIME_LIBRARY_DEBUG /opt/ComputeCpp-CE/lib/libComputeCpp.so)
set(ComputeCpp_INCLUDE_DIRS /opt/ComputeCpp-CE/include)
set(COMPUTECPP_BITCODE "ptx64")
# set(COMPUTECPP_USER_FLAGS "-O3 -std=c++14")
list(APPEND CMAKE_MODULE_PATH /home/kest/computecpp-sdk/cmake/Modules/)
find_package(ComputeCpp REQUIRED)
add_executable(runSyclTest ${CMAKE_CURRENT_SOURCE_DIR}/syclTest.cpp)
target_compile_options(runSyclTest PUBLIC -g -std=c++11 -Wall)
add_sycl_to_target(TARGET runSyclTest
SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/syclTest.cpp)
Using python to plot.
plot.py:
# -*- coding: utf-8 -*-
"""
Created on 2018-10-03
@author: Kestutis
"""
#%matplotlib nbagg
# IPython reikia parasyti: %matplotlib qt5
import numpy as np
import matplotlib as mp
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from mpl_toolkits.axes_grid1 import make_axes_locatable
# from heatmap import heatmap
# from heatmap import annotate_heatmap
plt.style.use('dark_background')
#fig = plt.figure()
#ax = fig.add_subplot(111)
fig, [ax, ax2] = plt.subplots(ncols=1,nrows=2)
Writer = animation.writers['ffmpeg']
writer = Writer(fps=5, metadata=dict(artist='Me'), bitrate=1800)
x = np.int(32)
y = np.int(32)
t = np.int(8) # number of samples # 'simul. time' /'reg_dt' in ms
V = np.fromfile('trink.binary', dtype='float32')
V = V.reshape(t,y,x) #t, y, x
#div = make_axes_locatable(ax)
#cax = div.append_axes('right', '5%', '5%')
#cf = ax.contourf(V[0,:,:].squeeze())
#cb = fig.colorbar(cf, cax=cax)
im = ax.matshow(V[0,:,:])#, origin='lower')
cb = fig.colorbar(im)
arr = V[:,:,:]
vmax = np.max(arr)
vmin = np.min(arr)
levels = np.linspace(vmin, vmax, 100, endpoint = True) #https://stackoverflow.com/questions/39472017/how-to-animate-the-colorbar-in-matplotlib
#def update(i):
# ax.cla()
# arr = V[i,:,:]
# cf = ax.contourf(arr,vmax=vmax, vmin=vmin, levels=levels) #vmin,vmax-?
# fig.colorbar(cf, cax = cax)
# ax.set_title('%03d'%(i))
## print(i)
im.set_clim(vmin, vmax)
def update(i):
# vmax = np.max(arr)
# vmin = np.min(arr)
im.set_data(V[i,:,:])
ax.set_title('%03d'%(i))
# im.set_clim(vmin, vmax)
fig.canvas.manager.window.raise_() #iskelti langa i ant kitu
ani = animation.FuncAnimation(fig,update,t,interval=20)
# plt.tight_layout()
# ax.grid(True)
ax2.plot(V[:,12,16])
# ax.matshow(V[0,:,:])#, origin='lower')
#ani.save('contour.mp4', writer=writer)
#paiso V is vienos last.
# fig2, ax2 = plt.subplots()
# ax2.plot(V[:,32,64])
# plt.show(block = "true")
plt.show()
print('Done.')
#plt.yticks(range(Z.shape[0]))
#plt.xticks(range(Z.shape[1]))