Hi again!
I’m trying to migrate this CUDA code to oneAPI, but I haven’t been able to. I need to get the same results with CUDA and with SYCL. Here is the full CUDA code:
#include <cstdio>
// Image dimensions in texels: p() allocates the CUDA array as WIDTH x HEIGHT
// and fills a host buffer of WIDTH * HEIGHT elements, row by row.
#define HEIGHT 7680
#define WIDTH 7245
// Texel type: a CUDA int4, i.e. four ints per texel.
typedef int4 it;
// Device-side CUDA array holding the image; allocated in p().
cudaArray *Array_Device;
// 2D texture reference over `it` texels, read without value conversion
// (cudaReadModeElementType). p() binds it to Array_Device; k() reads it
// through tex2D.
texture<it, 2, cudaReadModeElementType> Image;
// Fetches one texel from the texture `Image` and prints a single int from it.
//
// x: flat int index along the row. The texel column is x >> 2 and the
//    component inside the int4 texel is x & 3.
// y: row coordinate, passed unchanged to tex2D.
//
// Every thread that executes this kernel prints one line; the visible launch
// uses <<<1, 1>>>, so exactly one value is printed.
__global__ void k(int x, int y) {
  const int component = x & 3;  // which of the four ints in the texel
  const int texelX = x >> 2;    // which int4 texel along the row
  it texel = tex2D(Image, texelX, y);
  // View the int4 as an array of four ints so it can be indexed at runtime.
  const int *parts = reinterpret_cast<const int *>(&texel);
  printf("%d\n", parts[component]);
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaSucceeded(cudaError_t status, const char *what) {
  if (status == cudaSuccess) return true;
  fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
  return false;
}

// Builds a WIDTH x HEIGHT int4 image on the host, uploads it into the CUDA
// array Array_Device, binds the texture reference Image to that array, and
// launches k on a single thread to print one element.
//
// Texel (row i, column j) holds the four consecutive ints
// i * 10000 + j * 4 + {0, 1, 2, 3}.
//
// Unlike a fire-and-forget version, every CUDA runtime call is checked; after
// the first failure the remaining GPU steps are skipped. The host buffer, the
// texture binding and the CUDA array are always released before returning,
// and Array_Device is reset to nullptr once freed.
void p() {
  it *h = new it[static_cast<size_t>(WIDTH) * HEIGHT];
  for (int i = 0; i < HEIGHT; i++)
    for (int j = 0; j < WIDTH; j++) {
      const int base = i * 10000 + j * 4;
      h[static_cast<size_t>(i) * WIDTH + j] = {base + 0, base + 1, base + 2,
                                               base + 3};
    }

  cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<it>();

  const bool allocated = cudaSucceeded(
      cudaMallocArray(&Array_Device, &channelDesc, WIDTH, HEIGHT),
      "cudaMallocArray");
  const bool bound =
      allocated && cudaSucceeded(cudaBindTextureToArray(Image, Array_Device),
                                 "cudaBindTextureToArray");
  // Source rows are tightly packed, so the source pitch equals the row width
  // in bytes.
  const bool uploaded =
      bound && cudaSucceeded(
                   cudaMemcpy2DToArray(Array_Device, 0, 0, h,
                                       WIDTH * sizeof(it), WIDTH * sizeof(it),
                                       HEIGHT, cudaMemcpyHostToDevice),
                   "cudaMemcpy2DToArray");

  if (uploaded) {
    k<<<1, 1>>>(3670, 2000);
    // A launch does not return a status: launch-configuration errors come
    // from cudaGetLastError(), execution faults from the synchronize. The
    // synchronize also flushes the device-side printf output.
    if (cudaSucceeded(cudaGetLastError(), "k launch"))
      cudaSucceeded(cudaDeviceSynchronize(), "cudaDeviceSynchronize");
  }

  // Release resources in reverse order of acquisition.
  if (bound) cudaSucceeded(cudaUnbindTexture(Image), "cudaUnbindTexture");
  if (allocated) {
    cudaSucceeded(cudaFreeArray(Array_Device), "cudaFreeArray");
    Array_Device = nullptr;
  }
  delete[] h;
}
// Program entry point: runs the texture upload and the one-thread lookup.
int main() {
  p();
  return 0;
}
For example, this code prints 20003670. The objective is to migrate it to oneAPI and get the same result.
Thank you so much, this is a very useful forum!