Hello everyone,
a small program of mine, using the oneAPI DFT, fails to link.
I provide a minimal working example, the Makefile, and the error message:
#include <random>
#include <algorithm>
#include <iterator>
#include <iostream>
#include <vector>
#include <complex>
#include <sycl/sycl.hpp>
#include <oneapi/mkl/dft.hpp>
// Shorthand for the oneMKL DFT namespace used by the helpers below.
namespace dft_ns = oneapi::mkl::dft;
// True exactly when T is one of the four element types accepted by the DFT
// helpers in this file: float, double, or std::complex of either.
template< typename T >
constexpr bool is_usable_compute_type =
    std::disjunction_v< std::is_same< T, float >,
                        std::is_same< T, std::complex< float > >,
                        std::is_same< T, double >,
                        std::is_same< T, std::complex< double > > >;
// Queries the descriptor's DIMENSION and LENGTHS settings and then commits
// the descriptor to queue `q`. The queried values are not used afterwards.
template< dft_ns::precision prec, dft_ns::domain dom >
void ready_descriptor( dft_ns::descriptor< prec, dom >& desc, sycl::queue& q )
{
    using index_t = std::int64_t;

    // Number of dimensions, used to size the lengths vector below.
    index_t dimension_count;
    desc.get_value( dft_ns::config_param::DIMENSION, &dimension_count );

    // One entry per dimension.
    std::vector< index_t > extents( dimension_count );
    desc.get_value( dft_ns::config_param::LENGTHS, &extents );

    desc.commit( q );
}
// Builds a REAL-domain DFT descriptor for `lengths`, commits it to `q` via
// ready_descriptor(), and launches a forward in-place transform on
// `device_accessible_usm_data`.
//
// T selects the precision: float and std::complex<float> map to SINGLE,
// double and std::complex<double> map to DOUBLE. Other types are rejected
// through the enable_if constraint.
//
// Returns the event produced by dft_ns::compute_forward; the caller decides
// when to wait on it.
template< typename T, std::enable_if_t< is_usable_compute_type< T >, bool > = true >
sycl::event compute_inplace_real_dft( sycl::queue& q,
                                      const std::vector< std::int64_t >& lengths,
                                      T* device_accessible_usm_data )
{
    constexpr bool uses_float =
        std::is_same_v< T, float > || std::is_same_v< T, std::complex< float > >;
    constexpr dft_ns::precision prec =
        uses_float ? dft_ns::precision::SINGLE : dft_ns::precision::DOUBLE;

    dft_ns::descriptor< prec, dft_ns::domain::REAL > desc( lengths );
    ready_descriptor( desc, q );

    return dft_ns::compute_forward( desc, device_accessible_usm_data );
}
// Vendor substring that device_selector() looks for in a device's vendor
// string; main() overwrites it with the argument following "-m".
std::string manufacturer = "NVIDIA";
// Scores a device for queue construction: one point if it is a GPU, plus one
// point if its vendor string contains the global `manufacturer` substring.
// The result is therefore 0, 1 or 2.
int device_selector( const sycl::device& dev )
{
    const bool is_gpu = dev.is_gpu();

    const std::string vendor_name = dev.get_info< sycl::info::device::vendor >();
    const bool vendor_matches = vendor_name.find( manufacturer ) != std::string::npos;

    return ( is_gpu ? 1 : 0 ) + ( vendor_matches ? 1 : 0 );
}
// Allocates a USM buffer on the queue's device, fills it with random samples
// in [-1, 1), runs a forward in-place real DFT of length N on it, and frees
// the buffer. Does nothing if the device supports neither shared nor device
// USM allocations.
void do_some_work( sycl::queue& q )
{
    const auto dev = q.get_device();
    const bool has_usm_shared = dev.has( sycl::aspect::usm_shared_allocations );
    const bool has_usm_device = dev.has( sycl::aspect::usm_device_allocations );
    if( !has_usm_shared && !has_usm_device )
        return;

    constexpr int N = 2048;
    // An in-place real-to-complex transform of length N produces N/2 + 1
    // complex values in the same buffer, so the buffer must hold
    // 2 * (N/2 + 1) reals rather than N.
    constexpr int padded_size = 2 * ( N / 2 + 1 );

    float* data = has_usm_shared ? sycl::malloc_shared< float >( padded_size, q )
                                 : sycl::malloc_device< float >( padded_size, q );
    if( data == nullptr )
    {
        std::cerr << "USM allocation failed\n";
        return;
    }

    try
    {
        // Generate on the host and copy over: a malloc_device allocation is
        // not host-accessible, so writing to it directly from the host is
        // invalid. The padding elements stay zero-initialised.
        std::vector< float > host_samples( padded_size );
        std::random_device rnd_device;
        std::mt19937 mersenne_engine{ rnd_device() };
        std::uniform_real_distribution< float > dist{ -1.0f, +1.0f };
        std::generate( host_samples.begin(), host_samples.begin() + N,
                       [&]() { return dist( mersenne_engine ); } );
        q.memcpy( data, host_samples.data(), sizeof( float ) * host_samples.size() ).wait();

        const std::vector< std::int64_t > lengths{ N };
        // Wait for the transform to finish before the buffer is released.
        compute_inplace_real_dft< float >( q, lengths, data ).wait();
    }
    catch( ... )
    {
        // Release the buffer on failure as well, then let the caller see the error.
        sycl::free( data, q );
        throw;
    }
    sycl::free( data, q );
}
// Entry point. Recognises "-m <vendor>" to override the preferred device
// vendor substring, builds a queue using device_selector, and runs the DFT
// example on it. Returns 1 if "-m" is given without a value, 0 otherwise.
int main( int argc, char** argv )
{
    for( int arg_index = 1; arg_index < argc; ++arg_index )
    {
        if( std::string( "-m" ) == argv[arg_index] )
        {
            // Without this check a trailing "-m" would read argv[argc], which
            // is a null pointer, and assign it to a std::string.
            if( arg_index + 1 >= argc )
            {
                std::cerr << "option -m requires a manufacturer name\n";
                return 1;
            }
            manufacturer = argv[++arg_index];
        }
    }

    sycl::queue q{ device_selector };
    do_some_work( q );
    return 0;
}
The Makefile is
# Root of the oneMKL installation whose static libraries are linked below.
LDIR = /home/pablo/intel/oneapi/mkl/2025.1/
# Compile flags. NOTE(review): IDIR is not defined in this excerpt; the logged
# command shows it expanding to the "include" directory under LDIR - confirm.
CFLAGS = -Werror -Wall -Wpedantic -I${IDIR} -std=c++17
# Link flags: static oneMKL SYCL library, then the ILP64 interface, TBB
# threading and core libraries as one link group, followed by runtime libraries.
LFLAGS = -fsycl -fsycl-device-code-split=per_kernel ${LDIR}/lib/libmkl_sycl.a -Wl,-export-dynamic -Wl,--start-group ${LDIR}/lib/libmkl_intel_ilp64.a ${LDIR}/lib/libmkl_tbb_thread.a ${LDIR}/lib/libmkl_core.a -Wl,--end-group -L${LDIR}/lib/ -ltbb -lsycl -lOpenCL -lpthread -lm -ldl
# Device targets: NVIDIA PTX (architecture sm_60) and generic SPIR-V (spir64).
DEVICES = -fsycl-targets=nvptx64-nvidia-cuda,spir64 -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60
# Builds the example in one compile-and-link step; -fsycl is passed both here
# and again through LFLAGS.
example: example.cpp
icpx -o example example.cpp -fsycl ${CFLAGS} ${DEVICES} ${LFLAGS}
When I attempt to compile and link the code, I get the following error message:
icpx -o example example.cpp -fsycl -Werror -Wall -Wpedantic -I/home/pablo/intel/oneapi/mkl/2025.1/include -std=c++17 -fsycl-targets=nvptx64-nvidia-cuda,spir64 -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 -fsycl -fsycl-device-code-split=per_kernel /home/pablo/intel/oneapi/mkl/2025.1//lib/libmkl_sycl.a -Wl,-export-dynamic -Wl,--start-group /home/pablo/intel/oneapi/mkl/2025.1//lib/libmkl_intel_ilp64.a /home/pablo/intel/oneapi/mkl/2025.1//lib/libmkl_tbb_thread.a /home/pablo/intel/oneapi/mkl/2025.1//lib/libmkl_core.a -Wl,--end-group -L/home/pablo/intel/oneapi/mkl/2025.1//lib/ -ltbb -lsycl -lOpenCL -lpthread -lm -ldl
icpx: error: linked binaries do not contain expected 'nvptx64-nvidia-cuda-sm_60' target; found targets: 'spir64-unknown-unknown, spir64_gen-unknown-unknown, spir64' [-Werror,-Wsycl-target]
make: *** [Makefile:9: example] Fehler 1
In my system, sycl-ls gives
[level_zero:gpu][level_zero:0] Intel(R) oneAPI Unified Runtime over Level-Zero, Intel(R) UHD Graphics 12.2.0 [1.6.33276]
[opencl:cpu][opencl:0] Intel(R) OpenCL, Intel(R) Core(TM) i9-14900HX OpenCL 3.0 (Build 0) [2025.19.4.0.18_160000.xmain-hotfix]
[opencl:gpu][opencl:1] Intel(R) OpenCL Graphics, Intel(R) UHD Graphics OpenCL 3.0 NEO [25.13.033276]
[cuda:gpu][cuda:0] NVIDIA CUDA BACKEND, NVIDIA GeForce RTX 4070 Laptop GPU 8.9 [CUDA 12.8]
What flag should I give the linker, for it to generate the code that an NVIDIA device can run?
Thank you in advance!