Recently, I encountered an “an illegal memory access was encountered” error during CUDA programming. It was reported at a call such as:
cudaSafeCall(cudaMemcpy(h_res, d_res, gParams.n*sizeof(uint32), cudaMemcpyDeviceToHost));
Because kernels in CUDA are executed asynchronously, the call that reports the error is usually not the actual culprit. After referring to this piece of code, I wrote a new cudaMemoryTest function:
// Evaluates a CUDA runtime call exactly once; if the result is not
// cudaSuccess, prints the source file, line and CUDA error string to
// std::cerr and terminates the program with EXIT_FAILURE.
// The argument is parenthesized, and the temporary has a macro-specific name
// so that passing an expression that mentions a caller variable named `err`
// does not end up reading the macro's own uninitialized temporary.
#define cudaSafeCall(call) \
do {\
    cudaError_t cudaSafeCallErr_ = (call);\
    if (cudaSuccess != cudaSafeCallErr_) \
    {\
        std::cerr << "CUDA error in " << __FILE__ << "(" << __LINE__ << "): " \
                  << cudaGetErrorString(cudaSafeCallErr_) << std::endl;\
        exit(EXIT_FAILURE);\
    }\
} while(0)
// Checkpoint helper: allocates a scratch buffer of 1048576 ints on the host
// and on the current device, copies it host -> device -> host, and releases
// both buffers again. Every CUDA call goes through cudaSafeCall, so the
// program exits with a file/line message if any of them reports an error,
// which is how an error left pending by earlier work gets surfaced here.
void cudaMemoryTest()
{
    const size_t N = 1048576;
    const size_t bytes = N * sizeof(int);

    int *h_a = (int*)malloc(bytes);
    if (h_a == NULL)
    {
        // Without this check the memset below would write through a null pointer.
        std::cerr << "cudaMemoryTest: host allocation of " << bytes
                  << " bytes failed" << std::endl;
        exit(EXIT_FAILURE);
    }
    memset(h_a, 0, bytes);

    int *d_a = NULL;
    cudaSafeCall(cudaMalloc((void**)&d_a, bytes));
    cudaSafeCall(cudaMemcpy(d_a, h_a, bytes, cudaMemcpyHostToDevice));
    cudaSafeCall(cudaMemcpy(h_a, d_a, bytes, cudaMemcpyDeviceToHost));

    // Release the scratch buffers: this function is meant to be called many
    // times, and leaking one host and one device buffer per call adds up.
    cudaSafeCall(cudaFree(d_a));
    free(h_a);
}
Then I insert calls to this function at the suspicious positions:
{
    // Checkpoint: fails here if work issued before this point already left
    // an error pending.
    cudaMemoryTest();
    kernel_A<<<...>>>(...);
    ......
    // Checkpoint: if this one fails while the previous one passed, the error
    // was raised between the two checkpoints (kernel_A or the code after it).
    cudaMemoryTest();
    kernel_B<<<...>>>(...);
    ......
}
This way I am notified promptly once a CUDA memory access goes wrong, and I can then investigate further.
In most cases, the cause of this issue is a NULL pointer or a pointer to memory that has already been freed. In a multi-GPU environment, you must also make sure that all the memory used by one operation is allocated on the same device.