Consider the following CUDA code:
#include <iostream>
// cudaSafeCall(call)
//
// Evaluates the CUDA runtime expression `call` exactly once. If the result is
// not cudaSuccess, prints the source file, line number and the text returned
// by cudaGetErrorString() to std::cerr, then terminates the process with
// EXIT_FAILURE. On success it has no effect beyond evaluating `call`.
//
// Hygiene notes:
//  - `call` is parenthesized so any expression can be passed safely.
//  - The status local uses a distinctive name so it cannot shadow an
//    identifier that appears inside `call` (for example a caller's own `err`).
//  - The do { ... } while(0) wrapper makes the macro behave as a single
//    statement, so it is safe inside an unbraced if/else.
#define cudaSafeCall(call) \
    do {\
        cudaError_t cudaSafeCallStatus_ = (call);\
        if (cudaSuccess != cudaSafeCallStatus_) \
        {\
            std::cerr << "CUDA error in " << __FILE__ << "(" << __LINE__ << "): " \
                      << cudaGetErrorString(cudaSafeCallStatus_) << std::endl;\
            exit(EXIT_FAILURE);\
        }\
    } while(0)
// Minimal reproduction: requests an asynchronous 10-byte copy that involves a
// device buffer allocated with only 4 bytes. Every runtime call is wrapped in
// cudaSafeCall, so the first failing call prints a diagnostic and exits.
int main(void)
{
// a   : buffer obtained from cudaMallocHost (page-locked host memory).
// d_a : buffer obtained from cudaMalloc (device memory).
char *a, *d_a;
cudaStream_t st;
// Create the stream that the asynchronous copy below is issued on.
cudaSafeCall(cudaStreamCreate(&st));
// Host buffer: 10 bytes.
cudaSafeCall(cudaMallocHost(&a, 10));
// Device buffer: only 4 bytes, deliberately smaller than the copy size.
cudaSafeCall(cudaMalloc(&d_a, 4));
// Requests 10 bytes although d_a holds only 4.
// NOTE(review): the arguments are (dst = a, src = d_a), i.e. host destination
// and device source, while the kind is cudaMemcpyHostToDevice. Confirm whether
// this mismatch is intended as part of the repro.
cudaSafeCall(cudaMemcpyAsync(a, d_a, 10, cudaMemcpyHostToDevice, st));
// No stream synchronization, cudaFree, cudaFreeHost or cudaStreamDestroy
// follows; the program simply returns.
return 0;
}
The buffer d_a is allocated with only 4 bytes, but we try to copy 10 bytes.
In this case, cudaMemcpyAsync fails with an "invalid argument" error:
$ nvcc test.cu
$ ./a.out
CUDA error in test.cu(21): invalid argument