编译MWE时
#include <iostream>
#include "cuda.h"
// Minimal reproducer type: a struct with user-provided constructors that
// carry no execution-space qualifier, plus one member function that is
// callable from both host and device code.
struct Foo{
// Toggle marker: the next line is a line comment, so the two constructors
// below are compiled. Changing it to `/*` would open a block comment that
// ends at the `*/` inside the `//*/` line further down, which would remove
// both constructors from the struct.
///*
// Default constructor: prints "Construct" to std::cout.
Foo( ){
std::cout << "Construct" << std::endl;
}
// Copy constructor: prints "Copy construct" to std::cout. The source object
// `that` is not read and no state is copied.
Foo( const Foo & that ){
std::cout << "Copy construct" << std::endl;
}
//*/
// Callable from host and device code; always returns 0.
__host__ __device__
int bar( ) const {
return 0;
}
};
// Generic kernel: every launched thread invokes the callable it received by
// value exactly once and discards whatever the call returns.
template <typename CopyBody>
__global__ void kernel(CopyBody cBody)
{
    cBody();
}
// Host-side launcher: logs to std::cout before and after enqueuing `kernel`
// with one block of 32 threads. `cBody` is handed on as an lvalue, so the
// kernel's by-value parameter is initialised from a copy of the callable.
template <typename CopyBody>
void wrapper(CopyBody&& cBody)
{
    const dim3 gridSize(1);
    const dim3 blockSize(32);

    std::cout << "enquing kernel" << std::endl;
    kernel<<<gridSize, blockSize>>>(cBody);
    std::cout << "kernel enqued" << std::endl;
}
// Reproducer entry point: launches `kernel` twice with a device lambda that
// captures `foo` by value, first directly and then through `wrapper`.
// `argc` and `argv` are unused.
int main(int argc, char** argv) {
// Default-constructed on the host.
Foo foo;
std::cout << "enquing kernel" << std::endl;
// Direct launch with 1 block of 32 threads; the lambda captures `foo` by
// value ([=]) and calls bar() on that copy, discarding the result.
kernel<<<1,32>>>( [=] __device__ ( ) { foo.bar( ); } );
std::cout << "kernel enqued" << std::endl;
// Wait for the device to finish; the returned status is not checked.
cudaDeviceSynchronize( );
// Same lambda form, but the launch happens inside the wrapper template.
wrapper( [=] __device__ ( ) { foo.bar( ); } );
// Second wait; the returned status is again not checked.
cudaDeviceSynchronize( );
return 0;
}
使用CUDA 10.1编译(nvcc --expt-extended-lambda test.cu -o test)时,编译器发出如下警告:test.cu(16): warning: calling a __host__ function("Foo::Foo") from a __host__ __device__ function("") is not allowed。但是,复制构造函数从未在设备上被调用。CUDA 9.1不产生此警告。
这个warning是正确的吗?
直接启动内核的版本与wrapper版本有什么区别?
是否可以使用#pragma hd_warning_disable或#pragma nv_exec_check_disable来消除它?
F 214
给定的MWE基于一个较大的项目,其中wrapper决定是使用__device__还是__host__ lambda。将构造函数/析构函数标记为__host__ __device__,或者删除构造函数/析构函数(让编译器生成默认的__host__ __device__版本),都不可行,因为它们只能在CPU上调用(用于(解)分配CUDA内存)。
发布于 2020-11-14 19:49:24
经过以下修改,我没有收到任何警告:(我在Windows 10上使用了CUDA 10.1 )
#include <stdio.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
// Base type holding the user-provided constructors. Neither constructor has
// an execution-space qualifier; each one only reports that it ran.
struct Baz
{
    // Default constructor: prints "<function name>: Construct".
    Baz()
    {
        printf("%s: Construct\n", __FUNCTION__);
    }

    // Copy constructor: prints "<function name>: Copy Construct". The source
    // object `that` is not read.
    Baz(const Baz& that)
    {
        printf("%s: Copy Construct\n", __FUNCTION__);
    }
};
// Derives publicly from Baz and declares no constructors of its own; its
// only member is bar().
struct Foo : public Baz
{
    // Callable from host and device code; always returns 0.
    __host__ __device__ int bar() const
    {
        return 0;
    }
};
// Generic kernel: every launched thread invokes the callable it received by
// value exactly once and discards whatever the call returns.
template <typename CopyBody>
__global__ void kernel(CopyBody cBody)
{
    cBody();
}
// Host-side launcher: prints a "<function name>: ..." line before and after
// enqueuing `kernel` with one block of 32 threads. `cBody` is handed on as
// an lvalue, so the kernel's by-value parameter is initialised from a copy.
template <typename CopyBody>
void wrapper(CopyBody&& cBody)
{
    const dim3 gridSize(1);
    const dim3 blockSize(32);

    // The printf calls stay in this function so __FUNCTION__ keeps naming it.
    printf("%s: enquing kernel\n", __FUNCTION__);
    kernel<<<gridSize, blockSize>>>(cBody);
    printf("%s: kernel enqued\n", __FUNCTION__);
}
// Entry point of the modified example: launches `kernel` twice with a device
// lambda that captures `foo` by value, first directly and then through
// `wrapper`. `argc` and `argv` are unused.
int main(int argc, char** argv) {
	// Default-constructed on the host.
	Foo foo;
	printf("%s: enquing kernel\n", __FUNCTION__);
	// Direct launch with 1 block of 32 threads; the lambda captures `foo` by
	// value ([=]) and calls bar() on that copy, discarding the result.
	// NOTE(review): `<< <1, 32 >> >` looks like the `<<<1, 32>>>` launch
	// syntax with spaces inserted by an auto-formatter — confirm.
	kernel << <1, 32 >> > ([=] __device__() { foo.bar(); });
	printf("%s: kernel enqued\n", __FUNCTION__);
	// Wait for the device to finish; the returned status is not checked.
	cudaDeviceSynchronize();
	// Same lambda form, but the launch happens inside the wrapper template.
	wrapper([=] __device__() { foo.bar(); });
	// Second wait; the returned status is again not checked.
	cudaDeviceSynchronize();
	return 0;
}
上面的代码产生以下输出:
Foo::Foo: Construct
main: enquing kernel
Foo::Foo: Copy Construct
Foo::Foo: Copy Construct
main: kernel enqued
Foo::Foo: Copy Construct
Foo::Foo: Copy Construct
wrapper: enquing kernel
Foo::Foo: Copy Construct
wrapper: kernel enqued
为了方便起见,我用<stdio.h>代替了<iostream>。printf()可以在内核中使用。
https://stackoverflow.com/questions/62810274
复制相似问题