*デバイスプロパティの取得 [#e52f7f5b]
デバイスの情報は cudaGetDeviceProperties命令でデバイスプロパティを格納する構造体cudaDeviceProp を介して取得できます.
#code(C){{
// Enumerate every CUDA device and print its main properties.
int n;    // number of devices
cutilSafeCall(cudaGetDeviceCount(&n));

for(int i = 0; i < n; ++i){
    cudaDeviceProp dev;

    // fetch the properties of device i
    cutilSafeCall(cudaGetDeviceProperties(&dev, i));

    printf("device %d\n", i);
    printf(" device name : %s\n", dev.name);
    // The memory-size fields of cudaDeviceProp are size_t, so they are
    // printed with %zu; passing them to %d is a format/argument mismatch.
    printf(" total global memory : %zu (MB)\n", dev.totalGlobalMem/1024/1024);
    printf(" shared memory / block : %zu (KB)\n", dev.sharedMemPerBlock/1024);
    printf(" register / block : %d\n", dev.regsPerBlock);
    printf(" warp size : %d\n", dev.warpSize);
    printf(" max pitch : %zu (B)\n", dev.memPitch);
    printf(" max threads / block : %d\n", dev.maxThreadsPerBlock);
    printf(" max size of each dim. of block : (%d, %d, %d)\n", dev.maxThreadsDim[0], dev.maxThreadsDim[1], dev.maxThreadsDim[2]);
    printf(" max size of each dim. of grid : (%d, %d, %d)\n", dev.maxGridSize[0], dev.maxGridSize[1], dev.maxGridSize[2]);
    // clockRate is divided by 1000 to display it in MHz
    printf(" clock rate : %d (MHz)\n", dev.clockRate/1000);
    printf(" total constant memory : %zu (KB)\n", dev.totalConstMem/1024);
    printf(" compute capability : %d.%d\n", dev.major, dev.minor);
    printf(" alignment requirement for texture : %zu\n", dev.textureAlignment);
    printf(" device overlap : %s\n", (dev.deviceOverlap ? "ok" : "not"));
    printf(" num. of multiprocessors : %d\n", dev.multiProcessorCount);
    printf(" kernel execution timeout : %s\n", (dev.kernelExecTimeoutEnabled ? "on" : "off"));
    printf(" integrated : %s\n", (dev.integrated ? "on" : "off"));
    printf(" host memory mapping : %s\n", (dev.canMapHostMemory ? "on" : "off"));

    printf(" compute mode : ");
    if(dev.computeMode == cudaComputeModeDefault) printf("default mode (multiple threads can use) \n");
    else if(dev.computeMode == cudaComputeModeExclusive) printf("exclusive mode (only one thread will be able to use)\n");
    else if(dev.computeMode == cudaComputeModeProhibited) printf("prohibited mode (no threads can use)\n");
    // Fallback so the line is always terminated, even for a mode value
    // that is not one of the three handled above.
    else printf("unknown mode (%d)\n", (int)dev.computeMode);
}
}}

***GeForce GTX 285を搭載したビデオカード上での実行例 [#x08acc54]
 device 0
  device name : GeForce GTX 285
  total global memory : 1024 (MB)
  shared memory / block : 16 (KB)
  register / block : 16384
  warp size : 32
  max pitch : 262144 (B)
  max threads / block : 512
  max size of each dim. of block : (512, 512, 64)
  max size of each dim. of grid : (65535, 65535, 1)
  clock rate : 1476 (MHz)
  total constant memory : 64 (KB)
  compute capability : 1.3
  alignment requirement for texture : 256
  device overlap : ok
  num. of multiprocessors : 30
  kernel execution timeout : off
  integrated : off
  host memory mapping : off
  compute mode : default mode (multiple threads can use)

***GeForce GTX 580を搭載したビデオカード上での実行例 [#jf480e2f]
 device 0
  device name : GeForce GTX 580
  total global memory : 1503 (MB)
  shared memory / block : 48 (KB)
  register / block : 32768
  warp size : 32
  max pitch : 2147483647 (B)
  max threads / block : 1024
  max size of each dim. of block : (1024, 1024, 64)
  max size of each dim. of grid : (65535, 65535, 65535)
  clock rate : 1544 (MHz)
  total constant memory : 64 (KB)
  compute capability : 2.0
  alignment requirement for texture : 512
  device overlap : ok
  num. of multiprocessors : 16
  kernel execution timeout : on
  integrated : off
  host memory mapping : on
  compute mode : default mode (multiple threads can use)

***GeForce GTX TITANを搭載したビデオカード上での実行例 [#le67ed58]
 device 0
  device name : GeForce GTX TITAN
  total global memory : 4095 (MB)
  shared memory / block : 48 (KB)
  register / block : 65536
  warp size : 32
  max pitch : 2147483647 (B)
  max threads / block : 1024
  max size of each dim. of block : (1024, 1024, 64)
  max size of each dim. 
of grid : (2147483647, 65535, 65535) clock rate : 875 (MHz) total constant memory : 64 (KB) compute capability : 3.5 alignment requirement for texture : 512 device overlap : ok num. of multiprocessors : 14 kernel execution timeout : on integrated : off host memory mapping : on compute mode : default mode (multiple threads can use)