/* * Copyright 1993-2010 NVIDIA Corporation. All rights reserved. * * Please refer to the NVIDIA end user license agreement (EULA) associated * with this source code for terms and conditions that govern your use of * this software. Any use, reproduction, disclosure, or distribution of * this software and related documentation outside the terms of the EULA * is strictly prohibited. * */ // ********************************************************************* // Utilities specific to OpenCL samples in NVIDIA GPU Computing SDK // ********************************************************************* #include "oclUtils.h" #include #include #include #include #include ////////////////////////////////////////////////////////////////////////////// //! Gets the platform ID for NVIDIA if available, otherwise default //! //! @return the id //! @param clSelectedPlatformID OpenCL platoform ID ////////////////////////////////////////////////////////////////////////////// cl_int oclGetPlatformID(cl_platform_id* clSelectedPlatformID) { char chBuffer[1024]; cl_uint num_platforms; cl_platform_id* clPlatformIDs; cl_int ciErrNum; *clSelectedPlatformID = NULL; // Get OpenCL platform count ciErrNum = clGetPlatformIDs (0, NULL, &num_platforms); if (ciErrNum != CL_SUCCESS) { shrLog(" Error %i in clGetPlatformIDs Call !!!\n\n", ciErrNum); return -1000; } else { if(num_platforms == 0) { shrLog("No OpenCL platform found!\n\n"); return -2000; } else { // if there's a platform or more, make space for ID's if ((clPlatformIDs = (cl_platform_id*)malloc(num_platforms * sizeof(cl_platform_id))) == NULL) { shrLog("Failed to allocate memory for cl_platform ID's!\n\n"); return -3000; } // get platform info for each platform and trap the NVIDIA platform if found ciErrNum = clGetPlatformIDs (num_platforms, clPlatformIDs, NULL); for(cl_uint i = 0; i < num_platforms; ++i) { ciErrNum = clGetPlatformInfo (clPlatformIDs[i], CL_PLATFORM_NAME, 1024, &chBuffer, NULL); if(ciErrNum == CL_SUCCESS) { if(strstr(chBuffer, "NVIDIA") != NULL) { *clSelectedPlatformID = clPlatformIDs[i]; break; } } } // default to zeroeth platform if NVIDIA not found if(*clSelectedPlatformID == NULL) { shrLog("WARNING: NVIDIA OpenCL platform not found - defaulting to first platform!\n\n"); *clSelectedPlatformID = clPlatformIDs[0]; } free(clPlatformIDs); } } return CL_SUCCESS; } ////////////////////////////////////////////////////////////////////////////// //! Print the device name //! //! @param iLogMode enum LOGBOTH, LOGCONSOLE, LOGFILE //! @param device OpenCL id of the device ////////////////////////////////////////////////////////////////////////////// void oclPrintDevName(int iLogMode, cl_device_id device) { char device_string[1024]; clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_string), &device_string, NULL); shrLogEx(iLogMode, 0, "%s", device_string); } ////////////////////////////////////////////////////////////////////////////// //! Print info about the device //! //! @param iLogMode enum LOGBOTH, LOGCONSOLE, LOGFILE //! @param device OpenCL id of the device ////////////////////////////////////////////////////////////////////////////// void oclPrintDevInfo(int iLogMode, cl_device_id device) { char device_string[1024]; bool nv_device_attibute_query = false; // CL_DEVICE_NAME clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_string), &device_string, NULL); shrLogEx(iLogMode, 0, " CL_DEVICE_NAME: \t\t\t%s\n", device_string); // CL_DEVICE_VENDOR clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(device_string), &device_string, NULL); shrLogEx(iLogMode, 0, " CL_DEVICE_VENDOR: \t\t\t%s\n", device_string); // CL_DRIVER_VERSION clGetDeviceInfo(device, CL_DRIVER_VERSION, sizeof(device_string), &device_string, NULL); shrLogEx(iLogMode, 0, " CL_DRIVER_VERSION: \t\t\t%s\n", device_string); // CL_DEVICE_VERSION clGetDeviceInfo(device, CL_DEVICE_VERSION, sizeof(device_string), &device_string, NULL); shrLogEx(iLogMode, 0, " CL_DEVICE_VERSION: \t\t\t%s\n", device_string); #if !defined(__APPLE__) && !defined(__MACOSX) // CL_DEVICE_OPENCL_C_VERSION (if CL_DEVICE_VERSION version > 1.0) if(strncmp("OpenCL 1.0", device_string, 10) != 0) { // This code is unused for devices reporting OpenCL 1.0, but a def is needed anyway to allow compilation using v 1.0 headers // This constant isn't #defined in 1.0 #ifndef CL_DEVICE_OPENCL_C_VERSION #define CL_DEVICE_OPENCL_C_VERSION 0x103D #endif clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION, sizeof(device_string), &device_string, NULL); shrLogEx(iLogMode, 0, " CL_DEVICE_OPENCL_C_VERSION: \t\t%s\n", device_string); } #endif // CL_DEVICE_TYPE cl_device_type type; clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(type), &type, NULL); if( type & CL_DEVICE_TYPE_CPU ) shrLogEx(iLogMode, 0, " CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_CPU"); if( type & CL_DEVICE_TYPE_GPU ) shrLogEx(iLogMode, 0, " CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_GPU"); if( type & CL_DEVICE_TYPE_ACCELERATOR ) shrLogEx(iLogMode, 0, " CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_ACCELERATOR"); if( type & CL_DEVICE_TYPE_DEFAULT ) shrLogEx(iLogMode, 0, " CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_DEFAULT"); // CL_DEVICE_MAX_COMPUTE_UNITS cl_uint compute_units; clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(compute_units), &compute_units, NULL); shrLogEx(iLogMode, 0, " CL_DEVICE_MAX_COMPUTE_UNITS:\t\t%u\n", compute_units); // CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS size_t workitem_dims; clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(workitem_dims), &workitem_dims, NULL); shrLogEx(iLogMode, 0, " CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS:\t%u\n", workitem_dims); // CL_DEVICE_MAX_WORK_ITEM_SIZES size_t workitem_size[3]; clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(workitem_size), &workitem_size, NULL); shrLogEx(iLogMode, 0, " CL_DEVICE_MAX_WORK_ITEM_SIZES:\t%u / %u / %u \n", workitem_size[0], workitem_size[1], workitem_size[2]); // CL_DEVICE_MAX_WORK_GROUP_SIZE size_t workgroup_size; clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(workgroup_size), &workgroup_size, NULL); shrLogEx(iLogMode, 0, " CL_DEVICE_MAX_WORK_GROUP_SIZE:\t%u\n", workgroup_size); // CL_DEVICE_MAX_CLOCK_FREQUENCY cl_uint clock_frequency; clGetDeviceInfo(device, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clock_frequency), &clock_frequency, NULL); shrLogEx(iLogMode, 0, " CL_DEVICE_MAX_CLOCK_FREQUENCY:\t%u MHz\n", clock_frequency); // CL_DEVICE_ADDRESS_BITS cl_uint addr_bits; clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(addr_bits), &addr_bits, NULL); shrLogEx(iLogMode, 0, " CL_DEVICE_ADDRESS_BITS:\t\t%u\n", addr_bits); // CL_DEVICE_MAX_MEM_ALLOC_SIZE cl_ulong max_mem_alloc_size; clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(max_mem_alloc_size), &max_mem_alloc_size, NULL); shrLogEx(iLogMode, 0, " CL_DEVICE_MAX_MEM_ALLOC_SIZE:\t\t%u MByte\n", (unsigned int)(max_mem_alloc_size / (1024 * 1024))); // CL_DEVICE_GLOBAL_MEM_SIZE cl_ulong mem_size; clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(mem_size), &mem_size, NULL); shrLogEx(iLogMode, 0, " CL_DEVICE_GLOBAL_MEM_SIZE:\t\t%u MByte\n", (unsigned int)(mem_size / (1024 * 1024))); // CL_DEVICE_ERROR_CORRECTION_SUPPORT cl_bool error_correction_support; clGetDeviceInfo(device, CL_DEVICE_ERROR_CORRECTION_SUPPORT, sizeof(error_correction_support), &error_correction_support, NULL); shrLogEx(iLogMode, 0, " CL_DEVICE_ERROR_CORRECTION_SUPPORT:\t%s\n", error_correction_support == CL_TRUE ? "yes" : "no"); // CL_DEVICE_LOCAL_MEM_TYPE cl_device_local_mem_type local_mem_type; clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_TYPE, sizeof(local_mem_type), &local_mem_type, NULL); shrLogEx(iLogMode, 0, " CL_DEVICE_LOCAL_MEM_TYPE:\t\t%s\n", local_mem_type == 1 ? "local" : "global"); // CL_DEVICE_LOCAL_MEM_SIZE clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(mem_size), &mem_size, NULL); shrLogEx(iLogMode, 0, " CL_DEVICE_LOCAL_MEM_SIZE:\t\t%u KByte\n", (unsigned int)(mem_size / 1024)); // CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE clGetDeviceInfo(device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(mem_size), &mem_size, NULL); shrLogEx(iLogMode, 0, " CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE:\t%u KByte\n", (unsigned int)(mem_size / 1024)); // CL_DEVICE_QUEUE_PROPERTIES cl_command_queue_properties queue_properties; clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES, sizeof(queue_properties), &queue_properties, NULL); if( queue_properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE ) shrLogEx(iLogMode, 0, " CL_DEVICE_QUEUE_PROPERTIES:\t\t%s\n", "CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE"); if( queue_properties & CL_QUEUE_PROFILING_ENABLE ) shrLogEx(iLogMode, 0, " CL_DEVICE_QUEUE_PROPERTIES:\t\t%s\n", "CL_QUEUE_PROFILING_ENABLE"); // CL_DEVICE_IMAGE_SUPPORT cl_bool image_support; clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, sizeof(image_support), &image_support, NULL); shrLogEx(iLogMode, 0, " CL_DEVICE_IMAGE_SUPPORT:\t\t%u\n", image_support); // CL_DEVICE_MAX_READ_IMAGE_ARGS cl_uint max_read_image_args; clGetDeviceInfo(device, CL_DEVICE_MAX_READ_IMAGE_ARGS, sizeof(max_read_image_args), &max_read_image_args, NULL); shrLogEx(iLogMode, 0, " CL_DEVICE_MAX_READ_IMAGE_ARGS:\t%u\n", max_read_image_args); // CL_DEVICE_MAX_WRITE_IMAGE_ARGS cl_uint max_write_image_args; clGetDeviceInfo(device, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, sizeof(max_write_image_args), &max_write_image_args, NULL); shrLogEx(iLogMode, 0, " CL_DEVICE_MAX_WRITE_IMAGE_ARGS:\t%u\n", max_write_image_args); // CL_DEVICE_SINGLE_FP_CONFIG cl_device_fp_config fp_config; clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(cl_device_fp_config), &fp_config, NULL); shrLogEx(iLogMode, 0, " CL_DEVICE_SINGLE_FP_CONFIG:\t\t%s%s%s%s%s%s\n", fp_config & CL_FP_DENORM ? "denorms " : "", fp_config & CL_FP_INF_NAN ? "INF-quietNaNs " : "", fp_config & CL_FP_ROUND_TO_NEAREST ? "round-to-nearest " : "", fp_config & CL_FP_ROUND_TO_ZERO ? "round-to-zero " : "", fp_config & CL_FP_ROUND_TO_INF ? "round-to-inf " : "", fp_config & CL_FP_FMA ? "fma " : ""); // CL_DEVICE_IMAGE2D_MAX_WIDTH, CL_DEVICE_IMAGE2D_MAX_HEIGHT, CL_DEVICE_IMAGE3D_MAX_WIDTH, CL_DEVICE_IMAGE3D_MAX_HEIGHT, CL_DEVICE_IMAGE3D_MAX_DEPTH size_t szMaxDims[5]; shrLogEx(iLogMode, 0, "\n CL_DEVICE_IMAGE "); clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t), &szMaxDims[0], NULL); shrLogEx(iLogMode, 0, "\t\t\t2D_MAX_WIDTH\t %u\n", szMaxDims[0]); clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t), &szMaxDims[1], NULL); shrLogEx(iLogMode, 0, "\t\t\t\t\t2D_MAX_HEIGHT\t %u\n", szMaxDims[1]); clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof(size_t), &szMaxDims[2], NULL); shrLogEx(iLogMode, 0, "\t\t\t\t\t3D_MAX_WIDTH\t %u\n", szMaxDims[2]); clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof(size_t), &szMaxDims[3], NULL); shrLogEx(iLogMode, 0, "\t\t\t\t\t3D_MAX_HEIGHT\t %u\n", szMaxDims[3]); clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(size_t), &szMaxDims[4], NULL); shrLogEx(iLogMode, 0, "\t\t\t\t\t3D_MAX_DEPTH\t %u\n", szMaxDims[4]); // CL_DEVICE_EXTENSIONS: get device extensions, and if any then parse & log the string onto separate lines clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, sizeof(device_string), &device_string, NULL); if (device_string != 0) { shrLogEx(iLogMode, 0, "\n CL_DEVICE_EXTENSIONS:"); std::string stdDevString; stdDevString = std::string(device_string); size_t szOldPos = 0; size_t szSpacePos = stdDevString.find(' ', szOldPos); // extensions string is space delimited while (szSpacePos != stdDevString.npos) { if( strcmp("cl_nv_device_attribute_query", stdDevString.substr(szOldPos, szSpacePos - szOldPos).c_str()) == 0 ) nv_device_attibute_query = true; if (szOldPos > 0) { shrLogEx(iLogMode, 0, "\t\t"); } shrLogEx(iLogMode, 0, "\t\t\t%s\n", stdDevString.substr(szOldPos, szSpacePos - szOldPos).c_str()); do { szOldPos = szSpacePos + 1; szSpacePos = stdDevString.find(' ', szOldPos); } while (szSpacePos == szOldPos); } shrLogEx(iLogMode, 0, "\n"); } else { shrLogEx(iLogMode, 0, " CL_DEVICE_EXTENSIONS: None\n"); } if(nv_device_attibute_query) { cl_uint compute_capability_major, compute_capability_minor; clGetDeviceInfo(device, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, sizeof(cl_uint), &compute_capability_major, NULL); clGetDeviceInfo(device, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, sizeof(cl_uint), &compute_capability_minor, NULL); shrLogEx(iLogMode, 0, "\n CL_DEVICE_COMPUTE_CAPABILITY_NV:\t%u.%u\n", compute_capability_major, compute_capability_minor); shrLogEx(iLogMode, 0, " NUMBER OF MULTIPROCESSORS:\t\t%u\n", compute_units); // this is the same value reported by CL_DEVICE_MAX_COMPUTE_UNITS shrLogEx(iLogMode, 0, " NUMBER OF CUDA CORES:\t\t\t%u\n", ConvertSMVer2Cores(compute_capability_major, compute_capability_minor) * compute_units); cl_uint regs_per_block; clGetDeviceInfo(device, CL_DEVICE_REGISTERS_PER_BLOCK_NV, sizeof(cl_uint), ®s_per_block, NULL); shrLogEx(iLogMode, 0, " CL_DEVICE_REGISTERS_PER_BLOCK_NV:\t%u\n", regs_per_block); cl_uint warp_size; clGetDeviceInfo(device, CL_DEVICE_WARP_SIZE_NV, sizeof(cl_uint), &warp_size, NULL); shrLogEx(iLogMode, 0, " CL_DEVICE_WARP_SIZE_NV:\t\t%u\n", warp_size); cl_bool gpu_overlap; clGetDeviceInfo(device, CL_DEVICE_GPU_OVERLAP_NV, sizeof(cl_bool), &gpu_overlap, NULL); shrLogEx(iLogMode, 0, " CL_DEVICE_GPU_OVERLAP_NV:\t\t%s\n", gpu_overlap == CL_TRUE ? "CL_TRUE" : "CL_FALSE"); cl_bool exec_timeout; clGetDeviceInfo(device, CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV, sizeof(cl_bool), &exec_timeout, NULL); shrLogEx(iLogMode, 0, " CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV:\t%s\n", exec_timeout == CL_TRUE ? "CL_TRUE" : "CL_FALSE"); cl_bool integrated_memory; clGetDeviceInfo(device, CL_DEVICE_INTEGRATED_MEMORY_NV, sizeof(cl_bool), &integrated_memory, NULL); shrLogEx(iLogMode, 0, " CL_DEVICE_INTEGRATED_MEMORY_NV:\t%s\n", integrated_memory == CL_TRUE ? "CL_TRUE" : "CL_FALSE"); } // CL_DEVICE_PREFERRED_VECTOR_WIDTH_ shrLogEx(iLogMode, 0, " CL_DEVICE_PREFERRED_VECTOR_WIDTH_\t"); cl_uint vec_width [6]; clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, sizeof(cl_uint), &vec_width[0], NULL); clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, sizeof(cl_uint), &vec_width[1], NULL); clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), &vec_width[2], NULL); clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, sizeof(cl_uint), &vec_width[3], NULL); clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, sizeof(cl_uint), &vec_width[4], NULL); clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, sizeof(cl_uint), &vec_width[5], NULL); shrLogEx(iLogMode, 0, "CHAR %u, SHORT %u, INT %u, LONG %u, FLOAT %u, DOUBLE %u\n\n\n", vec_width[0], vec_width[1], vec_width[2], vec_width[3], vec_width[4], vec_width[5]); } ////////////////////////////////////////////////////////////////////////////// //! Get and return device capability //! //! @return the 2 digit integer representation of device Cap (major minor). return -1 if NA //! @param device OpenCL id of the device ////////////////////////////////////////////////////////////////////////////// int oclGetDevCap(cl_device_id device) { char cDevString[1024]; bool bDevAttributeQuery = false; int iDevArch = -1; // Get device extensions, and if any then search for cl_nv_device_attribute_query clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, sizeof(cDevString), &cDevString, NULL); if (cDevString != 0) { std::string stdDevString; stdDevString = std::string(cDevString); size_t szOldPos = 0; size_t szSpacePos = stdDevString.find(' ', szOldPos); // extensions string is space delimited while (szSpacePos != stdDevString.npos) { if( strcmp("cl_nv_device_attribute_query", stdDevString.substr(szOldPos, szSpacePos - szOldPos).c_str()) == 0 ) { bDevAttributeQuery = true; } do { szOldPos = szSpacePos + 1; szSpacePos = stdDevString.find(' ', szOldPos); } while (szSpacePos == szOldPos); } } // if search succeeded, get device caps if(bDevAttributeQuery) { cl_int iComputeCapMajor, iComputeCapMinor; clGetDeviceInfo(device, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, sizeof(cl_uint), (void*)&iComputeCapMajor, NULL); clGetDeviceInfo(device, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, sizeof(cl_uint), (void*)&iComputeCapMinor, NULL); iDevArch = (10 * iComputeCapMajor) + iComputeCapMinor; } return iDevArch; } ////////////////////////////////////////////////////////////////////////////// //! Gets the id of the first device from the context //! //! @return the id //! @param cxGPUContext OpenCL context ////////////////////////////////////////////////////////////////////////////// cl_device_id oclGetFirstDev(cl_context cxGPUContext) { size_t szParmDataBytes; cl_device_id* cdDevices; // get the list of GPU devices associated with context clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes); cdDevices = (cl_device_id*) malloc(szParmDataBytes); clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL); cl_device_id first = cdDevices[0]; free(cdDevices); return first; } ////////////////////////////////////////////////////////////////////////////// //! Gets the id of device with maximal FLOPS from the context //! //! @return the id //! @param cxGPUContext OpenCL context ////////////////////////////////////////////////////////////////////////////// cl_device_id oclGetMaxFlopsDev(cl_context cxGPUContext) { size_t szParmDataBytes; cl_device_id* cdDevices; // get the list of GPU devices associated with context clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes); cdDevices = (cl_device_id*) malloc(szParmDataBytes); size_t device_count = szParmDataBytes / sizeof(cl_device_id); clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL); cl_device_id max_flops_device = cdDevices[0]; int max_flops = 0; size_t current_device = 0; // CL_DEVICE_MAX_COMPUTE_UNITS cl_uint compute_units; clGetDeviceInfo(cdDevices[current_device], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(compute_units), &compute_units, NULL); // CL_DEVICE_MAX_CLOCK_FREQUENCY cl_uint clock_frequency; clGetDeviceInfo(cdDevices[current_device], CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clock_frequency), &clock_frequency, NULL); max_flops = compute_units * clock_frequency; ++current_device; while( current_device < device_count ) { // CL_DEVICE_MAX_COMPUTE_UNITS cl_uint compute_units; clGetDeviceInfo(cdDevices[current_device], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(compute_units), &compute_units, NULL); // CL_DEVICE_MAX_CLOCK_FREQUENCY cl_uint clock_frequency; clGetDeviceInfo(cdDevices[current_device], CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clock_frequency), &clock_frequency, NULL); int flops = compute_units * clock_frequency; if( flops > max_flops ) { max_flops = flops; max_flops_device = cdDevices[current_device]; } ++current_device; } free(cdDevices); return max_flops_device; } ////////////////////////////////////////////////////////////////////////////// //! Loads a Program file and prepends the cPreamble to the code. //! //! @return the source string if succeeded, 0 otherwise //! @param cFilename program filename //! @param cPreamble code that is prepended to the loaded file, typically a set of #defines or a header //! @param szFinalLength returned length of the code string ////////////////////////////////////////////////////////////////////////////// char* oclLoadProgSource(const char* cFilename, const char* cPreamble, size_t* szFinalLength) { // locals FILE* pFileStream = NULL; size_t szSourceLength; // open the OpenCL source code file #ifdef _WIN32 // Windows version if(fopen_s(&pFileStream, cFilename, "rb") != 0) { return NULL; } #else // Linux version pFileStream = fopen(cFilename, "rb"); if(pFileStream == 0) { return NULL; } #endif size_t szPreambleLength = strlen(cPreamble); // get the length of the source code fseek(pFileStream, 0, SEEK_END); szSourceLength = ftell(pFileStream); fseek(pFileStream, 0, SEEK_SET); // allocate a buffer for the source code string and read it in char* cSourceString = (char *)malloc(szSourceLength + szPreambleLength + 1); memcpy(cSourceString, cPreamble, szPreambleLength); if (fread((cSourceString) + szPreambleLength, szSourceLength, 1, pFileStream) != 1) { fclose(pFileStream); free(cSourceString); return 0; } // close the file and return the total length of the combined (preamble + source) string fclose(pFileStream); if(szFinalLength != 0) { *szFinalLength = szSourceLength + szPreambleLength; } cSourceString[szSourceLength + szPreambleLength] = '\0'; return cSourceString; } ////////////////////////////////////////////////////////////////////////////// //! Gets the id of the nth device from the context //! //! @return the id or -1 when out of range //! @param cxGPUContext OpenCL context //! @param device_idx index of the device of interest ////////////////////////////////////////////////////////////////////////////// cl_device_id oclGetDev(cl_context cxGPUContext, unsigned int nr) { size_t szParmDataBytes; cl_device_id* cdDevices; // get the list of GPU devices associated with context clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes); if( szParmDataBytes / sizeof(cl_device_id) <= nr ) { return (cl_device_id)-1; } cdDevices = (cl_device_id*) malloc(szParmDataBytes); clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL); cl_device_id device = cdDevices[nr]; free(cdDevices); return device; } ////////////////////////////////////////////////////////////////////////////// //! Get the binary (PTX) of the program associated with the device //! //! @param cpProgram OpenCL program //! @param cdDevice device of interest //! @param binary returned code //! @param length length of returned code ////////////////////////////////////////////////////////////////////////////// void oclGetProgBinary( cl_program cpProgram, cl_device_id cdDevice, char** binary, size_t* length) { // Grab the number of devices associated witht the program cl_uint num_devices; clGetProgramInfo(cpProgram, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &num_devices, NULL); // Grab the device ids cl_device_id* devices = (cl_device_id*) malloc(num_devices * sizeof(cl_device_id)); clGetProgramInfo(cpProgram, CL_PROGRAM_DEVICES, num_devices * sizeof(cl_device_id), devices, 0); // Grab the sizes of the binaries size_t* binary_sizes = (size_t*)malloc(num_devices * sizeof(size_t)); clGetProgramInfo(cpProgram, CL_PROGRAM_BINARY_SIZES, num_devices * sizeof(size_t), binary_sizes, NULL); // Now get the binaries char** ptx_code = (char**) malloc(num_devices * sizeof(char*)); for( unsigned int i=0; i= 0 && index < errorCount) ? errorString[index] : "Unspecified Error"; } // Helper function to get OpenCL image format string (channel order and type) from constant // ********************************************************************* const char* oclImageFormatString(cl_uint uiImageFormat) { // cl_channel_order if (uiImageFormat == CL_R)return "CL_R"; if (uiImageFormat == CL_A)return "CL_A"; if (uiImageFormat == CL_RG)return "CL_RG"; if (uiImageFormat == CL_RA)return "CL_RA"; if (uiImageFormat == CL_RGB)return "CL_RGB"; if (uiImageFormat == CL_RGBA)return "CL_RGBA"; if (uiImageFormat == CL_BGRA)return "CL_BGRA"; if (uiImageFormat == CL_ARGB)return "CL_ARGB"; if (uiImageFormat == CL_INTENSITY)return "CL_INTENSITY"; if (uiImageFormat == CL_LUMINANCE)return "CL_LUMINANCE"; // cl_channel_type if (uiImageFormat == CL_SNORM_INT8)return "CL_SNORM_INT8"; if (uiImageFormat == CL_SNORM_INT16)return "CL_SNORM_INT16"; if (uiImageFormat == CL_UNORM_INT8)return "CL_UNORM_INT8"; if (uiImageFormat == CL_UNORM_INT16)return "CL_UNORM_INT16"; if (uiImageFormat == CL_UNORM_SHORT_565)return "CL_UNORM_SHORT_565"; if (uiImageFormat == CL_UNORM_SHORT_555)return "CL_UNORM_SHORT_555"; if (uiImageFormat == CL_UNORM_INT_101010)return "CL_UNORM_INT_101010"; if (uiImageFormat == CL_SIGNED_INT8)return "CL_SIGNED_INT8"; if (uiImageFormat == CL_SIGNED_INT16)return "CL_SIGNED_INT16"; if (uiImageFormat == CL_SIGNED_INT32)return "CL_SIGNED_INT32"; if (uiImageFormat == CL_UNSIGNED_INT8)return "CL_UNSIGNED_INT8"; if (uiImageFormat == CL_UNSIGNED_INT16)return "CL_UNSIGNED_INT16"; if (uiImageFormat == CL_UNSIGNED_INT32)return "CL_UNSIGNED_INT32"; if (uiImageFormat == CL_HALF_FLOAT)return "CL_HALF_FLOAT"; if (uiImageFormat == CL_FLOAT)return "CL_FLOAT"; // unknown constant return "Unknown"; }