nvidia/cuda: Update to match CUDA 11.1 and later

Adds several new functions that may be required for operation.
This commit is contained in:
Michael Fabian 'Xaymar' Dirks 2021-04-26 03:50:17 +02:00 committed by Xaymar
parent 21f2c7fdc5
commit 2501afff3b
3 changed files with 236 additions and 240 deletions

View file

@ -308,7 +308,7 @@ void face_tracking_instance::async_track(std::shared_ptr<void> ptr)
#ifdef ENABLE_PROFILING #ifdef ENABLE_PROFILING
auto prof = _profile_ar_copy->track(); auto prof = _profile_ar_copy->track();
#endif #endif
::nvidia::cuda::memcpy2d_t mc; ::nvidia::cuda::memcpy2d_v2_t mc;
mc.src_x_in_bytes = 0; mc.src_x_in_bytes = 0;
mc.src_y = 0; mc.src_y = 0;
mc.src_memory_type = ::nvidia::cuda::memory_type::ARRAY; mc.src_memory_type = ::nvidia::cuda::memory_type::ARRAY;

View file

@ -47,17 +47,37 @@
if (!NAME) \ if (!NAME) \
throw std::runtime_error("Failed to load '" #NAME "' from '" CUDA_NAME "'."); \ throw std::runtime_error("Failed to load '" #NAME "' from '" CUDA_NAME "'."); \
} }
// Loads an optional driver entry point into the function pointer NAME, looking it
// up under NAME's own spelling. When the lookup yields a null pointer, NAME is left
// null and a warning is logged instead of throwing, so callers must null-check NAME
// before calling through it.
// Wrapped in do { } while (0) so the macro expands to exactly one statement and
// stays valid in unbraced if/else chains.
#define CUDA_LOAD_SYMBOL_OPT(NAME)                                              \
    do {                                                                        \
        NAME = reinterpret_cast<decltype(NAME)>(_library->load_symbol(#NAME));  \
        if (!NAME) {                                                            \
            D_LOG_WARNING("Loading of optional symbol '" #NAME "' failed.", 0); \
        }                                                                       \
    } while (0)
// Loads a required driver entry point into the function pointer NAME, looking it
// up under the export name OVERRIDE instead of NAME's own spelling.
// Throws std::runtime_error when the lookup yields a null pointer. The message
// names the export that was actually searched for (OVERRIDE) as well as the
// pointer it was meant for (NAME), so a missing symbol can be identified directly.
// Wrapped in do { } while (0) so the macro expands to exactly one statement and
// stays valid in unbraced if/else chains.
#define CUDA_LOAD_SYMBOL_EX(NAME, OVERRIDE)                                                                      \
    do {                                                                                                         \
        NAME = reinterpret_cast<decltype(NAME)>(_library->load_symbol(#OVERRIDE));                               \
        if (!NAME) {                                                                                             \
            throw std::runtime_error("Failed to load '" #OVERRIDE "' (for '" #NAME "') from '" CUDA_NAME "'.");  \
        }                                                                                                        \
    } while (0)
// Loads an optional driver entry point into the function pointer NAME, looking it
// up under the export name OVERRIDE instead of NAME's own spelling.
// When the lookup yields a null pointer, NAME is left null and a warning is logged
// instead of throwing, so callers must null-check NAME before calling through it.
// The warning names the export that was actually searched for (OVERRIDE) as well
// as the pointer it was meant for (NAME).
// Wrapped in do { } while (0) so the macro expands to exactly one statement and
// stays valid in unbraced if/else chains.
#define CUDA_LOAD_SYMBOL_OPT_EX(NAME, OVERRIDE)                                                          \
    do {                                                                                                 \
        NAME = reinterpret_cast<decltype(NAME)>(_library->load_symbol(#OVERRIDE));                       \
        if (!NAME) {                                                                                     \
            D_LOG_WARNING("Loading of optional symbol '" #OVERRIDE "' (for '" #NAME "') failed.", 0);    \
        }                                                                                                \
    } while (0)
#define CUDA_LOAD_SYMBOL_V2(NAME) \ #define CUDA_LOAD_SYMBOL_V2(NAME) \
{ \ { \
NAME = reinterpret_cast<decltype(NAME)>(_library->load_symbol(#NAME "_v2")); \ NAME = reinterpret_cast<decltype(NAME)>(_library->load_symbol(#NAME "_v2")); \
if (!NAME) \ if (!NAME) \
throw std::runtime_error("Failed to load '" #NAME "' from '" CUDA_NAME "'."); \ throw std::runtime_error("Failed to load '" #NAME "' from '" CUDA_NAME "'."); \
} }
#define CUDA_LOAD_SYMBOL_EX(NAME, OVERRIDE) \ #define CUDA_LOAD_SYMBOL_OPT_V2(NAME) \
{ \ { \
NAME = reinterpret_cast<decltype(NAME)>(_library->load_symbol(#OVERRIDE)); \ NAME = reinterpret_cast<decltype(NAME)>(_library->load_symbol(#NAME "_v2")); \
if (!NAME) \ if (!NAME) \
throw std::runtime_error("Failed to load '" #NAME "' from '" CUDA_NAME "'."); \ D_LOG_WARNING("Loading of optional symbol '" #NAME "' failed.", 0); \
} }
nvidia::cuda::cuda::~cuda() nvidia::cuda::cuda::~cuda()
@ -67,74 +87,21 @@ nvidia::cuda::cuda::~cuda()
nvidia::cuda::cuda::cuda() : _library() nvidia::cuda::cuda::cuda() : _library()
{ {
int32_t cuda_version = 0;
D_LOG_DEBUG("Initialization... (Addr: 0x%" PRIuPTR ")", this); D_LOG_DEBUG("Initialization... (Addr: 0x%" PRIuPTR ")", this);
_library = util::library::load(std::string_view(CUDA_NAME)); _library = util::library::load(std::string_view(CUDA_NAME));
{ // 1. Load critical initialization functions.
// Initialization // Initialization
CUDA_LOAD_SYMBOL(cuInit); CUDA_LOAD_SYMBOL(cuInit);
// Version Management // Version Management
CUDA_LOAD_SYMBOL(cuDriverGetVersion); CUDA_LOAD_SYMBOL(cuDriverGetVersion);
}
// Primary Context Management { // 2. Get the CUDA Driver version and log it.
CUDA_LOAD_SYMBOL(cuDevicePrimaryCtxRetain);
CUDA_LOAD_SYMBOL_V2(cuDevicePrimaryCtxRelease);
CUDA_LOAD_SYMBOL_V2(cuDevicePrimaryCtxSetFlags);
// Context Management
CUDA_LOAD_SYMBOL_V2(cuCtxCreate);
CUDA_LOAD_SYMBOL_V2(cuCtxDestroy);
CUDA_LOAD_SYMBOL(cuCtxGetCurrent);
CUDA_LOAD_SYMBOL(cuCtxGetStreamPriorityRange);
CUDA_LOAD_SYMBOL_V2(cuCtxPopCurrent);
CUDA_LOAD_SYMBOL_V2(cuCtxPushCurrent);
CUDA_LOAD_SYMBOL(cuCtxSetCurrent);
CUDA_LOAD_SYMBOL(cuCtxSynchronize);
// Memory Management
CUDA_LOAD_SYMBOL_V2(cuArrayGetDescriptor);
CUDA_LOAD_SYMBOL_V2(cuMemAlloc);
CUDA_LOAD_SYMBOL_V2(cuMemAllocPitch);
CUDA_LOAD_SYMBOL_V2(cuMemFree);
CUDA_LOAD_SYMBOL_V2(cuMemHostGetDevicePointer);
CUDA_LOAD_SYMBOL(cuMemcpy);
CUDA_LOAD_SYMBOL_V2(cuMemcpy2D);
CUDA_LOAD_SYMBOL_V2(cuMemcpy2DAsync);
CUDA_LOAD_SYMBOL_V2(cuMemcpyAtoA);
CUDA_LOAD_SYMBOL_V2(cuMemcpyAtoD);
CUDA_LOAD_SYMBOL_V2(cuMemcpyAtoH);
CUDA_LOAD_SYMBOL_V2(cuMemcpyAtoHAsync);
CUDA_LOAD_SYMBOL_V2(cuMemcpyDtoA);
CUDA_LOAD_SYMBOL_V2(cuMemcpyDtoD);
CUDA_LOAD_SYMBOL_V2(cuMemcpyDtoH);
CUDA_LOAD_SYMBOL_V2(cuMemcpyDtoHAsync);
CUDA_LOAD_SYMBOL_V2(cuMemcpyHtoA);
CUDA_LOAD_SYMBOL_V2(cuMemcpyHtoAAsync);
CUDA_LOAD_SYMBOL_V2(cuMemcpyHtoD);
CUDA_LOAD_SYMBOL_V2(cuMemcpyHtoDAsync);
// Stream Managment
CUDA_LOAD_SYMBOL(cuStreamCreate);
CUDA_LOAD_SYMBOL(cuStreamCreateWithPriority);
CUDA_LOAD_SYMBOL_V2(cuStreamDestroy);
CUDA_LOAD_SYMBOL(cuStreamSynchronize);
// Graphics Interoperability
CUDA_LOAD_SYMBOL(cuGraphicsMapResources);
CUDA_LOAD_SYMBOL(cuGraphicsSubResourceGetMappedArray);
CUDA_LOAD_SYMBOL(cuGraphicsUnmapResources);
CUDA_LOAD_SYMBOL(cuGraphicsUnregisterResource);
#ifdef WIN32
// Direct3D11 Interopability
CUDA_LOAD_SYMBOL(cuD3D11GetDevice);
CUDA_LOAD_SYMBOL(cuGraphicsD3D11RegisterResource);
#endif
// Log found CUDA version.
{
int32_t cuda_version = 0;
if (cuDriverGetVersion(&cuda_version) == result::SUCCESS) { if (cuDriverGetVersion(&cuda_version) == result::SUCCESS) {
int32_t major = cuda_version / 1000; int32_t major = cuda_version / 1000;
int32_t minor = (cuda_version % 1000) / 10; int32_t minor = (cuda_version % 1000) / 10;
@ -145,6 +112,128 @@ nvidia::cuda::cuda::cuda() : _library()
} }
} }
{ // 3. Load remaining functions.
// Device Management
// - Not yet needed.
// Primary Context Management
CUDA_LOAD_SYMBOL(cuDevicePrimaryCtxRetain);
CUDA_LOAD_SYMBOL_V2(cuDevicePrimaryCtxRelease);
CUDA_LOAD_SYMBOL_OPT_V2(cuDevicePrimaryCtxSetFlags);
// Context Management
CUDA_LOAD_SYMBOL_V2(cuCtxCreate);
CUDA_LOAD_SYMBOL_V2(cuCtxDestroy);
CUDA_LOAD_SYMBOL_V2(cuCtxPushCurrent);
CUDA_LOAD_SYMBOL_V2(cuCtxPopCurrent);
CUDA_LOAD_SYMBOL_OPT(cuCtxGetCurrent);
CUDA_LOAD_SYMBOL_OPT(cuCtxSetCurrent);
CUDA_LOAD_SYMBOL(cuCtxGetStreamPriorityRange);
CUDA_LOAD_SYMBOL(cuCtxSynchronize);
// Module Management
// - Not yet needed.
// Memory Management
CUDA_LOAD_SYMBOL_V2(cuMemAlloc);
CUDA_LOAD_SYMBOL_V2(cuMemAllocPitch);
CUDA_LOAD_SYMBOL_V2(cuMemFree);
CUDA_LOAD_SYMBOL(cuMemcpy);
CUDA_LOAD_SYMBOL_V2(cuMemcpy2D);
CUDA_LOAD_SYMBOL_V2(cuMemcpy2DAsync);
CUDA_LOAD_SYMBOL_OPT_V2(cuArrayGetDescriptor);
CUDA_LOAD_SYMBOL_OPT_V2(cuMemcpyAtoA);
CUDA_LOAD_SYMBOL_OPT_V2(cuMemcpyAtoD);
CUDA_LOAD_SYMBOL_OPT_V2(cuMemcpyAtoH);
CUDA_LOAD_SYMBOL_OPT_V2(cuMemcpyAtoHAsync);
CUDA_LOAD_SYMBOL_OPT_V2(cuMemcpyDtoA);
CUDA_LOAD_SYMBOL_OPT_V2(cuMemcpyDtoD);
CUDA_LOAD_SYMBOL_OPT_V2(cuMemcpyDtoH);
CUDA_LOAD_SYMBOL_OPT_V2(cuMemcpyDtoHAsync);
CUDA_LOAD_SYMBOL_OPT_V2(cuMemcpyHtoA);
CUDA_LOAD_SYMBOL_OPT_V2(cuMemcpyHtoAAsync);
CUDA_LOAD_SYMBOL_OPT_V2(cuMemcpyHtoD);
CUDA_LOAD_SYMBOL_OPT_V2(cuMemcpyHtoDAsync);
CUDA_LOAD_SYMBOL_OPT_V2(cuMemHostGetDevicePointer);
// Virtual Memory Management
// - Not yet needed.
// Stream Ordered Memory Allocator
// - Not yet needed.
// Unified Addressing
// - Not yet needed.
// Stream Management
CUDA_LOAD_SYMBOL(cuStreamCreate);
CUDA_LOAD_SYMBOL_V2(cuStreamDestroy);
CUDA_LOAD_SYMBOL(cuStreamSynchronize);
CUDA_LOAD_SYMBOL_OPT(cuStreamCreateWithPriority);
CUDA_LOAD_SYMBOL_OPT(cuStreamGetPriority);
// Event Management
// - Not yet needed.
// External Resource Interoperability (CUDA 11.1+)
// - Not yet needed.
// Stream Memory Operations
// - Not yet needed.
// Execution Control
// - Not yet needed.
// Graph Management
// - Not yet needed.
// Occupancy
// - Not yet needed.
// Texture Object Management
// - Not yet needed.
// Surface Object Management
// - Not yet needed.
// Peer Context Memory Access
// - Not yet needed.
// Graphics Interoperability
CUDA_LOAD_SYMBOL(cuGraphicsMapResources);
CUDA_LOAD_SYMBOL(cuGraphicsSubResourceGetMappedArray);
CUDA_LOAD_SYMBOL(cuGraphicsUnmapResources);
CUDA_LOAD_SYMBOL(cuGraphicsUnregisterResource);
// Driver Entry Point Access
// - Not yet needed.
// Profiler Control
// - Not yet needed.
// OpenGL Interoperability
// - Not yet needed.
// VDPAU Interoperability
// - Not yet needed.
// EGL Interoperability
// - Not yet needed.
#ifdef WIN32
// Direct3D9 Interoperability
// - Not yet needed.
// Direct3D10 Interoperability
CUDA_LOAD_SYMBOL(cuD3D10GetDevice);
CUDA_LOAD_SYMBOL_OPT(cuGraphicsD3D10RegisterResource);
// Direct3D11 Interoperability
CUDA_LOAD_SYMBOL(cuD3D11GetDevice);
CUDA_LOAD_SYMBOL_OPT(cuGraphicsD3D11RegisterResource);
#endif
}
// Initialize CUDA // Initialize CUDA
cuInit(0); cuInit(0);
} }

View file

@ -36,7 +36,7 @@
typedef ::nvidia::cuda::result (*t##name)(__VA_ARGS__); \ typedef ::nvidia::cuda::result (*t##name)(__VA_ARGS__); \
\ \
public: \ public: \
t##name name; t##name name = nullptr;
namespace nvidia::cuda { namespace nvidia::cuda {
enum class result : std::size_t { enum class result : std::size_t {
@ -84,6 +84,18 @@ namespace nvidia::cuda {
LOCAL_MEMORY_RESIZE_TO_MAXIMUM = 0x10, LOCAL_MEMORY_RESIZE_TO_MAXIMUM = 0x10,
}; };
// Identifies which kind of external (non-CUDA) memory handle is being described.
// Used as the `type` field of external_memory_handle_info_v1_t.
// NOTE(review): the numeric values presumably mirror the CUDA driver's
// CUexternalMemoryHandleType and must not be renumbered - confirm against cuda.h.
enum class external_memory_handle_type : uint32_t {
INVALID = 0,
FILE_DESCRIPTOR = 1,
WIN32_SHARED_HANDLE = 2,
WIN32_GLOBAL_SHARED_HANDLE = 3,
D3D12_HEAP = 4,
D3D12_RESOURCE = 5,
D3D11_SHARED_RESOURCE = 6,
D3D11_GLOBAL_SHARED_RESOURCE = 7,
NVSCIBUF = 8,
};
enum class stream_flags : uint32_t { enum class stream_flags : uint32_t {
DEFAULT = 0x0, DEFAULT = 0x0,
NON_BLOCKING = 0x1, NON_BLOCKING = 0x1,
@ -92,11 +104,12 @@ namespace nvidia::cuda {
typedef void* array_t; typedef void* array_t;
typedef void* context_t; typedef void* context_t;
typedef uint64_t device_ptr_t; typedef uint64_t device_ptr_t;
typedef void* external_memory_t;
typedef void* graphics_resource_t; typedef void* graphics_resource_t;
typedef void* stream_t; typedef void* stream_t;
typedef int32_t device_t; typedef int32_t device_t;
struct memcpy2d_t { struct memcpy2d_v2_t {
std::size_t src_x_in_bytes; std::size_t src_x_in_bytes;
std::size_t src_y; std::size_t src_y;
@ -119,13 +132,35 @@ namespace nvidia::cuda {
std::size_t height; std::size_t height;
}; };
struct array_descriptor_t { struct array_descriptor_v2_t {
std::size_t width; std::size_t width;
std::size_t height; std::size_t height;
uint32_t num_channels; uint32_t num_channels;
array_format format; array_format format;
}; };
// Describes a byte range (offset + size) inside an external memory object.
// NOTE(review): field order and widths presumably mirror the CUDA driver's
// CUDA_EXTERNAL_MEMORY_BUFFER_DESC (v1) and are passed to the driver by pointer,
// so they must not be reordered or resized - confirm against cuda.h.
struct external_memory_buffer_info_v1_t {
// Start of the range within the external memory object; presumably in bytes.
uint64_t offset;
// Length of the range; presumably in bytes.
uint64_t size;
uint32_t flags;
// Padding reserved by the layout; presumably must be zero-initialized - TODO confirm.
uint32_t reserved[16];
};
// Describes an external memory handle: its kind (`type`), the handle itself
// (one member of the anonymous union), and the size of the memory behind it.
// NOTE(review): field order and widths presumably mirror the CUDA driver's
// CUDA_EXTERNAL_MEMORY_HANDLE_DESC (v1) and must not be reordered or resized -
// confirm against cuda.h.
struct external_memory_handle_info_v1_t {
external_memory_handle_type type;
// The members below share storage; which one is meaningful is presumably
// selected by `type` - TODO confirm.
union {
// Handle given as an integer; presumably a POSIX file descriptor.
int32_t file;
// NOTE(review): an anonymous struct is a compiler extension in C++ (accepted by
// MSVC, GCC and Clang); its members are reached directly as `.handle` / `.name`.
struct {
void* handle;
const void* name;
};
const void* nvscibuf;
};
// Size of the external memory object; presumably in bytes.
uint64_t size;
uint32_t flags;
// Padding reserved by the layout; presumably must be zero-initialized - TODO confirm.
uint32_t reserved[16];
};
class cuda_error : public std::exception { class cuda_error : public std::exception {
::nvidia::cuda::result _code; ::nvidia::cuda::result _code;
@ -154,96 +189,36 @@ namespace nvidia::cuda {
CUDA_DEFINE_FUNCTION(cuDriverGetVersion, int32_t* driverVersion); CUDA_DEFINE_FUNCTION(cuDriverGetVersion, int32_t* driverVersion);
// Device Management // Device Management
// cuDeviceGet // - Not yet needed.
// cuDeviceGetAttribute
// cuDeviceGetCount
// cuDeviceGetLuid
// cuDeviceGetName
// cuDeviceGetNvSciSyncAttributes
// cuDeviceGetUuid
// cuDeviceTotalMem_v2
// Primary Context Management // Primary Context Management
// cuDevicePrimaryCtxGetState
CUDA_DEFINE_FUNCTION(cuDevicePrimaryCtxRelease, device_t device); CUDA_DEFINE_FUNCTION(cuDevicePrimaryCtxRelease, device_t device);
// cuDevicePrimaryCtxReset_v2
CUDA_DEFINE_FUNCTION(cuDevicePrimaryCtxRetain, context_t* ctx, device_t device); CUDA_DEFINE_FUNCTION(cuDevicePrimaryCtxRetain, context_t* ctx, device_t device);
CUDA_DEFINE_FUNCTION(cuDevicePrimaryCtxSetFlags, device_t device, context_flags flags); CUDA_DEFINE_FUNCTION(cuDevicePrimaryCtxSetFlags, device_t device, context_flags flags);
// Context Management // Context Management
CUDA_DEFINE_FUNCTION(cuCtxCreate, context_t* ctx, context_flags flags, device_t device); CUDA_DEFINE_FUNCTION(cuCtxCreate, context_t* ctx, context_flags flags, device_t device);
CUDA_DEFINE_FUNCTION(cuCtxDestroy, context_t ctx); CUDA_DEFINE_FUNCTION(cuCtxDestroy, context_t ctx);
// cuCtxGetApiVersion
// cuCtxGetCacheConfig
CUDA_DEFINE_FUNCTION(cuCtxGetCurrent, context_t* ctx); CUDA_DEFINE_FUNCTION(cuCtxGetCurrent, context_t* ctx);
// cuCtxGetDevice
// cuCtxGetFlags
// cuCtxGetLimit
// cuCtxGetSharedMemConfig
CUDA_DEFINE_FUNCTION(cuCtxGetStreamPriorityRange, int32_t* lowestPriority, int32_t* highestPriority); CUDA_DEFINE_FUNCTION(cuCtxGetStreamPriorityRange, int32_t* lowestPriority, int32_t* highestPriority);
CUDA_DEFINE_FUNCTION(cuCtxPopCurrent, context_t* ctx); CUDA_DEFINE_FUNCTION(cuCtxPopCurrent, context_t* ctx);
CUDA_DEFINE_FUNCTION(cuCtxPushCurrent, context_t ctx); CUDA_DEFINE_FUNCTION(cuCtxPushCurrent, context_t ctx);
// cuCtxSetCacheConfig
CUDA_DEFINE_FUNCTION(cuCtxSetCurrent, context_t ctx); CUDA_DEFINE_FUNCTION(cuCtxSetCurrent, context_t ctx);
// cuCtxSetLimit
// cuCtxSetSharedMemConfig
// cuCtxSynchronize
CUDA_DEFINE_FUNCTION(cuCtxSynchronize); CUDA_DEFINE_FUNCTION(cuCtxSynchronize);
// UNDOCUMENTED? cuCtxResetPersistingL2Cache
// Module Management // Module Management
// cuLinkAddData // - Not yet needed.
// cuLinkAddFile
// cuLinkComplete
// cuLinkCreate
// cuLinkDestroy
// cuModuleGetFunction
// cuModuleGetGlobal
// cuModuleGetSurfRef
// cuModuleGetTexRef
// cuModuleLoad
// cuModuleLoadData
// cuModuleLoadDataEx
// cuModuleLoadFatBinary
// cuModuleUnload
// Memory Management // Memory Management
// cuArray3DCreate_v2 CUDA_DEFINE_FUNCTION(cuArrayGetDescriptor, array_descriptor_v2_t* pArrayDescripter, array_t array);
// cuArray3DGetDescripter_v2
// cuArrayCreate_v2
// cuArrayDestroy
CUDA_DEFINE_FUNCTION(cuArrayGetDescriptor, array_descriptor_t* pArrayDescripter, array_t array);
// cuArrayGetDescriptor_v2
// cuDeviceGetByPCIBusId
// cuDeviceGetPCIBusId
// cuIpcCloseMemHandle
// cuIpcGetEventHandle
// cuIpcGetMemHandle
// cuIpcOpenEventHandle
// cuIpcOpenMemHandle
CUDA_DEFINE_FUNCTION(cuMemAlloc, device_ptr_t* ptr, std::size_t bytes); CUDA_DEFINE_FUNCTION(cuMemAlloc, device_ptr_t* ptr, std::size_t bytes);
// cuMemAllocHost_v2
// cuMemAllocManaged
CUDA_DEFINE_FUNCTION(cuMemAllocPitch, device_ptr_t* ptr, std::size_t* pitch, std::size_t width_in_bytes, CUDA_DEFINE_FUNCTION(cuMemAllocPitch, device_ptr_t* ptr, std::size_t* pitch, std::size_t width_in_bytes,
std::size_t height, uint32_t element_size_bytes); std::size_t height, uint32_t element_size_bytes);
CUDA_DEFINE_FUNCTION(cuMemFree, device_ptr_t ptr); CUDA_DEFINE_FUNCTION(cuMemFree, device_ptr_t ptr);
// cuMemFreeHost
// cuMemGetAddressRange_v2
// cuMemGetInfo_v2
// cuMemHostAlloc
CUDA_DEFINE_FUNCTION(cuMemHostGetDevicePointer, device_ptr_t* devptr, void* ptr, uint32_t flags); CUDA_DEFINE_FUNCTION(cuMemHostGetDevicePointer, device_ptr_t* devptr, void* ptr, uint32_t flags);
// cuMemHostGetFlags
// cuMemHostRegister_v2
// cuMemHostUnregister
CUDA_DEFINE_FUNCTION(cuMemcpy, device_ptr_t dst, device_ptr_t src, std::size_t bytes); CUDA_DEFINE_FUNCTION(cuMemcpy, device_ptr_t dst, device_ptr_t src, std::size_t bytes);
CUDA_DEFINE_FUNCTION(cuMemcpy2D, const memcpy2d_t* copy); CUDA_DEFINE_FUNCTION(cuMemcpy2D, const memcpy2d_v2_t* copy);
CUDA_DEFINE_FUNCTION(cuMemcpy2DAsync, const memcpy2d_t* copy, stream_t stream); CUDA_DEFINE_FUNCTION(cuMemcpy2DAsync, const memcpy2d_v2_t* copy, stream_t stream);
// cuMemcpy2DUnaligned_v2 / _v2_ptds
// cuMemcpy3D_v2 / _v2_ptds
// cuMemcpy3DAsync_v2 / _v2_ptsz
// cuMemcpy3DPeer / _ptds
// cuMemcpy3DPeerAsync_v2 / _v2_ptsz
// cuMemcpyAsync / _ptsz
CUDA_DEFINE_FUNCTION(cuMemcpyAtoA, array_t dst, std::size_t dstOffset, array_t src, std::size_t srcOffset, CUDA_DEFINE_FUNCTION(cuMemcpyAtoA, array_t dst, std::size_t dstOffset, array_t src, std::size_t srcOffset,
std::size_t byteCount); std::size_t byteCount);
CUDA_DEFINE_FUNCTION(cuMemcpyAtoD, device_ptr_t dst, array_t src, std::size_t srcOffset, std::size_t byteCount); CUDA_DEFINE_FUNCTION(cuMemcpyAtoD, device_ptr_t dst, array_t src, std::size_t srcOffset, std::size_t byteCount);
@ -257,151 +232,83 @@ namespace nvidia::cuda {
CUDA_DEFINE_FUNCTION(cuMemcpyHtoAAsync, array_t dst, std::size_t dstOffset, void* src, std::size_t byteCount); CUDA_DEFINE_FUNCTION(cuMemcpyHtoAAsync, array_t dst, std::size_t dstOffset, void* src, std::size_t byteCount);
CUDA_DEFINE_FUNCTION(cuMemcpyHtoD, device_ptr_t dst, void* src, std::size_t byteCount); CUDA_DEFINE_FUNCTION(cuMemcpyHtoD, device_ptr_t dst, void* src, std::size_t byteCount);
CUDA_DEFINE_FUNCTION(cuMemcpyHtoDAsync, device_ptr_t dst, void* src, std::size_t byteCount); CUDA_DEFINE_FUNCTION(cuMemcpyHtoDAsync, device_ptr_t dst, void* src, std::size_t byteCount);
// cuMemcpyPeer / _ptds
// cuMemcpyPeerAsync / _ptsz
// cuMemsetD16
// cuMemsetD16Async
// cuMemsetD2D16
// cuMemsetD2D16Async
// cuMemsetD2D32
// cuMemsetD2D32Async
// cuMemsetD2D8
// cuMemsetD2D8Async
// cuMemsetD32
// cuMemsetD32Async
// cuMemsetD8
// cuMemsetD8Async
// cuMipmappedArrayCreate
// cuMipmappedArrayDestroy
// cuMipmappedArrayGetLevel
// Virtual Memory Management // Virtual Memory Management
// cuMemAddressFree // - Not yet needed.
// cuMemAddressReserve
// cuMemCreate // Stream Ordered Memory Allocator
// cuMemExportToShareableHandle // - Not yet needed.
// cuMemGetAccess
// cuMemGetAllocationGranularity
// cuMemGetAllocationPropertiesFromHandle
// cuMemImportFromShareableHandle
// cuMemMap
// cuMemRelease
// cuMemSetAccess
// cuMemUnmap
// Unified Addressing // Unified Addressing
// cuMemAdvise // - Not yet needed.
// cuMemPrefetchAsync
// cuMemRangeGetAttribute
// cuMemRangeGetAttributes
// cuPointerGetAttribute
// cuPointerGetAttributes
// cuPointerSetAttribute
// Stream Management // Stream Management
// cuStreamAddCallback
// cuStreamAttachMemAsync
// cuStreamBeginCapture_v2
CUDA_DEFINE_FUNCTION(cuStreamCreate, stream_t* stream, stream_flags flags); CUDA_DEFINE_FUNCTION(cuStreamCreate, stream_t* stream, stream_flags flags);
CUDA_DEFINE_FUNCTION(cuStreamCreateWithPriority, stream_t* stream, stream_flags flags, int32_t priority); CUDA_DEFINE_FUNCTION(cuStreamCreateWithPriority, stream_t* stream, stream_flags flags, int32_t priority);
CUDA_DEFINE_FUNCTION(cuStreamDestroy, stream_t stream); CUDA_DEFINE_FUNCTION(cuStreamDestroy, stream_t stream);
// cuStreamEndCapture
// cuStreamGetCaptureInfo
// cuStreamGetCtx
// cuStreamGetFlags
// cuStreamGetPriority
// cuStreamIsCapturing
// cuStreamQuery
CUDA_DEFINE_FUNCTION(cuStreamSynchronize, stream_t stream); CUDA_DEFINE_FUNCTION(cuStreamSynchronize, stream_t stream);
// cuStreamWaitEvent CUDA_DEFINE_FUNCTION(cuStreamGetPriority, stream_t stream, int32_t* priority);
// cuThreadExchangeStreamCaptureMode
// Event Management // Event Management
// cuEventCreate // - Not yet needed.
// cuEventDestroy_v2
// cuEventElapsedTime
// cuEventQuery
// cuEventRecord
// cuEventSynchronize
// External Resource Interoperability // External Resource Interoperability (CUDA 11.1+)
// cuDestroyExternalMemory // - Not yet needed.
// cuDestroyExternalSemaphore
// cuExternalMemoryGetMappedBuffer
// cuExternalMemoryGetMappedMipmappedArray
// cuImportExternalMemory
// cuImportExternalSemaphore
// cuSignalExternalSemaphoresAsync
// cuWaitExternalSemaphoresAsync
// Stream Memory Operations // Stream Memory Operations
// cuStreamBatchMemOp // - Not yet needed.
// cuStreamWaitValue32
// cuStreamWaitValue64
// cuStreamWriteValue32
// cuStreamWriteValue64
// Execution Control // Execution Control
// cuFuncGetAttribute // - Not yet needed.
// cuFuncSetAttribute
// cuFuncSetCacheConfig
// cuFuncSetSharedMemConfig
// cuLaunchCooperativeKernel
// cuLaunchCooperativeKernelMultiDevice
// cuLaunchHostFunc
// cuLaunchKernel
// Graph Management // Graph Management
// Todo! // - Not yet needed.
// Occupancy // Occupancy
// Todo // - Not yet needed.
// Texture Object Management // Texture Object Management
// Todo // - Not yet needed.
// Surface Object Management // Surface Object Management
// Todo // - Not yet needed.
// Peer Context Memory Access // Peer Context Memory Access
// Todo // - Not yet needed.
// Graphics Interoperability // Graphics Interoperability
CUDA_DEFINE_FUNCTION(cuGraphicsMapResources, uint32_t count, graphics_resource_t* resources, stream_t stream); CUDA_DEFINE_FUNCTION(cuGraphicsMapResources, uint32_t count, graphics_resource_t* resources, stream_t stream);
// cuGraphicsResourcesGetMappedMipmappedArray
// cuGraphicsResourcesGetMappedPointer_v2
// cuGraphicsResourcesSetMapFlags_v2
CUDA_DEFINE_FUNCTION(cuGraphicsSubResourceGetMappedArray, array_t* array, graphics_resource_t resource, CUDA_DEFINE_FUNCTION(cuGraphicsSubResourceGetMappedArray, array_t* array, graphics_resource_t resource,
uint32_t index, uint32_t level); uint32_t index, uint32_t level);
CUDA_DEFINE_FUNCTION(cuGraphicsUnmapResources, uint32_t count, graphics_resource_t* resources, stream_t stream); CUDA_DEFINE_FUNCTION(cuGraphicsUnmapResources, uint32_t count, graphics_resource_t* resources, stream_t stream);
CUDA_DEFINE_FUNCTION(cuGraphicsUnregisterResource, graphics_resource_t resource); CUDA_DEFINE_FUNCTION(cuGraphicsUnregisterResource, graphics_resource_t resource);
// Profile Control // Driver Entry Point Access
// Todo // - Not yet needed.
// Profiler Control
// - Not yet needed.
// OpenGL Interoperability // OpenGL Interoperability
// cuGLGetDevices // - Not yet needed.
// cuGraphcisGLRegisterBuffer
// cuGraphcisGLRegisterImage // VDPAU Interoperability
// - Not yet needed.
// EGL Interoperability
// - Not yet needed.
#ifdef WIN32 #ifdef WIN32
// cuWGLGetDevice // Direct3D9 Interoperability
// - Not yet needed.
// Direct3D9 Interopability // Direct3D10 Interoperability
// cuD3D9CtxCreate CUDA_DEFINE_FUNCTION(cuD3D10GetDevice, device_t* device, IDXGIAdapter* adapter);
// cuD3D9CtxCreateOnDevice CUDA_DEFINE_FUNCTION(cuGraphicsD3D10RegisterResource, graphics_resource_t* resource,
// cuD3D9CtxGetDevice ID3D10Resource* d3dresource, uint32_t flags);
// cuD3D9CtxGetDevices
// cuD3D9GetDirect3DDevice
// cuGraphicsD3D9RegisterResource
// Direct3D10 Interopability // Direct3D11 Interoperability
// cuD3D10GetDevice
// cuD3D10GetDevices
// cuGraphicsD3D10RegisterResource
// Direct3D11 Interopability
CUDA_DEFINE_FUNCTION(cuD3D11GetDevice, device_t* device, IDXGIAdapter* adapter); CUDA_DEFINE_FUNCTION(cuD3D11GetDevice, device_t* device, IDXGIAdapter* adapter);
// cuD3D11GetDevices
CUDA_DEFINE_FUNCTION(cuGraphicsD3D11RegisterResource, graphics_resource_t* resource, CUDA_DEFINE_FUNCTION(cuGraphicsD3D11RegisterResource, graphics_resource_t* resource,
ID3D11Resource* d3dresource, uint32_t flags); ID3D11Resource* d3dresource, uint32_t flags);
#endif #endif