diff --git a/source/filters/filter-nv-face-tracking.cpp b/source/filters/filter-nv-face-tracking.cpp
index 68fe8bce..dfc20aa9 100644
--- a/source/filters/filter-nv-face-tracking.cpp
+++ b/source/filters/filter-nv-face-tracking.cpp
@@ -79,7 +79,7 @@ face_tracking_instance::face_tracking_instance(obs_data_t* settings, obs_source_
 		_geometry = std::make_shared(4, 1);
 		auto cctx = std::make_shared<::nvidia::cuda::context_stack>(_cuda, _cuda_ctx);
 		_cuda_stream =
-			std::make_shared<::nvidia::cuda::stream>(_cuda, ::nvidia::cuda::cu_stream_flags::NON_BLOCKING, 0);
+			std::make_shared<::nvidia::cuda::stream>(_cuda, ::nvidia::cuda::stream_flags::NON_BLOCKING, 0);
 	}
 
 	{ // Asynchronously load Face Tracking.
@@ -311,26 +311,26 @@ void face_tracking_instance::async_track(std::shared_ptr ptr)
 #ifdef ENABLE_PROFILING
 		auto prof = _profile_ar_copy->track();
 #endif
-		::nvidia::cuda::cu_memcpy2d_t mc;
+		::nvidia::cuda::memcpy2d_t mc;
 		mc.src_x_in_bytes = 0;
 		mc.src_y = 0;
-		mc.src_memory_type = ::nvidia::cuda::cu_memory_type::ARRAY;
+		mc.src_memory_type = ::nvidia::cuda::memory_type::ARRAY;
 		mc.src_host = nullptr;
 		mc.src_device = 0;
 		mc.src_array = _ar_texture_cuda->map(_cuda_stream);
 		mc.src_pitch = static_cast(_ar_image.pitch);
 		mc.dst_x_in_bytes = 0;
 		mc.dst_y = 0;
-		mc.dst_memory_type = ::nvidia::cuda::cu_memory_type::DEVICE;
+		mc.dst_memory_type = ::nvidia::cuda::memory_type::DEVICE;
 		mc.dst_host = 0;
-		mc.dst_device = reinterpret_cast<::nvidia::cuda::cu_device_ptr_t>(_ar_image.pixels);
+		mc.dst_device = reinterpret_cast<::nvidia::cuda::device_ptr_t>(_ar_image.pixels);
 		mc.dst_array = 0;
 		mc.dst_pitch = static_cast(_ar_image.pitch);
 		mc.width_in_bytes = static_cast(_ar_image.pitch);
 		mc.height = _ar_image.height;
-		if (::nvidia::cuda::cu_result res = _cuda->cuMemcpy2DAsync(&mc, _cuda_stream->get());
-			res != ::nvidia::cuda::cu_result::SUCCESS) {
+		if (::nvidia::cuda::result res = _cuda->cuMemcpy2DAsync(&mc, _cuda_stream->get());
+			res != ::nvidia::cuda::result::SUCCESS) {
 			LOG_ERROR("<%s> Failed to prepare buffers for tracking.", obs_source_get_name(_self));
 			return;
 		}
diff --git a/source/nvidia/cuda/nvidia-cuda-context-stack.cpp b/source/nvidia/cuda/nvidia-cuda-context-stack.cpp
index a9c9f20b..731056a6 100644
--- a/source/nvidia/cuda/nvidia-cuda-context-stack.cpp
+++ b/source/nvidia/cuda/nvidia-cuda-context-stack.cpp
@@ -31,7 +31,7 @@ nvidia::cuda::context_stack::context_stack(std::shared_ptr<::nvidia::cuda::cuda>
 	if (!context)
 		throw std::invalid_argument("context");
 
-	if (cu_result res = _cuda->cuCtxPushCurrent(_ctx->get()); res != cu_result::SUCCESS) {
+	if (result res = _cuda->cuCtxPushCurrent(_ctx->get()); res != result::SUCCESS) {
 		throw std::runtime_error("Failed to push context.");
 	}
 }
@@ -40,7 +40,7 @@ nvidia::cuda::context_stack::~context_stack()
 {
 	using namespace ::nvidia::cuda;
 
-	cu_context_t ctx;
+	context_t ctx;
 	_cuda->cuCtxGetCurrent(&ctx);
 	if (ctx == _ctx->get()) {
 		_cuda->cuCtxPopCurrent(&ctx);
diff --git a/source/nvidia/cuda/nvidia-cuda-context.cpp b/source/nvidia/cuda/nvidia-cuda-context.cpp
index 9d47e369..6d1ebefe 100644
--- a/source/nvidia/cuda/nvidia-cuda-context.cpp
+++ b/source/nvidia/cuda/nvidia-cuda-context.cpp
@@ -62,12 +62,12 @@ nvidia::cuda::context::context(std::shared_ptr<::nvidia::cuda::cuda> cuda, ID3D1
 	dxgi_device->GetAdapter(&dxgi_adapter);
 
 	// Get Device Index
-	if (cu_result res = _cuda->cuD3D11GetDevice(&_device, dxgi_adapter); res != cu_result::SUCCESS) {
+	if (result res = _cuda->cuD3D11GetDevice(&_device, dxgi_adapter); res != result::SUCCESS) {
 		throw std::runtime_error("Failed to get device index for device.");
 	}
 
 	// Acquire Context
-	if (cu_result res = _cuda->cuDevicePrimaryCtxRetain(&_ctx, _device); res != cu_result::SUCCESS) {
+	if (result res = _cuda->cuDevicePrimaryCtxRetain(&_ctx, _device); res != result::SUCCESS) {
 		throw std::runtime_error("Failed to acquire primary device context.");
 	}
 
@@ -75,7 +75,7 @@ nvidia::cuda::context::context(std::shared_ptr<::nvidia::cuda::cuda> cuda, ID3D1
 }
 #endif
 
-::nvidia::cuda::cu_context_t nvidia::cuda::context::get()
+::nvidia::cuda::context_t nvidia::cuda::context::get()
 {
 	return _ctx;
 }
diff --git a/source/nvidia/cuda/nvidia-cuda-context.hpp b/source/nvidia/cuda/nvidia-cuda-context.hpp
index 94916146..ad79df10 100644
--- a/source/nvidia/cuda/nvidia-cuda-context.hpp
+++ b/source/nvidia/cuda/nvidia-cuda-context.hpp
@@ -24,11 +24,11 @@ namespace nvidia::cuda {
 	class context {
 		std::shared_ptr<::nvidia::cuda::cuda> _cuda;
-		::nvidia::cuda::cu_context_t _ctx;
+		::nvidia::cuda::context_t _ctx;
 
 		// Primary Device Context
 		bool _has_device;
-		::nvidia::cuda::cu_device_t _device;
+		::nvidia::cuda::device_t _device;
 
 		private:
 		context(std::shared_ptr<::nvidia::cuda::cuda> cuda);
@@ -40,6 +40,6 @@ namespace nvidia::cuda {
 		context(std::shared_ptr<::nvidia::cuda::cuda> cuda, ID3D11Device* device);
 #endif
 
-		::nvidia::cuda::cu_context_t get();
+		::nvidia::cuda::context_t get();
 	};
 } // namespace nvidia::cuda
diff --git a/source/nvidia/cuda/nvidia-cuda-gs-texture.cpp b/source/nvidia/cuda/nvidia-cuda-gs-texture.cpp
index ceea4c32..a9aa0f74 100644
--- a/source/nvidia/cuda/nvidia-cuda-gs-texture.cpp
+++ b/source/nvidia/cuda/nvidia-cuda-gs-texture.cpp
@@ -54,7 +54,7 @@ nvidia::cuda::gstexture::gstexture(std::shared_ptr cuda, std
 	}
 
 	switch (_cuda->cuGraphicsD3D11RegisterResource(&_resource, resource, 0)) {
-	case nvidia::cuda::cu_result::SUCCESS:
+	case nvidia::cuda::result::SUCCESS:
 		break;
 	default:
 		throw std::runtime_error("nvidia::cuda::gstexture: Failed to register resource.");
@@ -69,15 +69,15 @@ nvidia::cuda::gstexture::~gstexture()
 	_cuda->cuGraphicsUnregisterResource(_resource);
 }
 
-nvidia::cuda::cu_array_t nvidia::cuda::gstexture::map(std::shared_ptr stream)
+nvidia::cuda::array_t nvidia::cuda::gstexture::map(std::shared_ptr stream)
 {
 	if (_is_mapped) {
 		return _pointer;
 	}
 
-	cu_graphics_resource_t resources[] = {_resource};
+	graphics_resource_t resources[] = {_resource};
 	switch (_cuda->cuGraphicsMapResources(1, resources, stream->get())) {
-	case nvidia::cuda::cu_result::SUCCESS:
+	case nvidia::cuda::result::SUCCESS:
 		break;
 	default:
 		throw std::runtime_error("nvidia::cuda::gstexture: Mapping failed.");
@@ -87,7 +87,7 @@ nvidia::cuda::cu_array_t nvidia::cuda::gstexture::map(std::shared_ptr
 	switch (_cuda->cuGraphicsSubResourceGetMappedArray(&_pointer, _resource, 0, 0)) {
-	case nvidia::cuda::cu_result::SUCCESS:
+	case nvidia::cuda::result::SUCCESS:
 		break;
 	default:
 		unmap();
@@ -102,9 +102,9 @@ void nvidia::cuda::gstexture::unmap()
 	if (!_is_mapped)
 		return;
 
-	cu_graphics_resource_t resources[] = {_resource};
+	graphics_resource_t resources[] = {_resource};
 	switch (_cuda->cuGraphicsUnmapResources(1, resources, _stream->get())) {
-	case nvidia::cuda::cu_result::SUCCESS:
+	case nvidia::cuda::result::SUCCESS:
 		break;
 	default:
 		throw std::runtime_error("nvidia::cuda::gstexture: Unmapping failed.");
diff --git a/source/nvidia/cuda/nvidia-cuda-gs-texture.hpp b/source/nvidia/cuda/nvidia-cuda-gs-texture.hpp
index f5ec127a..59fb3d7f 100644
--- a/source/nvidia/cuda/nvidia-cuda-gs-texture.hpp
+++ b/source/nvidia/cuda/nvidia-cuda-gs-texture.hpp
@@ -28,17 +28,17 @@ namespace nvidia::cuda {
 	class gstexture {
 		std::shared_ptr<::nvidia::cuda::cuda> _cuda;
 		std::shared_ptr _texture;
 
-		cu_graphics_resource_t _resource;
+		graphics_resource_t _resource;
 
 		bool _is_mapped;
-		cu_array_t _pointer;
+		array_t _pointer;
 		std::shared_ptr _stream;
 
 		public:
 		gstexture(std::shared_ptr cuda, std::shared_ptr texture);
 		~gstexture();
 
-		cu_array_t map(std::shared_ptr stream);
+		array_t map(std::shared_ptr stream);
 		void unmap();
 	};
 } // namespace nvidia::cuda
diff --git a/source/nvidia/cuda/nvidia-cuda-memory.cpp b/source/nvidia/cuda/nvidia-cuda-memory.cpp
index 73081c86..76036e6a 100644
--- a/source/nvidia/cuda/nvidia-cuda-memory.cpp
+++ b/source/nvidia/cuda/nvidia-cuda-memory.cpp
@@ -23,9 +23,9 @@ nvidia::cuda::memory::memory(std::shared_ptr<::nvidia::cuda::cuda> cuda, std::size_t size)
 	: _cuda(cuda), _pointer(), _size(size)
 {
-	::nvidia::cuda::cu_result res = _cuda->cuMemAlloc(&_pointer, size);
+	::nvidia::cuda::result res = _cuda->cuMemAlloc(&_pointer, size);
 	switch (res) {
-	case ::nvidia::cuda::cu_result::SUCCESS:
+	case ::nvidia::cuda::result::SUCCESS:
 		break;
 	default:
 		throw std::runtime_error("nvidia::cuda::memory: cuMemAlloc failed.");
@@ -37,7 +37,7 @@ nvidia::cuda::memory::~memory()
 {
 	_cuda->cuMemFree(_pointer);
 }
 
-nvidia::cuda::cu_device_ptr_t nvidia::cuda::memory::get()
+nvidia::cuda::device_ptr_t nvidia::cuda::memory::get()
 {
 	return _pointer;
 }
diff --git a/source/nvidia/cuda/nvidia-cuda-memory.hpp b/source/nvidia/cuda/nvidia-cuda-memory.hpp
index 71449d7c..57441c1c 100644
--- a/source/nvidia/cuda/nvidia-cuda-memory.hpp
+++ b/source/nvidia/cuda/nvidia-cuda-memory.hpp
@@ -25,14 +25,14 @@ namespace nvidia::cuda {
 	class memory {
 		std::shared_ptr<::nvidia::cuda::cuda> _cuda;
-		cu_device_ptr_t _pointer;
+		device_ptr_t _pointer;
 		std::size_t _size;
 
 		public:
 		memory(std::shared_ptr<::nvidia::cuda::cuda> cuda, std::size_t size);
 		~memory();
 
-		cu_device_ptr_t get();
+		device_ptr_t get();
 
 		std::size_t size();
 	};
diff --git a/source/nvidia/cuda/nvidia-cuda-stream.cpp b/source/nvidia/cuda/nvidia-cuda-stream.cpp
index 377a1818..3a806dc0 100644
--- a/source/nvidia/cuda/nvidia-cuda-stream.cpp
+++ b/source/nvidia/cuda/nvidia-cuda-stream.cpp
@@ -20,18 +20,18 @@
 #include "nvidia-cuda-stream.hpp"
 #include 
 
-nvidia::cuda::stream::stream(std::shared_ptr<::nvidia::cuda::cuda> cuda, ::nvidia::cuda::cu_stream_flags flags,
+nvidia::cuda::stream::stream(std::shared_ptr<::nvidia::cuda::cuda> cuda, ::nvidia::cuda::stream_flags flags,
 							 std::int32_t priority)
 	: _cuda(cuda)
 {
-	nvidia::cuda::cu_result res;
+	nvidia::cuda::result res;
 	if (priority == 0) {
 		res = _cuda->cuStreamCreate(&_stream, flags);
 	} else {
 		res = _cuda->cuStreamCreateWithPriority(&_stream, flags, priority);
 	}
 	switch (res) {
-	case nvidia::cuda::cu_result::SUCCESS:
+	case nvidia::cuda::result::SUCCESS:
 		break;
 	default:
 		throw std::runtime_error("Failed to create CUstream object.");
@@ -43,7 +43,7 @@ nvidia::cuda::stream::~stream()
 {
 	_cuda->cuStreamDestroy(_stream);
 }
 
-::nvidia::cuda::cu_stream_t nvidia::cuda::stream::get()
+::nvidia::cuda::stream_t nvidia::cuda::stream::get()
 {
 	return _stream;
 }
diff --git a/source/nvidia/cuda/nvidia-cuda-stream.hpp b/source/nvidia/cuda/nvidia-cuda-stream.hpp
index 5dd97db9..12c56d88 100644
--- a/source/nvidia/cuda/nvidia-cuda-stream.hpp
+++ b/source/nvidia/cuda/nvidia-cuda-stream.hpp
@@ -24,14 +24,14 @@ namespace nvidia::cuda {
 	class stream {
 		std::shared_ptr<::nvidia::cuda::cuda> _cuda;
-		::nvidia::cuda::cu_stream_t _stream;
+		::nvidia::cuda::stream_t _stream;
 
 		public:
 		stream(std::shared_ptr<::nvidia::cuda::cuda> cuda,
-			   ::nvidia::cuda::cu_stream_flags flags = ::nvidia::cuda::cu_stream_flags::DEFAULT,
+			   ::nvidia::cuda::stream_flags flags = ::nvidia::cuda::stream_flags::DEFAULT,
 			   std::int32_t priority = 0);
 		~stream();
 
-		::nvidia::cuda::cu_stream_t get();
+		::nvidia::cuda::stream_t get();
 	};
 } // namespace nvidia::cuda
diff --git a/source/nvidia/cuda/nvidia-cuda.hpp b/source/nvidia/cuda/nvidia-cuda.hpp
index f9b35356..5ce80223 100644
--- a/source/nvidia/cuda/nvidia-cuda.hpp
+++ b/source/nvidia/cuda/nvidia-cuda.hpp
@@ -32,15 +32,15 @@
 #pragma warning(pop)
 #endif
 
-#define CUDA_DEFINE_FUNCTION(name, ...) \
-	private: \
-	typedef ::nvidia::cuda::cu_result (*t##name)(__VA_ARGS__); \
-	\
-	public: \
+#define CUDA_DEFINE_FUNCTION(name, ...) \
+	private: \
+	typedef ::nvidia::cuda::result (*t##name)(__VA_ARGS__); \
+	\
+	public: \
 	t##name name;
 
 namespace nvidia::cuda {
-	enum class cu_result : std::size_t {
+	enum class result : std::size_t {
 		SUCCESS = 0,
 		INVALID_VALUE = 1,
 		OUT_OF_MEMORY = 2,
@@ -58,14 +58,14 @@ namespace nvidia::cuda {
 		// Still missing some.
 	};
 
-	enum class cu_memory_type : std::uint32_t {
+	enum class memory_type : std::uint32_t {
 		HOST = 1,
 		DEVICE = 2,
 		ARRAY = 3,
 		UNIFIED = 4,
 	};
 
-	enum class cu_array_format : std::uint32_t {
+	enum class array_format : std::uint32_t {
 		UNSIGNED_INT8 = 0b00000001,
 		UNSIGNED_INT16 = 0b00000010,
 		UNSIGNED_INT32 = 0b00000011,
@@ -76,7 +76,7 @@ namespace nvidia::cuda {
 		FLOAT = 0b00100000,
 	};
 
-	enum class cu_context_flags : std::uint32_t {
+	enum class context_flags : std::uint32_t {
 		SCHEDULER_AUTO = 0x0,
 		SCHEDULER_SPIN = 0x1,
 		SCHEDULER_YIELD = 0x2,
@@ -85,46 +85,46 @@ namespace nvidia::cuda {
 		LOCAL_MEMORY_RESIZE_TO_MAXIMUM = 0x10,
 	};
 
-	enum class cu_stream_flags : std::uint32_t {
+	enum class stream_flags : std::uint32_t {
 		DEFAULT = 0x0,
 		NON_BLOCKING = 0x1,
 	};
 
-	typedef void* cu_array_t;
-	typedef void* cu_context_t;
-	typedef std::uint64_t cu_device_ptr_t;
-	typedef void* cu_graphics_resource_t;
-	typedef void* cu_stream_t;
-	typedef std::int32_t cu_device_t;
+	typedef void* array_t;
+	typedef void* context_t;
+	typedef std::uint64_t device_ptr_t;
+	typedef void* graphics_resource_t;
+	typedef void* stream_t;
+	typedef std::int32_t device_t;
 
-	struct cu_memcpy2d_t {
+	struct memcpy2d_t {
 		std::size_t src_x_in_bytes;
 		std::size_t src_y;
-		cu_memory_type src_memory_type;
-		const void* src_host;
-		cu_device_ptr_t src_device;
-		cu_array_t src_array;
-		std::size_t src_pitch;
+		memory_type src_memory_type;
+		const void* src_host;
+		device_ptr_t src_device;
+		array_t src_array;
+		std::size_t src_pitch;
 
 		std::size_t dst_x_in_bytes;
 		std::size_t dst_y;
-		cu_memory_type dst_memory_type;
-		const void* dst_host;
-		cu_device_ptr_t dst_device;
-		cu_array_t dst_array;
-		std::size_t dst_pitch;
+		memory_type dst_memory_type;
+		const void* dst_host;
+		device_ptr_t dst_device;
+		array_t dst_array;
+		std::size_t dst_pitch;
 
 		std::size_t width_in_bytes;
 		std::size_t height;
 	};
 
-	struct cu_array_descriptor_t {
-		std::size_t width;
-		std::size_t height;
-		std::uint32_t num_channels;
-		cu_array_format format;
+	struct array_descriptor_t {
+		std::size_t width;
+		std::size_t height;
+		std::uint32_t num_channels;
+		array_format format;
 	};
 
 	class cuda {
@@ -154,26 +154,26 @@ namespace nvidia::cuda {
 		// Primary Context Management
 		// cuDevicePrimaryCtxGetState
-		CUDA_DEFINE_FUNCTION(cuDevicePrimaryCtxRelease, cu_device_t device);
+		CUDA_DEFINE_FUNCTION(cuDevicePrimaryCtxRelease, device_t device);
 		// cuDevicePrimaryCtxReset_v2
-		CUDA_DEFINE_FUNCTION(cuDevicePrimaryCtxRetain, cu_context_t* ctx, cu_device_t device);
-		CUDA_DEFINE_FUNCTION(cuDevicePrimaryCtxSetFlags, cu_device_t device, cu_context_flags flags);
+		CUDA_DEFINE_FUNCTION(cuDevicePrimaryCtxRetain, context_t* ctx, device_t device);
+		CUDA_DEFINE_FUNCTION(cuDevicePrimaryCtxSetFlags, device_t device, context_flags flags);
 
 		// Context Management
-		CUDA_DEFINE_FUNCTION(cuCtxCreate, cu_context_t* ctx, cu_context_flags flags, cu_device_t device);
-		CUDA_DEFINE_FUNCTION(cuCtxDestroy, cu_context_t ctx);
+		CUDA_DEFINE_FUNCTION(cuCtxCreate, context_t* ctx, context_flags flags, device_t device);
+		CUDA_DEFINE_FUNCTION(cuCtxDestroy, context_t ctx);
 		// cuCtxGetApiVersion
 		// cuCtxGetCacheConfig
-		CUDA_DEFINE_FUNCTION(cuCtxGetCurrent, cu_context_t* ctx);
+		CUDA_DEFINE_FUNCTION(cuCtxGetCurrent, context_t* ctx);
 		// cuCtxGetDevice
 		// cuCtxGetFlags
 		// cuCtxGetLimit
 		// cuCtxGetSharedMemConfig
 		CUDA_DEFINE_FUNCTION(cuCtxGetStreamPriorityRange, std::int32_t* lowestPriority, std::int32_t* highestPriority);
-		CUDA_DEFINE_FUNCTION(cuCtxPopCurrent, cu_context_t* ctx);
-		CUDA_DEFINE_FUNCTION(cuCtxPushCurrent, cu_context_t ctx);
+		CUDA_DEFINE_FUNCTION(cuCtxPopCurrent, context_t* ctx);
+		CUDA_DEFINE_FUNCTION(cuCtxPushCurrent, context_t ctx);
 		// cuCtxSetCacheConfig
-		CUDA_DEFINE_FUNCTION(cuCtxSetCurrent, cu_context_t ctx);
+		CUDA_DEFINE_FUNCTION(cuCtxSetCurrent, context_t ctx);
 		// cuCtxSetLimit
 		// cuCtxSetSharedMemConfig
 		// cuCtxSynchronize
@@ -201,7 +201,7 @@ namespace nvidia::cuda {
 		// cuArray3DGetDescripter_v2
 		// cuArrayCreate_v2
 		// cuArrayDestroy
-		CUDA_DEFINE_FUNCTION(cuArrayGetDescriptor, cu_array_descriptor_t* pArrayDescripter, cu_array_t array);
+		CUDA_DEFINE_FUNCTION(cuArrayGetDescriptor, array_descriptor_t* pArrayDescripter, array_t array);
 		// cuArrayGetDescriptor_v2
 		// cuDeviceGetByPCIBusId
 		// cuDeviceGetPCIBusId
@@ -210,46 +210,42 @@ namespace nvidia::cuda {
 		// cuIpcGetMemHandle
 		// cuIpcOpenEventHandle
 		// cuIpcOpenMemHandle
-		CUDA_DEFINE_FUNCTION(cuMemAlloc, cu_device_ptr_t* ptr, std::size_t bytes);
+		CUDA_DEFINE_FUNCTION(cuMemAlloc, device_ptr_t* ptr, std::size_t bytes);
 		// cuMemAllocHost_v2
 		// cuMemAllocManaged
-		CUDA_DEFINE_FUNCTION(cuMemAllocPitch, cu_device_ptr_t* ptr, std::size_t* pitch, std::size_t width_in_bytes,
+		CUDA_DEFINE_FUNCTION(cuMemAllocPitch, device_ptr_t* ptr, std::size_t* pitch, std::size_t width_in_bytes,
 							 std::size_t height, std::uint32_t element_size_bytes);
-		CUDA_DEFINE_FUNCTION(cuMemFree, cu_device_ptr_t ptr);
+		CUDA_DEFINE_FUNCTION(cuMemFree, device_ptr_t ptr);
 		// cuMemFreeHost
 		// cuMemGetAddressRange_v2
 		// cuMemGetInfo_v2
 		// cuMemHostAlloc
-		CUDA_DEFINE_FUNCTION(cuMemHostGetDevicePointer, cu_device_ptr_t* devptr, void* ptr, std::uint32_t flags);
+		CUDA_DEFINE_FUNCTION(cuMemHostGetDevicePointer, device_ptr_t* devptr, void* ptr, std::uint32_t flags);
 		// cuMemHostGetFlags
 		// cuMemHostRegister_v2
 		// cuMemHostUnregister
-		CUDA_DEFINE_FUNCTION(cuMemcpy, cu_device_ptr_t dst, cu_device_ptr_t src, std::size_t bytes);
-		CUDA_DEFINE_FUNCTION(cuMemcpy2D, const cu_memcpy2d_t* copy);
-		CUDA_DEFINE_FUNCTION(cuMemcpy2DAsync, const cu_memcpy2d_t* copy, cu_stream_t stream);
+		CUDA_DEFINE_FUNCTION(cuMemcpy, device_ptr_t dst, device_ptr_t src, std::size_t bytes);
+		CUDA_DEFINE_FUNCTION(cuMemcpy2D, const memcpy2d_t* copy);
+		CUDA_DEFINE_FUNCTION(cuMemcpy2DAsync, const memcpy2d_t* copy, stream_t stream);
 		// cuMemcpy2DUnaligned_v2 / _v2_ptds
 		// cuMemcpy3D_v2 / _v2_ptds
 		// cuMemcpy3DAsync_v2 / _v2_ptsz
 		// cuMemcpy3DPeer / _ptds
 		// cuMemcpy3DPeerAsync_v2 / _v2_ptsz
 		// cuMemcpyAsync / _ptsz
-		CUDA_DEFINE_FUNCTION(cuMemcpyAtoA, cu_array_t dst, std::size_t dstOffset, cu_array_t src, std::size_t srcOffset,
+		CUDA_DEFINE_FUNCTION(cuMemcpyAtoA, array_t dst, std::size_t dstOffset, array_t src, std::size_t srcOffset,
 							 std::size_t byteCount);
-		CUDA_DEFINE_FUNCTION(cuMemcpyAtoD, cu_device_ptr_t dst, cu_array_t src, std::size_t srcOffset,
-							 std::size_t byteCount);
-		CUDA_DEFINE_FUNCTION(cuMemcpyAtoH, void* dst, cu_array_t src, std::size_t srcOffset, std::size_t byteCount);
-		CUDA_DEFINE_FUNCTION(cuMemcpyAtoHAsync, void* dst, cu_array_t src, std::size_t srcOffset,
-							 std::size_t byteCount);
-		CUDA_DEFINE_FUNCTION(cuMemcpyDtoA, cu_array_t dst, std::size_t dstOffset, cu_device_ptr_t src,
-							 std::size_t byteCount);
-		CUDA_DEFINE_FUNCTION(cuMemcpyDtoD, cu_device_ptr_t dst, cu_array_t srcArray, std::size_t byteCount);
-		CUDA_DEFINE_FUNCTION(cuMemcpyDtoH, void* dst, cu_array_t src, std::size_t byteCount);
-		CUDA_DEFINE_FUNCTION(cuMemcpyDtoHAsync, void* dst, cu_array_t src, std::size_t byteCount);
-		CUDA_DEFINE_FUNCTION(cuMemcpyHtoA, cu_array_t dst, std::size_t dstOffset, void* src, std::size_t byteCount);
-		CUDA_DEFINE_FUNCTION(cuMemcpyHtoAAsync, cu_array_t dst, std::size_t dstOffset, void* src,
-							 std::size_t byteCount);
-		CUDA_DEFINE_FUNCTION(cuMemcpyHtoD, cu_device_ptr_t dst, void* src, std::size_t byteCount);
-		CUDA_DEFINE_FUNCTION(cuMemcpyHtoDAsync, cu_device_ptr_t dst, void* src, std::size_t byteCount);
+		CUDA_DEFINE_FUNCTION(cuMemcpyAtoD, device_ptr_t dst, array_t src, std::size_t srcOffset, std::size_t byteCount);
+		CUDA_DEFINE_FUNCTION(cuMemcpyAtoH, void* dst, array_t src, std::size_t srcOffset, std::size_t byteCount);
+		CUDA_DEFINE_FUNCTION(cuMemcpyAtoHAsync, void* dst, array_t src, std::size_t srcOffset, std::size_t byteCount);
+		CUDA_DEFINE_FUNCTION(cuMemcpyDtoA, array_t dst, std::size_t dstOffset, device_ptr_t src, std::size_t byteCount);
+		CUDA_DEFINE_FUNCTION(cuMemcpyDtoD, device_ptr_t dst, array_t srcArray, std::size_t byteCount);
+		CUDA_DEFINE_FUNCTION(cuMemcpyDtoH, void* dst, array_t src, std::size_t byteCount);
+		CUDA_DEFINE_FUNCTION(cuMemcpyDtoHAsync, void* dst, array_t src, std::size_t byteCount);
+		CUDA_DEFINE_FUNCTION(cuMemcpyHtoA, array_t dst, std::size_t dstOffset, void* src, std::size_t byteCount);
+		CUDA_DEFINE_FUNCTION(cuMemcpyHtoAAsync, array_t dst, std::size_t dstOffset, void* src, std::size_t byteCount);
+		CUDA_DEFINE_FUNCTION(cuMemcpyHtoD, device_ptr_t dst, void* src, std::size_t byteCount);
+		CUDA_DEFINE_FUNCTION(cuMemcpyHtoDAsync, device_ptr_t dst, void* src, std::size_t byteCount);
 		// cuMemcpyPeer / _ptds
 		// cuMemcpyPeerAsync / _ptsz
 		// cuMemsetD16
@@ -295,10 +291,9 @@ namespace nvidia::cuda {
 		// cuStreamAddCallback
 		// cuStreamAttachMemAsync
 		// cuStreamBeginCapture_v2
-		CUDA_DEFINE_FUNCTION(cuStreamCreate, cu_stream_t* stream, cu_stream_flags flags);
-		CUDA_DEFINE_FUNCTION(cuStreamCreateWithPriority, cu_stream_t* stream, cu_stream_flags flags,
-							 std::int32_t priority);
-		CUDA_DEFINE_FUNCTION(cuStreamDestroy, cu_stream_t stream);
+		CUDA_DEFINE_FUNCTION(cuStreamCreate, stream_t* stream, stream_flags flags);
+		CUDA_DEFINE_FUNCTION(cuStreamCreateWithPriority, stream_t* stream, stream_flags flags, std::int32_t priority);
+		CUDA_DEFINE_FUNCTION(cuStreamDestroy, stream_t stream);
 		// cuStreamEndCapture
 		// cuStreamGetCaptureInfo
 		// cuStreamGetCtx
@@ -306,7 +301,7 @@ namespace nvidia::cuda {
 		// cuStreamGetPriority
 		// cuStreamIsCapturing
 		// cuStreamQuery
-		CUDA_DEFINE_FUNCTION(cuStreamSynchronize, cu_stream_t stream);
+		CUDA_DEFINE_FUNCTION(cuStreamSynchronize, stream_t stream);
 		// cuStreamWaitEvent
 		// cuThreadExchangeStreamCaptureMode
@@ -361,16 +356,16 @@ namespace nvidia::cuda {
 		// Todo
 
 		// Graphics Interoperability
-		CUDA_DEFINE_FUNCTION(cuGraphicsMapResources, std::uint32_t count, cu_graphics_resource_t* resources,
-							 cu_stream_t stream);
+		CUDA_DEFINE_FUNCTION(cuGraphicsMapResources, std::uint32_t count, graphics_resource_t* resources,
+							 stream_t stream);
 		// cuGraphicsResourcesGetMappedMipmappedArray
 		// cuGraphicsResourcesGetMappedPointer_v2
 		// cuGraphicsResourcesSetMapFlags_v2
-		CUDA_DEFINE_FUNCTION(cuGraphicsSubResourceGetMappedArray, cu_array_t* array, cu_graphics_resource_t resource,
+		CUDA_DEFINE_FUNCTION(cuGraphicsSubResourceGetMappedArray, array_t* array, graphics_resource_t resource,
 							 std::uint32_t index, std::uint32_t level);
-		CUDA_DEFINE_FUNCTION(cuGraphicsUnmapResources, std::uint32_t count, cu_graphics_resource_t* resources,
-							 cu_stream_t stream);
-		CUDA_DEFINE_FUNCTION(cuGraphicsUnregisterResource, cu_graphics_resource_t resource);
+		CUDA_DEFINE_FUNCTION(cuGraphicsUnmapResources, std::uint32_t count, graphics_resource_t* resources,
+							 stream_t stream);
+		CUDA_DEFINE_FUNCTION(cuGraphicsUnregisterResource, graphics_resource_t resource);
 
 		// Profile Control
 		// Todo
@@ -396,13 +391,13 @@ namespace nvidia::cuda {
 		// cuGraphicsD3D10RegisterResource
 
 		// Direct3D11 Interopability
-		CUDA_DEFINE_FUNCTION(cuD3D11GetDevice, cu_device_t* device, IDXGIAdapter* adapter);
+		CUDA_DEFINE_FUNCTION(cuD3D11GetDevice, device_t* device, IDXGIAdapter* adapter);
 		// cuD3D11GetDevices
-		CUDA_DEFINE_FUNCTION(cuGraphicsD3D11RegisterResource, cu_graphics_resource_t* resource,
+		CUDA_DEFINE_FUNCTION(cuGraphicsD3D11RegisterResource, graphics_resource_t* resource,
 							 ID3D11Resource* d3dresource, std::uint32_t flags);
 #endif
 	};
 } // namespace nvidia::cuda
 
-P_ENABLE_BITMASK_OPERATORS(::nvidia::cuda::cu_context_flags)
-P_ENABLE_BITMASK_OPERATORS(::nvidia::cuda::cu_stream_flags)
+P_ENABLE_BITMASK_OPERATORS(::nvidia::cuda::context_flags)
+P_ENABLE_BITMASK_OPERATORS(::nvidia::cuda::stream_flags)