nvidia/cuda: Remove 'cu_' prefix from types

Michael Fabian 'Xaymar' Dirks 2020-06-14 19:17:26 +02:00
parent 6ae69a5c71
commit 9658c1ee0f
11 changed files with 112 additions and 117 deletions


@@ -79,7 +79,7 @@ face_tracking_instance::face_tracking_instance(obs_data_t* settings, obs_source_
_geometry = std::make_shared<gs::vertex_buffer>(4, 1);
auto cctx = std::make_shared<::nvidia::cuda::context_stack>(_cuda, _cuda_ctx);
_cuda_stream =
std::make_shared<::nvidia::cuda::stream>(_cuda, ::nvidia::cuda::cu_stream_flags::NON_BLOCKING, 0);
std::make_shared<::nvidia::cuda::stream>(_cuda, ::nvidia::cuda::stream_flags::NON_BLOCKING, 0);
}
{ // Asynchronously load Face Tracking.
@@ -311,26 +311,26 @@ void face_tracking_instance::async_track(std::shared_ptr<void> ptr)
#ifdef ENABLE_PROFILING
auto prof = _profile_ar_copy->track();
#endif
::nvidia::cuda::cu_memcpy2d_t mc;
::nvidia::cuda::memcpy2d_t mc;
mc.src_x_in_bytes = 0;
mc.src_y = 0;
mc.src_memory_type = ::nvidia::cuda::cu_memory_type::ARRAY;
mc.src_memory_type = ::nvidia::cuda::memory_type::ARRAY;
mc.src_host = nullptr;
mc.src_device = 0;
mc.src_array = _ar_texture_cuda->map(_cuda_stream);
mc.src_pitch = static_cast<size_t>(_ar_image.pitch);
mc.dst_x_in_bytes = 0;
mc.dst_y = 0;
mc.dst_memory_type = ::nvidia::cuda::cu_memory_type::DEVICE;
mc.dst_memory_type = ::nvidia::cuda::memory_type::DEVICE;
mc.dst_host = 0;
mc.dst_device = reinterpret_cast<::nvidia::cuda::cu_device_ptr_t>(_ar_image.pixels);
mc.dst_device = reinterpret_cast<::nvidia::cuda::device_ptr_t>(_ar_image.pixels);
mc.dst_array = 0;
mc.dst_pitch = static_cast<size_t>(_ar_image.pitch);
mc.width_in_bytes = static_cast<size_t>(_ar_image.pitch);
mc.height = _ar_image.height;
if (::nvidia::cuda::cu_result res = _cuda->cuMemcpy2DAsync(&mc, _cuda_stream->get());
res != ::nvidia::cuda::cu_result::SUCCESS) {
if (::nvidia::cuda::result res = _cuda->cuMemcpy2DAsync(&mc, _cuda_stream->get());
res != ::nvidia::cuda::result::SUCCESS) {
LOG_ERROR("<%s> Failed to prepare buffers for tracking.", obs_source_get_name(_self));
return;
}
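For context, the memcpy2d_t structure filled in above mirrors the raw CUDA driver API's CUDA_MEMCPY2D. Below is a minimal sketch of the same array-to-device copy written directly against the driver API; the helper name and parameters are illustrative and not part of this commit.

#include <cstddef>
#include <cuda.h>

// Illustrative helper: copy a pitched 2D image from a mapped CUDA array into
// linear device memory, matching the fields set on memcpy2d_t above.
CUresult copy_array_to_device(CUarray src, CUdeviceptr dst, std::size_t pitch, std::size_t height, CUstream stream)
{
	CUDA_MEMCPY2D mc = {};
	mc.srcMemoryType = CU_MEMORYTYPE_ARRAY;  // source: the mapped texture array
	mc.srcArray      = src;
	mc.dstMemoryType = CU_MEMORYTYPE_DEVICE; // destination: linear device memory
	mc.dstDevice     = dst;
	mc.dstPitch      = pitch;
	mc.WidthInBytes  = pitch;                // copy whole rows
	mc.Height        = height;
	return cuMemcpy2DAsync(&mc, stream);     // asynchronous with respect to the host
}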


@@ -31,7 +31,7 @@ nvidia::cuda::context_stack::context_stack(std::shared_ptr<::nvidia::cuda::cuda>
if (!context)
throw std::invalid_argument("context");
if (cu_result res = _cuda->cuCtxPushCurrent(_ctx->get()); res != cu_result::SUCCESS) {
if (result res = _cuda->cuCtxPushCurrent(_ctx->get()); res != result::SUCCESS) {
throw std::runtime_error("Failed to push context.");
}
}
@@ -40,7 +40,7 @@ nvidia::cuda::context_stack::~context_stack()
{
using namespace ::nvidia::cuda;
cu_context_t ctx;
context_t ctx;
_cuda->cuCtxGetCurrent(&ctx);
if (ctx == _ctx->get()) {
_cuda->cuCtxPopCurrent(&ctx);
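Usage is unchanged apart from the type names; a minimal sketch of the intended scope-guard pattern, borrowing the _cuda and _cuda_ctx members from the face-tracking source above:

{
	// Pushes the context on construction; the destructor pops it again if it
	// is still the current context when the scope ends.
	auto cctx = std::make_shared<::nvidia::cuda::context_stack>(_cuda, _cuda_ctx);
	// ... CUDA calls that require the context to be current ...
}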


@@ -62,12 +62,12 @@ nvidia::cuda::context::context(std::shared_ptr<::nvidia::cuda::cuda> cuda, ID3D1
dxgi_device->GetAdapter(&dxgi_adapter);
// Get Device Index
if (cu_result res = _cuda->cuD3D11GetDevice(&_device, dxgi_adapter); res != cu_result::SUCCESS) {
if (result res = _cuda->cuD3D11GetDevice(&_device, dxgi_adapter); res != result::SUCCESS) {
throw std::runtime_error("Failed to get device index for device.");
}
// Acquire Context
if (cu_result res = _cuda->cuDevicePrimaryCtxRetain(&_ctx, _device); res != cu_result::SUCCESS) {
if (result res = _cuda->cuDevicePrimaryCtxRetain(&_ctx, _device); res != result::SUCCESS) {
throw std::runtime_error("Failed to acquire primary device context.");
}
@@ -75,7 +75,7 @@ nvidia::cuda::context::context(std::shared_ptr<::nvidia::cuda::cuda> cuda, ID3D1
}
#endif
::nvidia::cuda::cu_context_t nvidia::cuda::context::get()
::nvidia::cuda::context_t nvidia::cuda::context::get()
{
return _ctx;
}
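For reference, the D3D11 constructor path above corresponds to these raw driver-API calls. A minimal sketch assuming <cuda.h> and <cudaD3D11.h> are available; the helper name is illustrative, not part of this commit.

#include <cuda.h>
#include <cudaD3D11.h>
#include <stdexcept>

// Illustrative helper: resolve the CUDA device behind a DXGI adapter and
// retain its primary context (pair with cuDevicePrimaryCtxRelease later).
CUcontext retain_primary_context(IDXGIAdapter* adapter)
{
	CUdevice device = 0;
	if (cuD3D11GetDevice(&device, adapter) != CUDA_SUCCESS)
		throw std::runtime_error("Failed to get device index for device.");

	CUcontext ctx = nullptr;
	if (cuDevicePrimaryCtxRetain(&ctx, device) != CUDA_SUCCESS)
		throw std::runtime_error("Failed to acquire primary device context.");

	return ctx;
}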


@@ -24,11 +24,11 @@
namespace nvidia::cuda {
class context {
std::shared_ptr<::nvidia::cuda::cuda> _cuda;
::nvidia::cuda::cu_context_t _ctx;
::nvidia::cuda::context_t _ctx;
// Primary Device Context
bool _has_device;
::nvidia::cuda::cu_device_t _device;
::nvidia::cuda::device_t _device;
private:
context(std::shared_ptr<::nvidia::cuda::cuda> cuda);
@@ -40,6 +40,6 @@ namespace nvidia::cuda {
context(std::shared_ptr<::nvidia::cuda::cuda> cuda, ID3D11Device* device);
#endif
::nvidia::cuda::cu_context_t get();
::nvidia::cuda::context_t get();
};
} // namespace nvidia::cuda


@@ -54,7 +54,7 @@ nvidia::cuda::gstexture::gstexture(std::shared_ptr<nvidia::cuda::cuda> cuda, std
}
switch (_cuda->cuGraphicsD3D11RegisterResource(&_resource, resource, 0)) {
case nvidia::cuda::cu_result::SUCCESS:
case nvidia::cuda::result::SUCCESS:
break;
default:
throw std::runtime_error("nvidia::cuda::gstexture: Failed to register resource.");
@@ -69,15 +69,15 @@ nvidia::cuda::gstexture::~gstexture()
_cuda->cuGraphicsUnregisterResource(_resource);
}
nvidia::cuda::cu_array_t nvidia::cuda::gstexture::map(std::shared_ptr<nvidia::cuda::stream> stream)
nvidia::cuda::array_t nvidia::cuda::gstexture::map(std::shared_ptr<nvidia::cuda::stream> stream)
{
if (_is_mapped) {
return _pointer;
}
cu_graphics_resource_t resources[] = {_resource};
graphics_resource_t resources[] = {_resource};
switch (_cuda->cuGraphicsMapResources(1, resources, stream->get())) {
case nvidia::cuda::cu_result::SUCCESS:
case nvidia::cuda::result::SUCCESS:
break;
default:
throw std::runtime_error("nvidia::cuda::gstexture: Mapping failed.");
@@ -87,7 +87,7 @@ nvidia::cuda::cu_array_t nvidia::cuda::gstexture::map(std::shared_ptr<nvidia::cu
_is_mapped = true;
switch (_cuda->cuGraphicsSubResourceGetMappedArray(&_pointer, _resource, 0, 0)) {
case nvidia::cuda::cu_result::SUCCESS:
case nvidia::cuda::result::SUCCESS:
break;
default:
unmap();
@@ -102,9 +102,9 @@ void nvidia::cuda::gstexture::unmap()
if (!_is_mapped)
return;
cu_graphics_resource_t resources[] = {_resource};
graphics_resource_t resources[] = {_resource};
switch (_cuda->cuGraphicsUnmapResources(1, resources, _stream->get())) {
case nvidia::cuda::cu_result::SUCCESS:
case nvidia::cuda::result::SUCCESS:
break;
default:
throw std::runtime_error("nvidia::cuda::gstexture: Unmapping failed.");
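A hedged usage sketch for the renamed map/unmap pair: map the registered texture, query its array descriptor through the cuArrayGetDescriptor binding declared further down in this commit, and unmap again. The helper name is illustrative.

#include <memory>

// Illustrative helper (not part of the commit): map a registered texture,
// read its CUDA array descriptor, then unmap it again.
void inspect_texture(std::shared_ptr<::nvidia::cuda::cuda>      cuda,
                     std::shared_ptr<::nvidia::cuda::gstexture> texture,
                     std::shared_ptr<::nvidia::cuda::stream>    stream)
{
	::nvidia::cuda::array_t arr = texture->map(stream); // maps the resource on the given stream

	::nvidia::cuda::array_descriptor_t desc = {};
	if (cuda->cuArrayGetDescriptor(&desc, arr) == ::nvidia::cuda::result::SUCCESS) {
		// desc.width, desc.height, desc.num_channels and desc.format now
		// describe the texture's backing array.
	}

	texture->unmap(); // unmaps on the stream that was passed to map()
}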


@@ -28,17 +28,17 @@ namespace nvidia::cuda {
class gstexture {
std::shared_ptr<::nvidia::cuda::cuda> _cuda;
std::shared_ptr<gs::texture> _texture;
cu_graphics_resource_t _resource;
graphics_resource_t _resource;
bool _is_mapped;
cu_array_t _pointer;
array_t _pointer;
std::shared_ptr<nvidia::cuda::stream> _stream;
public:
gstexture(std::shared_ptr<nvidia::cuda::cuda> cuda, std::shared_ptr<gs::texture> texture);
~gstexture();
cu_array_t map(std::shared_ptr<nvidia::cuda::stream> stream);
array_t map(std::shared_ptr<nvidia::cuda::stream> stream);
void unmap();
};
} // namespace nvidia::cuda


@@ -23,9 +23,9 @@
nvidia::cuda::memory::memory(std::shared_ptr<::nvidia::cuda::cuda> cuda, std::size_t size)
: _cuda(cuda), _pointer(), _size(size)
{
::nvidia::cuda::cu_result res = _cuda->cuMemAlloc(&_pointer, size);
::nvidia::cuda::result res = _cuda->cuMemAlloc(&_pointer, size);
switch (res) {
case ::nvidia::cuda::cu_result::SUCCESS:
case ::nvidia::cuda::result::SUCCESS:
break;
default:
throw std::runtime_error("nvidia::cuda::memory: cuMemAlloc failed.");
@@ -37,7 +37,7 @@ nvidia::cuda::memory::~memory()
_cuda->cuMemFree(_pointer);
}
nvidia::cuda::cu_device_ptr_t nvidia::cuda::memory::get()
nvidia::cuda::device_ptr_t nvidia::cuda::memory::get()
{
return _pointer;
}


@@ -25,14 +25,14 @@
namespace nvidia::cuda {
class memory {
std::shared_ptr<::nvidia::cuda::cuda> _cuda;
cu_device_ptr_t _pointer;
device_ptr_t _pointer;
std::size_t _size;
public:
memory(std::shared_ptr<::nvidia::cuda::cuda> cuda, std::size_t size);
~memory();
cu_device_ptr_t get();
device_ptr_t get();
std::size_t size();
};
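A hedged usage sketch for the renamed memory wrapper: allocate device memory and upload a host buffer through the cuMemcpyHtoD binding. The helper name and error text are illustrative, and a CUDA context must already be current (e.g. via context_stack).

#include <cstddef>
#include <memory>
#include <stdexcept>

// Illustrative helper: allocate `size` bytes on the device and copy host data into it.
std::shared_ptr<::nvidia::cuda::memory> upload(std::shared_ptr<::nvidia::cuda::cuda> cuda,
                                               void* host_data, std::size_t size)
{
	auto buffer = std::make_shared<::nvidia::cuda::memory>(cuda, size); // cuMemAlloc in the constructor
	if (cuda->cuMemcpyHtoD(buffer->get(), host_data, size) != ::nvidia::cuda::result::SUCCESS) {
		throw std::runtime_error("Upload failed.");
	}
	return buffer; // cuMemFree runs when the last reference is dropped
}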


@@ -20,18 +20,18 @@
#include "nvidia-cuda-stream.hpp"
#include <stdexcept>
nvidia::cuda::stream::stream(std::shared_ptr<::nvidia::cuda::cuda> cuda, ::nvidia::cuda::cu_stream_flags flags,
nvidia::cuda::stream::stream(std::shared_ptr<::nvidia::cuda::cuda> cuda, ::nvidia::cuda::stream_flags flags,
std::int32_t priority)
: _cuda(cuda)
{
nvidia::cuda::cu_result res;
nvidia::cuda::result res;
if (priority == 0) {
res = _cuda->cuStreamCreate(&_stream, flags);
} else {
res = _cuda->cuStreamCreateWithPriority(&_stream, flags, priority);
}
switch (res) {
case nvidia::cuda::cu_result::SUCCESS:
case nvidia::cuda::result::SUCCESS:
break;
default:
throw std::runtime_error("Failed to create CUstream object.");
@@ -43,7 +43,7 @@ nvidia::cuda::stream::~stream()
_cuda->cuStreamDestroy(_stream);
}
::nvidia::cuda::cu_stream_t nvidia::cuda::stream::get()
::nvidia::cuda::stream_t nvidia::cuda::stream::get()
{
return _stream;
}


@@ -24,14 +24,14 @@
namespace nvidia::cuda {
class stream {
std::shared_ptr<::nvidia::cuda::cuda> _cuda;
::nvidia::cuda::cu_stream_t _stream;
::nvidia::cuda::stream_t _stream;
public:
stream(std::shared_ptr<::nvidia::cuda::cuda> cuda,
::nvidia::cuda::cu_stream_flags flags = ::nvidia::cuda::cu_stream_flags::DEFAULT,
::nvidia::cuda::stream_flags flags = ::nvidia::cuda::stream_flags::DEFAULT,
std::int32_t priority = 0);
~stream();
::nvidia::cuda::cu_stream_t get();
::nvidia::cuda::stream_t get();
};
} // namespace nvidia::cuda
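A hedged usage sketch under the renamed flags: create a non-blocking stream and later wait on it through the cuStreamSynchronize binding. The function and variable names are illustrative.

#include <memory>

// Illustrative: set up a stream, enqueue work, then wait for it.
void run_on_stream(std::shared_ptr<::nvidia::cuda::cuda> cuda)
{
	// NON_BLOCKING: this stream does not synchronize with the legacy default stream.
	auto work_stream = std::make_shared<::nvidia::cuda::stream>(
		cuda, ::nvidia::cuda::stream_flags::NON_BLOCKING, 0);

	// ... enqueue asynchronous copies or kernels on work_stream->get() ...

	cuda->cuStreamSynchronize(work_stream->get()); // block until all queued work has finished
}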


@@ -32,15 +32,15 @@
#pragma warning(pop)
#endif
#define CUDA_DEFINE_FUNCTION(name, ...) \
private: \
typedef ::nvidia::cuda::cu_result (*t##name)(__VA_ARGS__); \
\
public: \
#define CUDA_DEFINE_FUNCTION(name, ...) \
private: \
typedef ::nvidia::cuda::result (*t##name)(__VA_ARGS__); \
\
public: \
t##name name;
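For clarity, the renamed macro expands as follows for a single entry, taking cuMemAlloc (declared further down) as the example; the expansion below is illustrative, not part of the commit.

// CUDA_DEFINE_FUNCTION(cuMemAlloc, device_ptr_t* ptr, std::size_t bytes);
// expands to:
private:
	typedef ::nvidia::cuda::result (*tcuMemAlloc)(device_ptr_t* ptr, std::size_t bytes);

public:
	tcuMemAlloc cuMemAlloc;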
namespace nvidia::cuda {
enum class cu_result : std::size_t {
enum class result : std::size_t {
SUCCESS = 0,
INVALID_VALUE = 1,
OUT_OF_MEMORY = 2,
@@ -58,14 +58,14 @@ namespace nvidia::cuda {
// Still missing some.
};
enum class cu_memory_type : std::uint32_t {
enum class memory_type : std::uint32_t {
HOST = 1,
DEVICE = 2,
ARRAY = 3,
UNIFIED = 4,
};
enum class cu_array_format : std::uint32_t {
enum class array_format : std::uint32_t {
UNSIGNED_INT8 = 0b00000001,
UNSIGNED_INT16 = 0b00000010,
UNSIGNED_INT32 = 0b00000011,
@@ -76,7 +76,7 @@ namespace nvidia::cuda {
FLOAT = 0b00100000,
};
enum class cu_context_flags : std::uint32_t {
enum class context_flags : std::uint32_t {
SCHEDULER_AUTO = 0x0,
SCHEDULER_SPIN = 0x1,
SCHEDULER_YIELD = 0x2,
@@ -85,46 +85,46 @@ namespace nvidia::cuda {
LOCAL_MEMORY_RESIZE_TO_MAXIMUM = 0x10,
};
enum class cu_stream_flags : std::uint32_t {
enum class stream_flags : std::uint32_t {
DEFAULT = 0x0,
NON_BLOCKING = 0x1,
};
typedef void* cu_array_t;
typedef void* cu_context_t;
typedef std::uint64_t cu_device_ptr_t;
typedef void* cu_graphics_resource_t;
typedef void* cu_stream_t;
typedef std::int32_t cu_device_t;
typedef void* array_t;
typedef void* context_t;
typedef std::uint64_t device_ptr_t;
typedef void* graphics_resource_t;
typedef void* stream_t;
typedef std::int32_t device_t;
struct cu_memcpy2d_t {
struct memcpy2d_t {
std::size_t src_x_in_bytes;
std::size_t src_y;
cu_memory_type src_memory_type;
const void* src_host;
cu_device_ptr_t src_device;
cu_array_t src_array;
std::size_t src_pitch;
memory_type src_memory_type;
const void* src_host;
device_ptr_t src_device;
array_t src_array;
std::size_t src_pitch;
std::size_t dst_x_in_bytes;
std::size_t dst_y;
cu_memory_type dst_memory_type;
const void* dst_host;
cu_device_ptr_t dst_device;
cu_array_t dst_array;
std::size_t dst_pitch;
memory_type dst_memory_type;
const void* dst_host;
device_ptr_t dst_device;
array_t dst_array;
std::size_t dst_pitch;
std::size_t width_in_bytes;
std::size_t height;
};
struct cu_array_descriptor_t {
std::size_t width;
std::size_t height;
std::uint32_t num_channels;
cu_array_format format;
struct array_descriptor_t {
std::size_t width;
std::size_t height;
std::uint32_t num_channels;
array_format format;
};
class cuda {
@@ -154,26 +154,26 @@ namespace nvidia::cuda {
// Primary Context Management
// cuDevicePrimaryCtxGetState
CUDA_DEFINE_FUNCTION(cuDevicePrimaryCtxRelease, cu_device_t device);
CUDA_DEFINE_FUNCTION(cuDevicePrimaryCtxRelease, device_t device);
// cuDevicePrimaryCtxReset_v2
CUDA_DEFINE_FUNCTION(cuDevicePrimaryCtxRetain, cu_context_t* ctx, cu_device_t device);
CUDA_DEFINE_FUNCTION(cuDevicePrimaryCtxSetFlags, cu_device_t device, cu_context_flags flags);
CUDA_DEFINE_FUNCTION(cuDevicePrimaryCtxRetain, context_t* ctx, device_t device);
CUDA_DEFINE_FUNCTION(cuDevicePrimaryCtxSetFlags, device_t device, context_flags flags);
// Context Management
CUDA_DEFINE_FUNCTION(cuCtxCreate, cu_context_t* ctx, cu_context_flags flags, cu_device_t device);
CUDA_DEFINE_FUNCTION(cuCtxDestroy, cu_context_t ctx);
CUDA_DEFINE_FUNCTION(cuCtxCreate, context_t* ctx, context_flags flags, device_t device);
CUDA_DEFINE_FUNCTION(cuCtxDestroy, context_t ctx);
// cuCtxGetApiVersion
// cuCtxGetCacheConfig
CUDA_DEFINE_FUNCTION(cuCtxGetCurrent, cu_context_t* ctx);
CUDA_DEFINE_FUNCTION(cuCtxGetCurrent, context_t* ctx);
// cuCtxGetDevice
// cuCtxGetFlags
// cuCtxGetLimit
// cuCtxGetSharedMemConfig
CUDA_DEFINE_FUNCTION(cuCtxGetStreamPriorityRange, std::int32_t* lowestPriority, std::int32_t* highestPriority);
CUDA_DEFINE_FUNCTION(cuCtxPopCurrent, cu_context_t* ctx);
CUDA_DEFINE_FUNCTION(cuCtxPushCurrent, cu_context_t ctx);
CUDA_DEFINE_FUNCTION(cuCtxPopCurrent, context_t* ctx);
CUDA_DEFINE_FUNCTION(cuCtxPushCurrent, context_t ctx);
// cuCtxSetCacheConfig
CUDA_DEFINE_FUNCTION(cuCtxSetCurrent, cu_context_t ctx);
CUDA_DEFINE_FUNCTION(cuCtxSetCurrent, context_t ctx);
// cuCtxSetLimit
// cuCtxSetSharedMemConfig
// cuCtxSynchronize
@@ -201,7 +201,7 @@ namespace nvidia::cuda {
// cuArray3DGetDescripter_v2
// cuArrayCreate_v2
// cuArrayDestroy
CUDA_DEFINE_FUNCTION(cuArrayGetDescriptor, cu_array_descriptor_t* pArrayDescripter, cu_array_t array);
CUDA_DEFINE_FUNCTION(cuArrayGetDescriptor, array_descriptor_t* pArrayDescripter, array_t array);
// cuArrayGetDescriptor_v2
// cuDeviceGetByPCIBusId
// cuDeviceGetPCIBusId
@@ -210,46 +210,42 @@ namespace nvidia::cuda {
// cuIpcGetMemHandle
// cuIpcOpenEventHandle
// cuIpcOpenMemHandle
CUDA_DEFINE_FUNCTION(cuMemAlloc, cu_device_ptr_t* ptr, std::size_t bytes);
CUDA_DEFINE_FUNCTION(cuMemAlloc, device_ptr_t* ptr, std::size_t bytes);
// cuMemAllocHost_v2
// cuMemAllocManaged
CUDA_DEFINE_FUNCTION(cuMemAllocPitch, cu_device_ptr_t* ptr, std::size_t* pitch, std::size_t width_in_bytes,
CUDA_DEFINE_FUNCTION(cuMemAllocPitch, device_ptr_t* ptr, std::size_t* pitch, std::size_t width_in_bytes,
std::size_t height, std::uint32_t element_size_bytes);
CUDA_DEFINE_FUNCTION(cuMemFree, cu_device_ptr_t ptr);
CUDA_DEFINE_FUNCTION(cuMemFree, device_ptr_t ptr);
// cuMemFreeHost
// cuMemGetAddressRange_v2
// cuMemGetInfo_v2
// cuMemHostAlloc
CUDA_DEFINE_FUNCTION(cuMemHostGetDevicePointer, cu_device_ptr_t* devptr, void* ptr, std::uint32_t flags);
CUDA_DEFINE_FUNCTION(cuMemHostGetDevicePointer, device_ptr_t* devptr, void* ptr, std::uint32_t flags);
// cuMemHostGetFlags
// cuMemHostRegister_v2
// cuMemHostUnregister
CUDA_DEFINE_FUNCTION(cuMemcpy, cu_device_ptr_t dst, cu_device_ptr_t src, std::size_t bytes);
CUDA_DEFINE_FUNCTION(cuMemcpy2D, const cu_memcpy2d_t* copy);
CUDA_DEFINE_FUNCTION(cuMemcpy2DAsync, const cu_memcpy2d_t* copy, cu_stream_t stream);
CUDA_DEFINE_FUNCTION(cuMemcpy, device_ptr_t dst, device_ptr_t src, std::size_t bytes);
CUDA_DEFINE_FUNCTION(cuMemcpy2D, const memcpy2d_t* copy);
CUDA_DEFINE_FUNCTION(cuMemcpy2DAsync, const memcpy2d_t* copy, stream_t stream);
// cuMemcpy2DUnaligned_v2 / _v2_ptds
// cuMemcpy3D_v2 / _v2_ptds
// cuMemcpy3DAsync_v2 / _v2_ptsz
// cuMemcpy3DPeer / _ptds
// cuMemcpy3DPeerAsync_v2 / _v2_ptsz
// cuMemcpyAsync / _ptsz
CUDA_DEFINE_FUNCTION(cuMemcpyAtoA, cu_array_t dst, std::size_t dstOffset, cu_array_t src, std::size_t srcOffset,
CUDA_DEFINE_FUNCTION(cuMemcpyAtoA, array_t dst, std::size_t dstOffset, array_t src, std::size_t srcOffset,
std::size_t byteCount);
CUDA_DEFINE_FUNCTION(cuMemcpyAtoD, cu_device_ptr_t dst, cu_array_t src, std::size_t srcOffset,
std::size_t byteCount);
CUDA_DEFINE_FUNCTION(cuMemcpyAtoH, void* dst, cu_array_t src, std::size_t srcOffset, std::size_t byteCount);
CUDA_DEFINE_FUNCTION(cuMemcpyAtoHAsync, void* dst, cu_array_t src, std::size_t srcOffset,
std::size_t byteCount);
CUDA_DEFINE_FUNCTION(cuMemcpyDtoA, cu_array_t dst, std::size_t dstOffset, cu_device_ptr_t src,
std::size_t byteCount);
CUDA_DEFINE_FUNCTION(cuMemcpyDtoD, cu_device_ptr_t dst, cu_array_t srcArray, std::size_t byteCount);
CUDA_DEFINE_FUNCTION(cuMemcpyDtoH, void* dst, cu_array_t src, std::size_t byteCount);
CUDA_DEFINE_FUNCTION(cuMemcpyDtoHAsync, void* dst, cu_array_t src, std::size_t byteCount);
CUDA_DEFINE_FUNCTION(cuMemcpyHtoA, cu_array_t dst, std::size_t dstOffset, void* src, std::size_t byteCount);
CUDA_DEFINE_FUNCTION(cuMemcpyHtoAAsync, cu_array_t dst, std::size_t dstOffset, void* src,
std::size_t byteCount);
CUDA_DEFINE_FUNCTION(cuMemcpyHtoD, cu_device_ptr_t dst, void* src, std::size_t byteCount);
CUDA_DEFINE_FUNCTION(cuMemcpyHtoDAsync, cu_device_ptr_t dst, void* src, std::size_t byteCount);
CUDA_DEFINE_FUNCTION(cuMemcpyAtoD, device_ptr_t dst, array_t src, std::size_t srcOffset, std::size_t byteCount);
CUDA_DEFINE_FUNCTION(cuMemcpyAtoH, void* dst, array_t src, std::size_t srcOffset, std::size_t byteCount);
CUDA_DEFINE_FUNCTION(cuMemcpyAtoHAsync, void* dst, array_t src, std::size_t srcOffset, std::size_t byteCount);
CUDA_DEFINE_FUNCTION(cuMemcpyDtoA, array_t dst, std::size_t dstOffset, device_ptr_t src, std::size_t byteCount);
CUDA_DEFINE_FUNCTION(cuMemcpyDtoD, device_ptr_t dst, array_t srcArray, std::size_t byteCount);
CUDA_DEFINE_FUNCTION(cuMemcpyDtoH, void* dst, array_t src, std::size_t byteCount);
CUDA_DEFINE_FUNCTION(cuMemcpyDtoHAsync, void* dst, array_t src, std::size_t byteCount);
CUDA_DEFINE_FUNCTION(cuMemcpyHtoA, array_t dst, std::size_t dstOffset, void* src, std::size_t byteCount);
CUDA_DEFINE_FUNCTION(cuMemcpyHtoAAsync, array_t dst, std::size_t dstOffset, void* src, std::size_t byteCount);
CUDA_DEFINE_FUNCTION(cuMemcpyHtoD, device_ptr_t dst, void* src, std::size_t byteCount);
CUDA_DEFINE_FUNCTION(cuMemcpyHtoDAsync, device_ptr_t dst, void* src, std::size_t byteCount);
// cuMemcpyPeer / _ptds
// cuMemcpyPeerAsync / _ptsz
// cuMemsetD16
@@ -295,10 +291,9 @@ namespace nvidia::cuda {
// cuStreamAddCallback
// cuStreamAttachMemAsync
// cuStreamBeginCapture_v2
CUDA_DEFINE_FUNCTION(cuStreamCreate, cu_stream_t* stream, cu_stream_flags flags);
CUDA_DEFINE_FUNCTION(cuStreamCreateWithPriority, cu_stream_t* stream, cu_stream_flags flags,
std::int32_t priority);
CUDA_DEFINE_FUNCTION(cuStreamDestroy, cu_stream_t stream);
CUDA_DEFINE_FUNCTION(cuStreamCreate, stream_t* stream, stream_flags flags);
CUDA_DEFINE_FUNCTION(cuStreamCreateWithPriority, stream_t* stream, stream_flags flags, std::int32_t priority);
CUDA_DEFINE_FUNCTION(cuStreamDestroy, stream_t stream);
// cuStreamEndCapture
// cuStreamGetCaptureInfo
// cuStreamGetCtx
@@ -306,7 +301,7 @@ namespace nvidia::cuda {
// cuStreamGetPriority
// cuStreamIsCapturing
// cuStreamQuery
CUDA_DEFINE_FUNCTION(cuStreamSynchronize, cu_stream_t stream);
CUDA_DEFINE_FUNCTION(cuStreamSynchronize, stream_t stream);
// cuStreamWaitEvent
// cuThreadExchangeStreamCaptureMode
@@ -361,16 +356,16 @@ namespace nvidia::cuda {
// Todo
// Graphics Interoperability
CUDA_DEFINE_FUNCTION(cuGraphicsMapResources, std::uint32_t count, cu_graphics_resource_t* resources,
cu_stream_t stream);
CUDA_DEFINE_FUNCTION(cuGraphicsMapResources, std::uint32_t count, graphics_resource_t* resources,
stream_t stream);
// cuGraphicsResourcesGetMappedMipmappedArray
// cuGraphicsResourcesGetMappedPointer_v2
// cuGraphicsResourcesSetMapFlags_v2
CUDA_DEFINE_FUNCTION(cuGraphicsSubResourceGetMappedArray, cu_array_t* array, cu_graphics_resource_t resource,
CUDA_DEFINE_FUNCTION(cuGraphicsSubResourceGetMappedArray, array_t* array, graphics_resource_t resource,
std::uint32_t index, std::uint32_t level);
CUDA_DEFINE_FUNCTION(cuGraphicsUnmapResources, std::uint32_t count, cu_graphics_resource_t* resources,
cu_stream_t stream);
CUDA_DEFINE_FUNCTION(cuGraphicsUnregisterResource, cu_graphics_resource_t resource);
CUDA_DEFINE_FUNCTION(cuGraphicsUnmapResources, std::uint32_t count, graphics_resource_t* resources,
stream_t stream);
CUDA_DEFINE_FUNCTION(cuGraphicsUnregisterResource, graphics_resource_t resource);
// Profile Control
// Todo
@@ -396,13 +391,13 @@ namespace nvidia::cuda {
// cuGraphicsD3D10RegisterResource
// Direct3D11 Interopability
CUDA_DEFINE_FUNCTION(cuD3D11GetDevice, cu_device_t* device, IDXGIAdapter* adapter);
CUDA_DEFINE_FUNCTION(cuD3D11GetDevice, device_t* device, IDXGIAdapter* adapter);
// cuD3D11GetDevices
CUDA_DEFINE_FUNCTION(cuGraphicsD3D11RegisterResource, cu_graphics_resource_t* resource,
CUDA_DEFINE_FUNCTION(cuGraphicsD3D11RegisterResource, graphics_resource_t* resource,
ID3D11Resource* d3dresource, std::uint32_t flags);
#endif
};
} // namespace nvidia::cuda
P_ENABLE_BITMASK_OPERATORS(::nvidia::cuda::cu_context_flags)
P_ENABLE_BITMASK_OPERATORS(::nvidia::cuda::cu_stream_flags)
P_ENABLE_BITMASK_OPERATORS(::nvidia::cuda::context_flags)
P_ENABLE_BITMASK_OPERATORS(::nvidia::cuda::stream_flags)
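Assuming P_ENABLE_BITMASK_OPERATORS provides the usual bitwise operator overloads for these enum classes (its definition is not part of this diff), the renamed flag types combine as before. A minimal sketch, with the declarations above assumed to be in scope and the function name illustrative:

void flag_example()
{
	using namespace ::nvidia::cuda;

	// Illustrative: combine and test context-creation flags.
	context_flags cf = context_flags::SCHEDULER_YIELD | context_flags::LOCAL_MEMORY_RESIZE_TO_MAXIMUM;
	if ((cf & context_flags::SCHEDULER_YIELD) == context_flags::SCHEDULER_YIELD) {
		// the yield scheduling hint is requested
	}

	// Stream flags are combined the same way.
	stream_flags sf = stream_flags::DEFAULT | stream_flags::NON_BLOCKING;
	(void)sf;
}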